• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 #endif
21 
22 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
23 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
24 //
25 // Please do not use sizeof() directly, and size_t only when required.
26 // (We have no way of enforcing these requests...)
27 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
28 
29 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
30 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
31 
32 static const union {
33     uint32_t bits;
34     float    f;
35 } inf_ = { 0x7f800000 };
36 #define INFINITY_ inf_.f
37 
fmaxf_(float x,float y)38 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)39 static float fminf_(float x, float y) { return x < y ? x : y; }
40 
isfinitef_(float x)41 static bool isfinitef_(float x) { return 0 == x*0; }
42 
minus_1_ulp(float x)43 static float minus_1_ulp(float x) {
44     int32_t bits;
45     memcpy(&bits, &x, sizeof(bits));
46     bits = bits - 1;
47     memcpy(&x, &bits, sizeof(bits));
48     return x;
49 }
50 
eval_curve(const skcms_Curve * curve,float x)51 static float eval_curve(const skcms_Curve* curve, float x) {
52     if (curve->table_entries == 0) {
53         return skcms_TransferFunction_eval(&curve->parametric, x);
54     }
55 
56     float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
57     int   lo = (int)                   ix        ,
58           hi = (int)(float)minus_1_ulp(ix + 1.0f);
59     float t = ix - (float)lo;
60 
61     float l, h;
62     if (curve->table_8) {
63         l = curve->table_8[lo] * (1/255.0f);
64         h = curve->table_8[hi] * (1/255.0f);
65     } else {
66         uint16_t be_l, be_h;
67         memcpy(&be_l, curve->table_16 + 2*lo, 2);
68         memcpy(&be_h, curve->table_16 + 2*hi, 2);
69         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
70         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
71         l = le_l * (1/65535.0f);
72         h = le_h * (1/65535.0f);
73     }
74     return l + (h-l)*t;
75 }
76 
max_roundtrip_error(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)77 static float max_roundtrip_error(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
78     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
79     const float dx = 1.0f / (N - 1);
80     float err = 0;
81     for (uint32_t i = 0; i < N; i++) {
82         float x = i * dx,
83               y = eval_curve(curve, x);
84         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
85     }
86     return err;
87 }
88 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)89 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
90     return max_roundtrip_error(curve, inv_tf) < (1/512.0f);
91 }
92 
93 // Additional ICC signature values that are only used internally
94 enum {
95     // File signature
96     skcms_Signature_acsp = 0x61637370,
97 
98     // Tag signatures
99     skcms_Signature_rTRC = 0x72545243,
100     skcms_Signature_gTRC = 0x67545243,
101     skcms_Signature_bTRC = 0x62545243,
102     skcms_Signature_kTRC = 0x6B545243,
103 
104     skcms_Signature_rXYZ = 0x7258595A,
105     skcms_Signature_gXYZ = 0x6758595A,
106     skcms_Signature_bXYZ = 0x6258595A,
107 
108     skcms_Signature_A2B0 = 0x41324230,
109     skcms_Signature_A2B1 = 0x41324231,
110     skcms_Signature_mAB  = 0x6D414220,
111 
112     skcms_Signature_CHAD = 0x63686164,
113 
114     // Type signatures
115     skcms_Signature_curv = 0x63757276,
116     skcms_Signature_mft1 = 0x6D667431,
117     skcms_Signature_mft2 = 0x6D667432,
118     skcms_Signature_para = 0x70617261,
119     skcms_Signature_sf32 = 0x73663332,
120     // XYZ is also a PCS signature, so it's defined in skcms.h
121     // skcms_Signature_XYZ = 0x58595A20,
122 };
123 
read_big_u16(const uint8_t * ptr)124 static uint16_t read_big_u16(const uint8_t* ptr) {
125     uint16_t be;
126     memcpy(&be, ptr, sizeof(be));
127 #if defined(_MSC_VER)
128     return _byteswap_ushort(be);
129 #else
130     return __builtin_bswap16(be);
131 #endif
132 }
133 
read_big_u32(const uint8_t * ptr)134 static uint32_t read_big_u32(const uint8_t* ptr) {
135     uint32_t be;
136     memcpy(&be, ptr, sizeof(be));
137 #if defined(_MSC_VER)
138     return _byteswap_ulong(be);
139 #else
140     return __builtin_bswap32(be);
141 #endif
142 }
143 
read_big_i32(const uint8_t * ptr)144 static int32_t read_big_i32(const uint8_t* ptr) {
145     return (int32_t)read_big_u32(ptr);
146 }
147 
read_big_fixed(const uint8_t * ptr)148 static float read_big_fixed(const uint8_t* ptr) {
149     return read_big_i32(ptr) * (1.0f / 65536.0f);
150 }
151 
152 // Maps to an in-memory profile so that fields line up to the locations specified
153 // in ICC.1:2010, section 7.2
154 typedef struct {
155     uint8_t size                [ 4];
156     uint8_t cmm_type            [ 4];
157     uint8_t version             [ 4];
158     uint8_t profile_class       [ 4];
159     uint8_t data_color_space    [ 4];
160     uint8_t pcs                 [ 4];
161     uint8_t creation_date_time  [12];
162     uint8_t signature           [ 4];
163     uint8_t platform            [ 4];
164     uint8_t flags               [ 4];
165     uint8_t device_manufacturer [ 4];
166     uint8_t device_model        [ 4];
167     uint8_t device_attributes   [ 8];
168     uint8_t rendering_intent    [ 4];
169     uint8_t illuminant_X        [ 4];
170     uint8_t illuminant_Y        [ 4];
171     uint8_t illuminant_Z        [ 4];
172     uint8_t creator             [ 4];
173     uint8_t profile_id          [16];
174     uint8_t reserved            [28];
175     uint8_t tag_count           [ 4]; // Technically not part of header, but required
176 } header_Layout;
177 
178 typedef struct {
179     uint8_t signature [4];
180     uint8_t offset    [4];
181     uint8_t size      [4];
182 } tag_Layout;
183 
get_tag_table(const skcms_ICCProfile * profile)184 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
185     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
186 }
187 
188 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
189 // use of the type is for the CHAD tag that stores exactly nine values.
190 typedef struct {
191     uint8_t type     [ 4];
192     uint8_t reserved [ 4];
193     uint8_t values   [36];
194 } sf32_Layout;
195 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)196 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
197     skcms_ICCTag tag;
198     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
199         return false;
200     }
201 
202     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
203         return false;
204     }
205 
206     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
207     const uint8_t* values = sf32Tag->values;
208     for (int r = 0; r < 3; ++r)
209     for (int c = 0; c < 3; ++c, values += 4) {
210         m->vals[r][c] = read_big_fixed(values);
211     }
212     return true;
213 }
214 
215 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
216 // the type are for tags/data that store exactly one triple.
217 typedef struct {
218     uint8_t type     [4];
219     uint8_t reserved [4];
220     uint8_t X        [4];
221     uint8_t Y        [4];
222     uint8_t Z        [4];
223 } XYZ_Layout;
224 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)225 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
226     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
227         return false;
228     }
229 
230     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
231 
232     *x = read_big_fixed(xyzTag->X);
233     *y = read_big_fixed(xyzTag->Y);
234     *z = read_big_fixed(xyzTag->Z);
235     return true;
236 }
237 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)238 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
239                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
240     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
241            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
242            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
243 }
244 
tf_is_valid(const skcms_TransferFunction * tf)245 static bool tf_is_valid(const skcms_TransferFunction* tf) {
246     // Reject obviously malformed inputs
247     if (!isfinitef_(tf->a + tf->b + tf->c + tf->d + tf->e + tf->f + tf->g)) {
248         return false;
249     }
250 
251     // All of these parameters should be non-negative
252     if (tf->a < 0 || tf->c < 0 || tf->d < 0 || tf->g < 0) {
253         return false;
254     }
255 
256     return true;
257 }
258 
259 typedef struct {
260     uint8_t type          [4];
261     uint8_t reserved_a    [4];
262     uint8_t function_type [2];
263     uint8_t reserved_b    [2];
264     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
265 } para_Layout;
266 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)267 static bool read_curve_para(const uint8_t* buf, uint32_t size,
268                             skcms_Curve* curve, uint32_t* curve_size) {
269     if (size < SAFE_FIXED_SIZE(para_Layout)) {
270         return false;
271     }
272 
273     const para_Layout* paraTag = (const para_Layout*)buf;
274 
275     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
276     uint16_t function_type = read_big_u16(paraTag->function_type);
277     if (function_type > kGABCDEF) {
278         return false;
279     }
280 
281     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
282     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
283         return false;
284     }
285 
286     if (curve_size) {
287         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
288     }
289 
290     curve->table_entries = 0;
291     curve->parametric.a  = 1.0f;
292     curve->parametric.b  = 0.0f;
293     curve->parametric.c  = 0.0f;
294     curve->parametric.d  = 0.0f;
295     curve->parametric.e  = 0.0f;
296     curve->parametric.f  = 0.0f;
297     curve->parametric.g  = read_big_fixed(paraTag->variable);
298 
299     switch (function_type) {
300         case kGAB:
301             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
302             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
303             if (curve->parametric.a == 0) {
304                 return false;
305             }
306             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
307             break;
308         case kGABC:
309             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
310             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
311             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
312             if (curve->parametric.a == 0) {
313                 return false;
314             }
315             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
316             curve->parametric.f = curve->parametric.e;
317             break;
318         case kGABCD:
319             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
320             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
321             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
322             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
323             break;
324         case kGABCDEF:
325             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
326             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
327             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
328             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
329             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
330             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
331             break;
332     }
333     return tf_is_valid(&curve->parametric);
334 }
335 
336 typedef struct {
337     uint8_t type          [4];
338     uint8_t reserved      [4];
339     uint8_t value_count   [4];
340     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
341 } curv_Layout;
342 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)343 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
344                             skcms_Curve* curve, uint32_t* curve_size) {
345     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
346         return false;
347     }
348 
349     const curv_Layout* curvTag = (const curv_Layout*)buf;
350 
351     uint32_t value_count = read_big_u32(curvTag->value_count);
352     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
353         return false;
354     }
355 
356     if (curve_size) {
357         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
358     }
359 
360     if (value_count < 2) {
361         curve->table_entries = 0;
362         curve->parametric.a  = 1.0f;
363         curve->parametric.b  = 0.0f;
364         curve->parametric.c  = 0.0f;
365         curve->parametric.d  = 0.0f;
366         curve->parametric.e  = 0.0f;
367         curve->parametric.f  = 0.0f;
368         if (value_count == 0) {
369             // Empty tables are a shorthand for an identity curve
370             curve->parametric.g = 1.0f;
371         } else {
372             // Single entry tables are a shorthand for simple gamma
373             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
374         }
375     } else {
376         curve->table_8       = nullptr;
377         curve->table_16      = curvTag->variable;
378         curve->table_entries = value_count;
379     }
380 
381     return true;
382 }
383 
384 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
385 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)386 static bool read_curve(const uint8_t* buf, uint32_t size,
387                        skcms_Curve* curve, uint32_t* curve_size) {
388     if (!buf || size < 4 || !curve) {
389         return false;
390     }
391 
392     uint32_t type = read_big_u32(buf);
393     if (type == skcms_Signature_para) {
394         return read_curve_para(buf, size, curve, curve_size);
395     } else if (type == skcms_Signature_curv) {
396         return read_curve_curv(buf, size, curve, curve_size);
397     }
398 
399     return false;
400 }
401 
402 // mft1 and mft2 share a large chunk of data
403 typedef struct {
404     uint8_t type                 [ 4];
405     uint8_t reserved_a           [ 4];
406     uint8_t input_channels       [ 1];
407     uint8_t output_channels      [ 1];
408     uint8_t grid_points          [ 1];
409     uint8_t reserved_b           [ 1];
410     uint8_t matrix               [36];
411 } mft_CommonLayout;
412 
413 typedef struct {
414     mft_CommonLayout common      [1];
415 
416     uint8_t variable             [1/*variable*/];
417 } mft1_Layout;
418 
419 typedef struct {
420     mft_CommonLayout common      [1];
421 
422     uint8_t input_table_entries  [2];
423     uint8_t output_table_entries [2];
424     uint8_t variable             [1/*variable*/];
425 } mft2_Layout;
426 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)427 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
428     // MFT matrices are applied before the first set of curves, but must be identity unless the
429     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
430     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
431     // field/flag.
432     a2b->matrix_channels = 0;
433 
434     a2b->input_channels  = mftTag->input_channels[0];
435     a2b->output_channels = mftTag->output_channels[0];
436 
437     // We require exactly three (ie XYZ/Lab/RGB) output channels
438     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
439         return false;
440     }
441     // We require at least one, and no more than four (ie CMYK) input channels
442     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
443         return false;
444     }
445 
446     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
447         a2b->grid_points[i] = mftTag->grid_points[0];
448     }
449     // The grid only makes sense with at least two points along each axis
450     if (a2b->grid_points[0] < 2) {
451         return false;
452     }
453 
454     return true;
455 }
456 
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)457 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
458                             uint32_t input_table_entries, uint32_t output_table_entries,
459                             skcms_A2B* a2b) {
460     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
461     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
462     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
463 
464     // [input|output]_channels are <= 4, so still no overflow
465     uint32_t byte_len_all_input_tables  = a2b->input_channels * byte_len_per_input_table;
466     uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
467 
468     uint64_t grid_size = a2b->output_channels * byte_width;
469     for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
470         grid_size *= a2b->grid_points[axis];
471     }
472 
473     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
474         return false;
475     }
476 
477     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
478         a2b->input_curves[i].table_entries = input_table_entries;
479         if (byte_width == 1) {
480             a2b->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
481             a2b->input_curves[i].table_16 = nullptr;
482         } else {
483             a2b->input_curves[i].table_8  = nullptr;
484             a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
485         }
486     }
487 
488     if (byte_width == 1) {
489         a2b->grid_8  = table_base + byte_len_all_input_tables;
490         a2b->grid_16 = nullptr;
491     } else {
492         a2b->grid_8  = nullptr;
493         a2b->grid_16 = table_base + byte_len_all_input_tables;
494     }
495 
496     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
497     for (uint32_t i = 0; i < a2b->output_channels; ++i) {
498         a2b->output_curves[i].table_entries = output_table_entries;
499         if (byte_width == 1) {
500             a2b->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
501             a2b->output_curves[i].table_16 = nullptr;
502         } else {
503             a2b->output_curves[i].table_8  = nullptr;
504             a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
505         }
506     }
507 
508     return true;
509 }
510 
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)511 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
512     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
513         return false;
514     }
515 
516     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
517     if (!read_mft_common(mftTag->common, a2b)) {
518         return false;
519     }
520 
521     uint32_t input_table_entries  = 256;
522     uint32_t output_table_entries = 256;
523 
524     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
525                            input_table_entries, output_table_entries, a2b);
526 }
527 
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)528 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
529     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
530         return false;
531     }
532 
533     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
534     if (!read_mft_common(mftTag->common, a2b)) {
535         return false;
536     }
537 
538     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
539     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
540 
541     // ICC spec mandates that 2 <= table_entries <= 4096
542     if (input_table_entries < 2 || input_table_entries > 4096 ||
543         output_table_entries < 2 || output_table_entries > 4096) {
544         return false;
545     }
546 
547     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
548                            input_table_entries, output_table_entries, a2b);
549 }
550 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)551 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
552                         uint32_t num_curves, skcms_Curve* curves) {
553     for (uint32_t i = 0; i < num_curves; ++i) {
554         if (curve_offset > size) {
555             return false;
556         }
557 
558         uint32_t curve_bytes;
559         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
560             return false;
561         }
562 
563         if (curve_bytes > UINT32_MAX - 3) {
564             return false;
565         }
566         curve_bytes = (curve_bytes + 3) & ~3U;
567 
568         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
569         curve_offset = (uint32_t)new_offset_64;
570         if (new_offset_64 != curve_offset) {
571             return false;
572         }
573     }
574 
575     return true;
576 }
577 
578 typedef struct {
579     uint8_t type                 [ 4];
580     uint8_t reserved_a           [ 4];
581     uint8_t input_channels       [ 1];
582     uint8_t output_channels      [ 1];
583     uint8_t reserved_b           [ 2];
584     uint8_t b_curve_offset       [ 4];
585     uint8_t matrix_offset        [ 4];
586     uint8_t m_curve_offset       [ 4];
587     uint8_t clut_offset          [ 4];
588     uint8_t a_curve_offset       [ 4];
589 } mAB_Layout;
590 
591 typedef struct {
592     uint8_t grid_points          [16];
593     uint8_t grid_byte_width      [ 1];
594     uint8_t reserved             [ 3];
595     uint8_t variable             [1/*variable*/];
596 } mABCLUT_Layout;
597 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)598 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
599     if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
600         return false;
601     }
602 
603     const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
604 
605     a2b->input_channels  = mABTag->input_channels[0];
606     a2b->output_channels = mABTag->output_channels[0];
607 
608     // We require exactly three (ie XYZ/Lab/RGB) output channels
609     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
610         return false;
611     }
612     // We require no more than four (ie CMYK) input channels
613     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
614         return false;
615     }
616 
617     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
618     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
619     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
620     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
621     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
622 
623     // "B" curves must be present
624     if (0 == b_curve_offset) {
625         return false;
626     }
627 
628     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
629                      a2b->output_curves)) {
630         return false;
631     }
632 
633     // "M" curves and Matrix must be used together
634     if (0 != m_curve_offset) {
635         if (0 == matrix_offset) {
636             return false;
637         }
638         a2b->matrix_channels = a2b->output_channels;
639         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
640                          a2b->matrix_curves)) {
641             return false;
642         }
643 
644         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
645         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
646             return false;
647         }
648         float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
649         const uint8_t* mtx_buf = tag->buf + matrix_offset;
650         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
651         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
652         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
653         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
654         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
655         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
656         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
657         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
658         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
659         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
660         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
661         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
662     } else {
663         if (0 != matrix_offset) {
664             return false;
665         }
666         a2b->matrix_channels = 0;
667     }
668 
669     // "A" curves and CLUT must be used together
670     if (0 != a_curve_offset) {
671         if (0 == clut_offset) {
672             return false;
673         }
674         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
675                          a2b->input_curves)) {
676             return false;
677         }
678 
679         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
680             return false;
681         }
682         const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
683 
684         if (clut->grid_byte_width[0] == 1) {
685             a2b->grid_8  = clut->variable;
686             a2b->grid_16 = nullptr;
687         } else if (clut->grid_byte_width[0] == 2) {
688             a2b->grid_8  = nullptr;
689             a2b->grid_16 = clut->variable;
690         } else {
691             return false;
692         }
693 
694         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
695         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
696             a2b->grid_points[i] = clut->grid_points[i];
697             // The grid only makes sense with at least two points along each axis
698             if (a2b->grid_points[i] < 2) {
699                 return false;
700             }
701             grid_size *= a2b->grid_points[i];
702         }
703         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
704             return false;
705         }
706     } else {
707         if (0 != clut_offset) {
708             return false;
709         }
710 
711         // If there is no CLUT, the number of input and output channels must match
712         if (a2b->input_channels != a2b->output_channels) {
713             return false;
714         }
715 
716         // Zero out the number of input channels to signal that we're skipping this stage
717         a2b->input_channels = 0;
718     }
719 
720     return true;
721 }
722 
723 // If you pass f, we'll fit a possibly-non-zero value for *f.
724 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)725 static int fit_linear(const skcms_Curve* curve, int N, float tol,
726                       float* c, float* d, float* f = nullptr) {
727     assert(N > 1);
728     // We iteratively fit the first points to the TF's linear piece.
729     // We want the cx + f line to pass through the first and last points we fit exactly.
730     //
731     // As we walk along the points we find the minimum and maximum slope of the line before the
732     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
733     // emtpy, when we definitely can't add any more points.
734     //
735     // Some points' error intervals may intersect the running interval but not lie fully
736     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
737     // and once the search is done, back up to build the line through *that* point.
738     const float dx = 1.0f / (N - 1);
739 
740     int lin_points = 1;
741 
742     float f_zero = 0.0f;
743     if (f) {
744         *f = eval_curve(curve, 0);
745     } else {
746         f = &f_zero;
747     }
748 
749 
750     float slope_min = -INFINITY_;
751     float slope_max = +INFINITY_;
752     for (int i = 1; i < N; ++i) {
753         float x = i * dx;
754         float y = eval_curve(curve, x);
755 
756         float slope_max_i = (y + tol - *f) / x,
757               slope_min_i = (y - tol - *f) / x;
758         if (slope_max_i < slope_min || slope_max < slope_min_i) {
759             // Slope intervals would no longer overlap.
760             break;
761         }
762         slope_max = fminf_(slope_max, slope_max_i);
763         slope_min = fmaxf_(slope_min, slope_min_i);
764 
765         float cur_slope = (y - *f) / x;
766         if (slope_min <= cur_slope && cur_slope <= slope_max) {
767             lin_points = i + 1;
768             *c = cur_slope;
769         }
770     }
771 
772     // Set D to the last point that met our tolerance.
773     *d = (lin_points - 1) * dx;
774     return lin_points;
775 }
776 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)777 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
778     bool ok = false;
779     if (tag->type == skcms_Signature_mft1) {
780         ok = read_tag_mft1(tag, a2b);
781     } else if (tag->type == skcms_Signature_mft2) {
782         ok = read_tag_mft2(tag, a2b);
783     } else if (tag->type == skcms_Signature_mAB) {
784         ok = read_tag_mab(tag, a2b, pcs_is_xyz);
785     }
786     if (!ok) {
787         return false;
788     }
789 
790     // Detect and canonicalize identity tables.
791     skcms_Curve* curves[] = {
792         a2b->input_channels  > 0 ? a2b->input_curves  + 0 : nullptr,
793         a2b->input_channels  > 1 ? a2b->input_curves  + 1 : nullptr,
794         a2b->input_channels  > 2 ? a2b->input_curves  + 2 : nullptr,
795         a2b->input_channels  > 3 ? a2b->input_curves  + 3 : nullptr,
796         a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
797         a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
798         a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
799         a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
800         a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
801         a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
802     };
803 
804     for (int i = 0; i < ARRAY_COUNT(curves); i++) {
805         skcms_Curve* curve = curves[i];
806 
807         if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
808             int N = (int)curve->table_entries;
809 
810             float c = 0.0f, d = 0.0f, f = 0.0f;
811             if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
812                 && c == 1.0f
813                 && f == 0.0f) {
814                 curve->table_entries = 0;
815                 curve->table_8       = nullptr;
816                 curve->table_16      = nullptr;
817                 curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
818             }
819         }
820     }
821 
822     return true;
823 }
824 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)825 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
826     if (!profile || !profile->buffer || !tag) { return; }
827     if (idx > profile->tag_count) { return; }
828     const tag_Layout* tags = get_tag_table(profile);
829     tag->signature = read_big_u32(tags[idx].signature);
830     tag->size      = read_big_u32(tags[idx].size);
831     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
832     tag->type      = read_big_u32(tag->buf);
833 }
834 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)835 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
836     if (!profile || !profile->buffer || !tag) { return false; }
837     const tag_Layout* tags = get_tag_table(profile);
838     for (uint32_t i = 0; i < profile->tag_count; ++i) {
839         if (read_big_u32(tags[i].signature) == sig) {
840             tag->signature = sig;
841             tag->size      = read_big_u32(tags[i].size);
842             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
843             tag->type      = read_big_u32(tag->buf);
844             return true;
845         }
846     }
847     return false;
848 }
849 
usable_as_src(const skcms_ICCProfile * profile)850 static bool usable_as_src(const skcms_ICCProfile* profile) {
851     return profile->has_A2B
852        || (profile->has_trc && profile->has_toXYZD50);
853 }
854 
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)855 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
856     assert(SAFE_SIZEOF(header_Layout) == 132);
857 
858     if (!profile) {
859         return false;
860     }
861     memset(profile, 0, SAFE_SIZEOF(*profile));
862 
863     if (len < SAFE_SIZEOF(header_Layout)) {
864         return false;
865     }
866 
867     // Byte-swap all header fields
868     const header_Layout* header  = (const header_Layout*)buf;
869     profile->buffer              = (const uint8_t*)buf;
870     profile->size                = read_big_u32(header->size);
871     uint32_t version             = read_big_u32(header->version);
872     profile->data_color_space    = read_big_u32(header->data_color_space);
873     profile->pcs                 = read_big_u32(header->pcs);
874     uint32_t signature           = read_big_u32(header->signature);
875     float illuminant_X           = read_big_fixed(header->illuminant_X);
876     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
877     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
878     profile->tag_count           = read_big_u32(header->tag_count);
879 
880     // Validate signature, size (smaller than buffer, large enough to hold tag table),
881     // and major version
882     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
883     if (signature != skcms_Signature_acsp ||
884         profile->size > len ||
885         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
886         (version >> 24) > 4) {
887         return false;
888     }
889 
890     // Validate that illuminant is D50 white
891     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
892         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
893         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
894         return false;
895     }
896 
897     // Validate that all tag entries have sane offset + size
898     const tag_Layout* tags = get_tag_table(profile);
899     for (uint32_t i = 0; i < profile->tag_count; ++i) {
900         uint32_t tag_offset = read_big_u32(tags[i].offset);
901         uint32_t tag_size   = read_big_u32(tags[i].size);
902         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
903         if (tag_size < 4 || tag_end > profile->size) {
904             return false;
905         }
906     }
907 
908     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
909         return false;
910     }
911 
912     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
913 
914     // Pre-parse commonly used tags.
915     skcms_ICCTag kTRC;
916     if (profile->data_color_space == skcms_Signature_Gray &&
917         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
918         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
919             // Malformed tag
920             return false;
921         }
922         profile->trc[1] = profile->trc[0];
923         profile->trc[2] = profile->trc[0];
924         profile->has_trc = true;
925 
926         if (pcs_is_xyz) {
927             profile->toXYZD50.vals[0][0] = illuminant_X;
928             profile->toXYZD50.vals[1][1] = illuminant_Y;
929             profile->toXYZD50.vals[2][2] = illuminant_Z;
930             profile->has_toXYZD50 = true;
931         }
932     } else {
933         skcms_ICCTag rTRC, gTRC, bTRC;
934         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
935             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
936             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
937             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
938                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
939                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
940                 // Malformed TRC tags
941                 return false;
942             }
943             profile->has_trc = true;
944         }
945 
946         skcms_ICCTag rXYZ, gXYZ, bXYZ;
947         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
948             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
949             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
950             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
951                 // Malformed XYZ tags
952                 return false;
953             }
954             profile->has_toXYZD50 = true;
955         }
956     }
957 
958     skcms_ICCTag a2b_tag;
959 
960     // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
961     // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
962     // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
963     // and all our known users are thinking exclusively in terms of relative colormetric.
964     const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
965     for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
966         if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
967             if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
968                 // Malformed A2B tag
969                 return false;
970             }
971             profile->has_A2B = true;
972             break;
973         }
974     }
975 
976     return usable_as_src(profile);
977 }
978 
979 
skcms_sRGB_profile()980 const skcms_ICCProfile* skcms_sRGB_profile() {
981     static const skcms_ICCProfile sRGB_profile = {
982         nullptr,               // buffer, moot here
983 
984         0,                     // size, moot here
985         skcms_Signature_RGB,   // data_color_space
986         skcms_Signature_XYZ,   // pcs
987         0,                     // tag count, moot here
988 
989         // We choose to represent sRGB with its canonical transfer function,
990         // and with its canonical XYZD50 gamut matrix.
991         true,  // has_trc, followed by the 3 trc curves
992         {
993             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
994             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
995             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
996         },
997 
998         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
999         {{
1000             { 0.436065674f, 0.385147095f, 0.143066406f },
1001             { 0.222488403f, 0.716873169f, 0.060607910f },
1002             { 0.013916016f, 0.097076416f, 0.714096069f },
1003         }},
1004 
1005         false, // has_A2B, followed by a2b itself which we don't care about.
1006         {
1007             0,
1008             {
1009                 {{0, {0,0, 0,0,0,0,0}}},
1010                 {{0, {0,0, 0,0,0,0,0}}},
1011                 {{0, {0,0, 0,0,0,0,0}}},
1012                 {{0, {0,0, 0,0,0,0,0}}},
1013             },
1014             {0,0,0,0},
1015             nullptr,
1016             nullptr,
1017 
1018             0,
1019             {
1020                 {{0, {0,0, 0,0,0,0,0}}},
1021                 {{0, {0,0, 0,0,0,0,0}}},
1022                 {{0, {0,0, 0,0,0,0,0}}},
1023             },
1024             {{
1025                 { 0,0,0,0 },
1026                 { 0,0,0,0 },
1027                 { 0,0,0,0 },
1028             }},
1029 
1030             0,
1031             {
1032                 {{0, {0,0, 0,0,0,0,0}}},
1033                 {{0, {0,0, 0,0,0,0,0}}},
1034                 {{0, {0,0, 0,0,0,0,0}}},
1035             },
1036         },
1037     };
1038     return &sRGB_profile;
1039 }
1040 
skcms_XYZD50_profile()1041 const skcms_ICCProfile* skcms_XYZD50_profile() {
1042     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1043     static const skcms_ICCProfile XYZD50_profile = {
1044         nullptr,               // buffer, moot here
1045 
1046         0,                     // size, moot here
1047         skcms_Signature_RGB,   // data_color_space
1048         skcms_Signature_XYZ,   // pcs
1049         0,                     // tag count, moot here
1050 
1051         true,  // has_trc, followed by the 3 trc curves
1052         {
1053             {{0, {1,1, 0,0,0,0,0}}},
1054             {{0, {1,1, 0,0,0,0,0}}},
1055             {{0, {1,1, 0,0,0,0,0}}},
1056         },
1057 
1058         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1059         {{
1060             { 1,0,0 },
1061             { 0,1,0 },
1062             { 0,0,1 },
1063         }},
1064 
1065         false, // has_A2B, followed by a2b itself which we don't care about.
1066         {
1067             0,
1068             {
1069                 {{0, {0,0, 0,0,0,0,0}}},
1070                 {{0, {0,0, 0,0,0,0,0}}},
1071                 {{0, {0,0, 0,0,0,0,0}}},
1072                 {{0, {0,0, 0,0,0,0,0}}},
1073             },
1074             {0,0,0,0},
1075             nullptr,
1076             nullptr,
1077 
1078             0,
1079             {
1080                 {{0, {0,0, 0,0,0,0,0}}},
1081                 {{0, {0,0, 0,0,0,0,0}}},
1082                 {{0, {0,0, 0,0,0,0,0}}},
1083             },
1084             {{
1085                 { 0,0,0,0 },
1086                 { 0,0,0,0 },
1087                 { 0,0,0,0 },
1088             }},
1089 
1090             0,
1091             {
1092                 {{0, {0,0, 0,0,0,0,0}}},
1093                 {{0, {0,0, 0,0,0,0,0}}},
1094                 {{0, {0,0, 0,0,0,0,0}}},
1095             },
1096         },
1097     };
1098 
1099     return &XYZD50_profile;
1100 }
1101 
skcms_sRGB_TransferFunction()1102 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1103     return &skcms_sRGB_profile()->trc[0].parametric;
1104 }
1105 
skcms_sRGB_Inverse_TransferFunction()1106 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1107     static const skcms_TransferFunction sRGB_inv =
1108         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1109     return &sRGB_inv;
1110 }
1111 
skcms_Identity_TransferFunction()1112 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1113     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1114     return &identity;
1115 }
1116 
1117 const uint8_t skcms_252_random_bytes[] = {
1118     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1119     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1120     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1121     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1122     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1123     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1124     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1125     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1126     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1127     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1128     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1129     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1130     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1131     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1132     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1133     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1134 };
1135 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1136 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1137     // Test for exactly equal profiles first.
1138     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1139         return true;
1140     }
1141 
1142     // For now this is the essentially the same strategy we use in test_only.c
1143     // for our skcms_Transform() smoke tests:
1144     //    1) transform A to XYZD50
1145     //    2) transform B to XYZD50
1146     //    3) return true if they're similar enough
1147     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1148 
1149     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1150     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1151 
1152     if (A->data_color_space != B->data_color_space) {
1153         return false;
1154     }
1155 
1156     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1157     // TODO: working with RGBA_8888 either way is probably fastest.
1158     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1159     size_t npixels = 84;
1160     if (A->data_color_space == skcms_Signature_CMYK) {
1161         fmt = skcms_PixelFormat_RGBA_8888;
1162         npixels = 63;
1163     }
1164 
1165     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1166     // use pre-canned results and skip that skcms_Transform() call?
1167     uint8_t dstA[252],
1168             dstB[252];
1169     if (!skcms_Transform(
1170                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1171                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1172                 npixels)) {
1173         return false;
1174     }
1175     if (!skcms_Transform(
1176                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1177                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1178                 npixels)) {
1179         return false;
1180     }
1181 
1182     // TODO: make sure this final check has reasonable codegen.
1183     for (size_t i = 0; i < 252; i++) {
1184         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1185             return false;
1186         }
1187     }
1188     return true;
1189 }
1190 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1191 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1192                                       const skcms_TransferFunction* inv_tf) {
1193     if (!profile || !profile->has_trc) {
1194         return false;
1195     }
1196 
1197     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1198            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1199            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1200 }
1201 
is_zero_to_one(float x)1202 static bool is_zero_to_one(float x) {
1203     return 0 <= x && x <= 1;
1204 }
1205 
1206 typedef struct { float vals[3]; } skcms_Vector3;
1207 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1208 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1209     skcms_Vector3 dst = {{0,0,0}};
1210     for (int row = 0; row < 3; ++row) {
1211         dst.vals[row] = m->vals[row][0] * v->vals[0]
1212                       + m->vals[row][1] * v->vals[1]
1213                       + m->vals[row][2] * v->vals[2];
1214     }
1215     return dst;
1216 }
1217 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1218 bool skcms_PrimariesToXYZD50(float rx, float ry,
1219                              float gx, float gy,
1220                              float bx, float by,
1221                              float wx, float wy,
1222                              skcms_Matrix3x3* toXYZD50) {
1223     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1224         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1225         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1226         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1227         !toXYZD50) {
1228         return false;
1229     }
1230 
1231     // First, we need to convert xy values (primaries) to XYZ.
1232     skcms_Matrix3x3 primaries = {{
1233         { rx, gx, bx },
1234         { ry, gy, by },
1235         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1236     }};
1237     skcms_Matrix3x3 primaries_inv;
1238     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1239         return false;
1240     }
1241 
1242     // Assumes that Y is 1.0f.
1243     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1244     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1245 
1246     skcms_Matrix3x3 toXYZ = {{
1247         { XYZ.vals[0],           0,           0 },
1248         {           0, XYZ.vals[1],           0 },
1249         {           0,           0, XYZ.vals[2] },
1250     }};
1251     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1252 
1253     // Now convert toXYZ matrix to toXYZD50.
1254     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1255 
1256     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1257     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1258     // to be the best.
1259     skcms_Matrix3x3 xyz_to_lms = {{
1260         {  0.8951f,  0.2664f, -0.1614f },
1261         { -0.7502f,  1.7135f,  0.0367f },
1262         {  0.0389f, -0.0685f,  1.0296f },
1263     }};
1264     skcms_Matrix3x3 lms_to_xyz = {{
1265         {  0.9869929f, -0.1470543f, 0.1599627f },
1266         {  0.4323053f,  0.5183603f, 0.0492912f },
1267         { -0.0085287f,  0.0400428f, 0.9684867f },
1268     }};
1269 
1270     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1271     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1272 
1273     skcms_Matrix3x3 DXtoD50 = {{
1274         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1275         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1276         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1277     }};
1278     DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
1279     DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
1280 
1281     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1282     return true;
1283 }
1284 
1285 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1286 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1287     double a00 = src->vals[0][0],
1288            a01 = src->vals[1][0],
1289            a02 = src->vals[2][0],
1290            a10 = src->vals[0][1],
1291            a11 = src->vals[1][1],
1292            a12 = src->vals[2][1],
1293            a20 = src->vals[0][2],
1294            a21 = src->vals[1][2],
1295            a22 = src->vals[2][2];
1296 
1297     double b0 = a00*a11 - a01*a10,
1298            b1 = a00*a12 - a02*a10,
1299            b2 = a01*a12 - a02*a11,
1300            b3 = a20,
1301            b4 = a21,
1302            b5 = a22;
1303 
1304     double determinant = b0*b5
1305                        - b1*b4
1306                        + b2*b3;
1307 
1308     if (determinant == 0) {
1309         return false;
1310     }
1311 
1312     double invdet = 1.0 / determinant;
1313     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1314         return false;
1315     }
1316 
1317     b0 *= invdet;
1318     b1 *= invdet;
1319     b2 *= invdet;
1320     b3 *= invdet;
1321     b4 *= invdet;
1322     b5 *= invdet;
1323 
1324     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1325     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1326     dst->vals[2][0] = (float)(        +     b2 );
1327     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1328     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1329     dst->vals[2][1] = (float)(        -     b1 );
1330     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1331     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1332     dst->vals[2][2] = (float)(        +     b0 );
1333 
1334     for (int r = 0; r < 3; ++r)
1335     for (int c = 0; c < 3; ++c) {
1336         if (!isfinitef_(dst->vals[r][c])) {
1337             return false;
1338         }
1339     }
1340     return true;
1341 }
1342 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1343 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1344     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1345     for (int r = 0; r < 3; r++)
1346         for (int c = 0; c < 3; c++) {
1347             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1348                          + A->vals[r][1] * B->vals[1][c]
1349                          + A->vals[r][2] * B->vals[2][c];
1350         }
1351     return m;
1352 }
1353 
1354 #if defined(__clang__) || defined(__GNUC__)
1355     #define small_memcpy __builtin_memcpy
1356 #else
1357     #define small_memcpy memcpy
1358 #endif
1359 
log2f_(float x)1360 static float log2f_(float x) {
1361     // The first approximation of log2(x) is its exponent 'e', minus 127.
1362     int32_t bits;
1363     small_memcpy(&bits, &x, sizeof(bits));
1364 
1365     float e = (float)bits * (1.0f / (1<<23));
1366 
1367     // If we use the mantissa too we can refine the error signficantly.
1368     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
1369     float m;
1370     small_memcpy(&m, &m_bits, sizeof(m));
1371 
1372     return (e - 124.225514990f
1373               -   1.498030302f*m
1374               -   1.725879990f/(0.3520887068f + m));
1375 }
1376 
exp2f_(float x)1377 static float exp2f_(float x) {
1378     float fract = x - floorf_(x);
1379 
1380     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
1381                                         -   1.490129070f*fract
1382                                         +  27.728023300f/(4.84252568f - fract));
1383     if (fbits > INT_MAX) {
1384         return INFINITY_;
1385     } else if (fbits < INT_MIN) {
1386         return -INFINITY_;
1387     }
1388     int32_t bits = (int32_t)fbits;
1389     small_memcpy(&x, &bits, sizeof(x));
1390     return x;
1391 }
1392 
powf_(float x,float y)1393 float powf_(float x, float y) {
1394     return (x == 0) || (x == 1) ? x
1395                                 : exp2f_(log2f_(x) * y);
1396 }
1397 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)1398 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
1399     float sign = x < 0 ? -1.0f : 1.0f;
1400     x *= sign;
1401 
1402     return sign * (x < tf->d ? tf->c * x + tf->f
1403                              : powf_(tf->a * x + tf->b, tf->g) + tf->e);
1404 }
1405 
1406 #if defined(__clang__)
1407     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by tf_is_valid() on the way out.
1408 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1409 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1410     if (!tf_is_valid(src)) {
1411         return false;
1412     }
1413 
1414     // We're inverting this function, solving for x in terms of y.
1415     //   y = (cx + f)         x < d
1416     //       (ax + b)^g + e   x ≥ d
1417     // The inverse of this function can be expressed in the same piecewise form.
1418     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1419 
1420     // We'll start by finding the new threshold inv.d.
1421     // In principle we should be able to find that by solving for y at x=d from either side.
1422     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1423     float d_l =       src->c * src->d + src->f,
1424           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1425     if (fabsf_(d_l - d_r) > 1/512.0f) {
1426         return false;
1427     }
1428     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1429 
1430     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1431     if (inv.d > 0) {
1432         // Inverting the linear section is pretty straightfoward:
1433         //        y       = cx + f
1434         //        y - f   = cx
1435         //   (1/c)y - f/c = x
1436         inv.c =    1.0f/src->c;
1437         inv.f = -src->f/src->c;
1438     }
1439 
1440     // The interesting part is inverting the nonlinear section:
1441     //         y                = (ax + b)^g + e.
1442     //         y - e            = (ax + b)^g
1443     //        (y - e)^1/g       =  ax + b
1444     //        (y - e)^1/g - b   =  ax
1445     //   (1/a)(y - e)^1/g - b/a =   x
1446     //
1447     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1448     //   let k = (1/a)^g
1449     //   (1/a)( y -  e)^1/g - b/a = x
1450     //        (ky - ke)^1/g - b/a = x
1451 
1452     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1453     inv.g = 1.0f / src->g;
1454     inv.a = k;
1455     inv.b = -k * src->e;
1456     inv.e = -src->b / src->a;
1457 
1458     // Now in principle we're done.
1459     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f,
1460     // we'll tweak e.  These two values should be close to each other,
1461     // just down to numerical precision issues, especially from powf_.
1462     float s = powf_(src->a + src->b, src->g) + src->e;
1463     inv.e = 1.0f - powf_(inv.a * s + inv.b, inv.g);
1464 
1465     *dst = inv;
1466     return tf_is_valid(dst);
1467 }
1468 
1469 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1470 
1471 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1472 //
1473 //   tf(x) =  cx + f          x < d
1474 //   tf(x) = (ax + b)^g + e   x ≥ d
1475 //
1476 // When fitting, we add the additional constraint that both pieces meet at d:
1477 //
1478 //   cd + f = (ad + b)^g + e
1479 //
1480 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1481 //
1482 //   tf(x) =                           cx + f   x < d
1483 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1484 //
1485 // Our overall strategy is then:
1486 //    For a couple tolerances,
1487 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1488 //       - invert c,d,f
1489 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1490 //                          (and by constraint, inverted e) to the inverse of the table.
1491 //    Return the parameters with least maximum error.
1492 //
1493 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1494 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1495 //
1496 //    let y = Table(x)
1497 //    r(x) = x - f_inv(y)
1498 //
1499 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
1500 //          - ln(ad + b)*(ad + b)^g
1501 //    ∂r/∂a = yg(ay + b)^(g-1)
1502 //          - dg(ad + b)^(g-1)
1503 //    ∂r/∂b =  g(ay + b)^(g-1)
1504 //          -  g(ad + b)^(g-1)
1505 
1506 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1507 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,const float P[3],float dfdP[3])1508 static float rg_nonlinear(float x,
1509                           const skcms_Curve* curve,
1510                           const skcms_TransferFunction* tf,
1511                           const float P[3],
1512                           float dfdP[3]) {
1513     const float y = eval_curve(curve, x);
1514 
1515     const float g = P[0],  a = P[1],  b = P[2],
1516                 c = tf->c, d = tf->d, f = tf->f;
1517 
1518     const float Y = fmaxf_(a*y + b, 0.0f),
1519                 D =        a*d + b;
1520     assert (D >= 0);
1521 
1522     // The gradient.
1523     dfdP[0] = 0.69314718f*log2f_(Y)*powf_(Y, g)
1524             - 0.69314718f*log2f_(D)*powf_(D, g);
1525     dfdP[1] = y*g*powf_(Y, g-1)
1526             - d*g*powf_(D, g-1);
1527     dfdP[2] =   g*powf_(Y, g-1)
1528             -   g*powf_(D, g-1);
1529 
1530     // The residual.
1531     const float f_inv = powf_(Y, g)
1532                       - powf_(D, g)
1533                       + c*d + f;
1534     return x - f_inv;
1535 }
1536 
gauss_newton_step(const skcms_Curve * curve,const skcms_TransferFunction * tf,float P[3],float x0,float dx,int N)1537 static bool gauss_newton_step(const skcms_Curve* curve,
1538                               const skcms_TransferFunction* tf,
1539                               float P[3],
1540                               float x0, float dx, int N) {
1541     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1542     //
1543     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1544     //   where r(P) is the residual vector
1545     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
1546     //
1547     // Let's review the shape of each of these expressions:
1548     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
1549     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
1550     //   Jf^T   is [3 x N], the transpose of Jf
1551     //
1552     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1553     //                                              and so is its inverse (Jf^T Jf)^-1
1554     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1555     //
1556     // Our implementation strategy to get to the final ∆P is
1557     //   1) evaluate Jf^T Jf,   call that lhs
1558     //   2) evaluate Jf^T r(P), call that rhs
1559     //   3) invert lhs
1560     //   4) multiply inverse lhs by rhs
1561     //
1562     // This is a friendly implementation strategy because we don't have to have any
1563     // buffers that scale with N, and equally nice don't have to perform any matrix
1564     // operations that are variable size.
1565     //
1566     // Other implementation strategies could trade this off, e.g. evaluating the
1567     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1568     // the residuals.  That would probably require implementing singular value
1569     // decomposition, and would create a [3 x N] matrix to be multiplied by the
1570     // [N x 1] residual vector, but on the upside I think that'd eliminate the
1571     // possibility of this gauss_newton_step() function ever failing.
1572 
1573     // 0) start off with lhs and rhs safely zeroed.
1574     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1575     skcms_Vector3   rhs = {  {0,0,0} };
1576 
1577     // 1,2) evaluate lhs and evaluate rhs
1578     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1579     //   so we'll have to update lhs and rhs at the same time.
1580     for (int i = 0; i < N; i++) {
1581         float x = x0 + i*dx;
1582 
1583         float dfdP[3] = {0,0,0};
1584         float resid = rg_nonlinear(x,curve,tf,P, dfdP);
1585 
1586         for (int r = 0; r < 3; r++) {
1587             for (int c = 0; c < 3; c++) {
1588                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1589             }
1590             rhs.vals[r] += dfdP[r] * resid;
1591         }
1592     }
1593 
1594     // If any of the 3 P parameters are unused, this matrix will be singular.
1595     // Detect those cases and fix them up to indentity instead, so we can invert.
1596     for (int k = 0; k < 3; k++) {
1597         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1598             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1599             lhs.vals[k][k] = 1;
1600         }
1601     }
1602 
1603     // 3) invert lhs
1604     skcms_Matrix3x3 lhs_inv;
1605     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1606         return false;
1607     }
1608 
1609     // 4) multiply inverse lhs by rhs
1610     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1611     P[0] += dP.vals[0];
1612     P[1] += dP.vals[1];
1613     P[2] += dP.vals[2];
1614     return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
1615 }
1616 
1617 
1618 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1619 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1620     float P[3] = { tf->g, tf->a, tf->b };
1621 
1622     // No matter where we start, dx should always represent N even steps from 0 to 1.
1623     const float dx = 1.0f / (N-1);
1624 
1625     for (int j = 0; j < 3/*TODO: tune*/; j++) {
1626         // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
1627         // We don't really know how to fix up a if it goes negative.
1628         if (P[1] < 0) {
1629             return false;
1630         }
1631         // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
1632         if (P[1] * tf->d + P[2] < 0) {
1633             P[2] = -P[1] * tf->d;
1634         }
1635         assert (P[1] >= 0 &&
1636                 P[1] * tf->d + P[2] >= 0);
1637 
1638         if (!gauss_newton_step(curve, tf,
1639                                P,
1640                                L*dx, dx, N-L)) {
1641             return false;
1642         }
1643     }
1644 
1645     // We need to apply our fixups one last time
1646     if (P[1] < 0) {
1647         return false;
1648     }
1649     if (P[1] * tf->d + P[2] < 0) {
1650         P[2] = -P[1] * tf->d;
1651     }
1652 
1653     tf->g = P[0];
1654     tf->a = P[1];
1655     tf->b = P[2];
1656     tf->e =   tf->c*tf->d + tf->f
1657       - powf_(tf->a*tf->d + tf->b, tf->g);
1658     return true;
1659 }
1660 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1661 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1662                             skcms_TransferFunction* approx,
1663                             float* max_error) {
1664     if (!curve || !approx || !max_error) {
1665         return false;
1666     }
1667 
1668     if (curve->table_entries == 0) {
1669         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1670         return false;
1671     }
1672 
1673     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1674         // We need at least two points, and must put some reasonable cap on the maximum number.
1675         return false;
1676     }
1677 
1678     int N = (int)curve->table_entries;
1679     const float dx = 1.0f / (N - 1);
1680 
1681     *max_error = INFINITY_;
1682     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1683     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1684         skcms_TransferFunction tf,
1685                                tf_inv;
1686 
1687         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1688         tf.f = 0.0f;
1689         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1690 
1691         if (L == N) {
1692             // If the entire data set was linear, move the coefficients to the nonlinear portion
1693             // with G == 1.  This lets use a canonical representation with d == 0.
1694             tf.g = 1;
1695             tf.a = tf.c;
1696             tf.b = tf.f;
1697             tf.c = tf.d = tf.e = tf.f = 0;
1698         } else if (L == N - 1) {
1699             // Degenerate case with only two points in the nonlinear segment. Solve directly.
1700             tf.g = 1;
1701             tf.a = (eval_curve(curve, (N-1)*dx) -
1702                     eval_curve(curve, (N-2)*dx))
1703                  / dx;
1704             tf.b = eval_curve(curve, (N-2)*dx)
1705                  - tf.a * (N-2)*dx;
1706             tf.e = 0;
1707         } else {
1708             // Start by guessing a gamma-only curve through the midpoint.
1709             int mid = (L + N) / 2;
1710             float mid_x = mid / (N - 1.0f);
1711             float mid_y = eval_curve(curve, mid_x);
1712             tf.g = log2f_(mid_y) / log2f_(mid_x);;
1713             tf.a = 1;
1714             tf.b = 0;
1715             tf.e =    tf.c*tf.d + tf.f
1716               - powf_(tf.a*tf.d + tf.b, tf.g);
1717 
1718 
1719             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1720                 !fit_nonlinear(curve, L,N, &tf_inv)) {
1721                 continue;
1722             }
1723 
1724             // We fit tf_inv, so calculate tf to keep in sync.
1725             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1726                 continue;
1727             }
1728         }
1729 
1730         // We find our error by roundtripping the table through tf_inv.
1731         //
1732         // (The most likely use case for this approximation is to be inverted and
1733         // used as the transfer function for a destination color space.)
1734         //
1735         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1736         // invertible, so re-verify that here (and use the new inverse for testing).
1737         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1738             continue;
1739         }
1740 
1741         float err = max_roundtrip_error(curve, &tf_inv);
1742         if (*max_error > err) {
1743             *max_error = err;
1744             *approx    = tf;
1745         }
1746     }
1747     return isfinitef_(*max_error);
1748 }
1749 
1750 // ~~~~ Impl. of skcms_Transform() ~~~~
1751 
1752 typedef enum {
1753     Op_load_a8,
1754     Op_load_g8,
1755     Op_load_8888_palette8,
1756     Op_load_4444,
1757     Op_load_565,
1758     Op_load_888,
1759     Op_load_8888,
1760     Op_load_1010102,
1761     Op_load_161616LE,
1762     Op_load_16161616LE,
1763     Op_load_161616BE,
1764     Op_load_16161616BE,
1765     Op_load_hhh,
1766     Op_load_hhhh,
1767     Op_load_fff,
1768     Op_load_ffff,
1769 
1770     Op_swap_rb,
1771     Op_clamp,
1772     Op_invert,
1773     Op_force_opaque,
1774     Op_premul,
1775     Op_unpremul,
1776     Op_matrix_3x3,
1777     Op_matrix_3x4,
1778     Op_lab_to_xyz,
1779 
1780     Op_tf_r,
1781     Op_tf_g,
1782     Op_tf_b,
1783     Op_tf_a,
1784 
1785     Op_table_r,
1786     Op_table_g,
1787     Op_table_b,
1788     Op_table_a,
1789 
1790     Op_clut,
1791 
1792     Op_store_a8,
1793     Op_store_g8,
1794     Op_store_4444,
1795     Op_store_565,
1796     Op_store_888,
1797     Op_store_8888,
1798     Op_store_1010102,
1799     Op_store_161616LE,
1800     Op_store_16161616LE,
1801     Op_store_161616BE,
1802     Op_store_16161616BE,
1803     Op_store_hhh,
1804     Op_store_hhhh,
1805     Op_store_fff,
1806     Op_store_ffff,
1807 } Op;
1808 
1809 // Without this wasm would try to use the N=4 128-bit vector code path,
1810 // which while ideal, causes tons of compiler problems.  This would be
1811 // a good thing to revisit as emcc matures (currently 1.38.5).
1812 #if 1 && defined(__EMSCRIPTEN_major__)
1813     #if !defined(SKCMS_PORTABLE)
1814         #define  SKCMS_PORTABLE
1815     #endif
1816 #endif
1817 
1818 #if defined(__clang__)
1819     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
1820 #elif defined(__GNUC__)
1821     // For some reason GCC accepts this nonsense, but not the more straightforward version,
1822     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
1823     template <int N, typename T>
1824     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
1825 
1826     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
1827 #endif
1828 
1829 // First, instantiate our default exec_ops() implementation using the default compiliation target.
1830 
1831 namespace baseline {
1832 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__))
1833     #define N 1
1834     using F   = float;
1835     using U64 = uint64_t;
1836     using U32 = uint32_t;
1837     using I32 = int32_t;
1838     using U16 = uint16_t;
1839     using U8  = uint8_t;
1840 
1841 #elif defined(__AVX512F__)
1842     #define N 16
1843     using   F = Vec<N,float>;
1844     using I32 = Vec<N,int32_t>;
1845     using U64 = Vec<N,uint64_t>;
1846     using U32 = Vec<N,uint32_t>;
1847     using U16 = Vec<N,uint16_t>;
1848     using  U8 = Vec<N,uint8_t>;
1849 #elif defined(__AVX__)
1850     #define N 8
1851     using   F = Vec<N,float>;
1852     using I32 = Vec<N,int32_t>;
1853     using U64 = Vec<N,uint64_t>;
1854     using U32 = Vec<N,uint32_t>;
1855     using U16 = Vec<N,uint16_t>;
1856     using  U8 = Vec<N,uint8_t>;
1857 #else
1858     #define N 4
1859     using   F = Vec<N,float>;
1860     using I32 = Vec<N,int32_t>;
1861     using U64 = Vec<N,uint64_t>;
1862     using U32 = Vec<N,uint32_t>;
1863     using U16 = Vec<N,uint16_t>;
1864     using  U8 = Vec<N,uint8_t>;
1865 #endif
1866 
1867     #include "src/Transform_inl.h"
1868     #undef N
1869 }
1870 
1871 // Now, instantiate any other versions of run_program() we may want for runtime detection.
1872 #if !defined(SKCMS_PORTABLE) &&                           \
1873         (( defined(__clang__) && __clang_major__ >= 5) || \
1874          (!defined(__clang__) && defined(__GNUC__)))      \
1875      && defined(__x86_64__) && !defined(__AVX2__)
1876 
1877     #if defined(__clang__)
1878         #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
1879     #elif defined(__GNUC__)
1880         #pragma GCC push_options
1881         #pragma GCC target("avx2,f16c")
1882     #endif
1883 
1884     namespace hsw {
1885         #define USING_AVX
1886         #define USING_AVX_F16C
1887         #define USING_AVX2
1888         #define N 8
1889         using   F = Vec<N,float>;
1890         using I32 = Vec<N,int32_t>;
1891         using U64 = Vec<N,uint64_t>;
1892         using U32 = Vec<N,uint32_t>;
1893         using U16 = Vec<N,uint16_t>;
1894         using  U8 = Vec<N,uint8_t>;
1895 
1896         #include "src/Transform_inl.h"
1897 
1898         // src/Transform_inl.h will undefine USING_* for us.
1899         #undef N
1900     }
1901 
1902     #if defined(__clang__)
1903         #pragma clang attribute pop
1904     #elif defined(__GNUC__)
1905         #pragma GCC pop_options
1906     #endif
1907 
1908     #define TEST_FOR_HSW
1909 
hsw_ok()1910     static bool hsw_ok() {
1911         static const bool ok = []{
1912             // See http://www.sandpile.org/x86/cpuid.htm
1913 
1914             // First, a basic cpuid(1).
1915             uint32_t eax, ebx, ecx, edx;
1916             __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1917                                          : "0"(1), "2"(0));
1918 
1919             // Sanity check for prerequisites.
1920             if ((edx & (1<<25)) != (1<<25)) { return false; }   // SSE
1921             if ((edx & (1<<26)) != (1<<26)) { return false; }   // SSE2
1922             if ((ecx & (1<< 0)) != (1<< 0)) { return false; }   // SSE3
1923             if ((ecx & (1<< 9)) != (1<< 9)) { return false; }   // SSSE3
1924             if ((ecx & (1<<19)) != (1<<19)) { return false; }   // SSE4.1
1925             if ((ecx & (1<<20)) != (1<<20)) { return false; }   // SSE4.2
1926 
1927             if ((ecx & (3<<26)) != (3<<26)) { return false; }   // XSAVE + OSXSAVE
1928 
1929             {
1930                 uint32_t eax_xgetbv, edx_xgetbv;
1931                 __asm__ __volatile__("xgetbv" : "=a"(eax_xgetbv), "=d"(edx_xgetbv) : "c"(0));
1932                 if ((eax_xgetbv & (3<<1)) != (3<<1)) { return false; }  // XMM+YMM state saved?
1933             }
1934 
1935             if ((ecx & (1<<28)) != (1<<28)) { return false; }   // AVX
1936             if ((ecx & (1<<29)) != (1<<29)) { return false; }   // F16C
1937             if ((ecx & (1<<12)) != (1<<12)) { return false; }   // FMA  (TODO: not currently used)
1938 
1939             // Call cpuid(7) to check for our final AVX2 feature bit!
1940             __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1941                                          : "0"(7), "2"(0));
1942             if ((ebx & (1<< 5)) != (1<< 5)) { return false; }   // AVX2
1943 
1944             return true;
1945         }();
1946 
1947         return ok;
1948     }
1949 
1950 #endif
1951 
is_identity_tf(const skcms_TransferFunction * tf)1952 static bool is_identity_tf(const skcms_TransferFunction* tf) {
1953     return tf->g == 1 && tf->a == 1
1954         && tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0;
1955 }
1956 
1957 typedef struct {
1958     Op          op;
1959     const void* arg;
1960 } OpAndArg;
1961 
select_curve_op(const skcms_Curve * curve,int channel)1962 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
1963     static const struct { Op parametric, table; } ops[] = {
1964         { Op_tf_r, Op_table_r },
1965         { Op_tf_g, Op_table_g },
1966         { Op_tf_b, Op_table_b },
1967         { Op_tf_a, Op_table_a },
1968     };
1969 
1970     const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
1971 
1972     if (curve->table_entries == 0) {
1973         return is_identity_tf(&curve->parametric)
1974             ? noop
1975             : OpAndArg{ ops[channel].parametric, &curve->parametric };
1976     }
1977 
1978     return OpAndArg{ ops[channel].table, curve };
1979 }
1980 
bytes_per_pixel(skcms_PixelFormat fmt)1981 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
1982     switch (fmt >> 1) {   // ignore rgb/bgr
1983         case skcms_PixelFormat_A_8                >> 1: return  1;
1984         case skcms_PixelFormat_G_8                >> 1: return  1;
1985         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
1986         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
1987         case skcms_PixelFormat_RGB_565            >> 1: return  2;
1988         case skcms_PixelFormat_RGB_888            >> 1: return  3;
1989         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
1990         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
1991         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
1992         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
1993         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
1994         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
1995         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
1996         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
1997         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
1998         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
1999     }
2000     assert(false);
2001     return 0;
2002 }
2003 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2004 static bool prep_for_destination(const skcms_ICCProfile* profile,
2005                                  skcms_Matrix3x3* fromXYZD50,
2006                                  skcms_TransferFunction* invR,
2007                                  skcms_TransferFunction* invG,
2008                                  skcms_TransferFunction* invB) {
2009     // We only support destinations with parametric transfer functions
2010     // and with gamuts that can be transformed from XYZD50.
2011     return profile->has_trc
2012         && profile->has_toXYZD50
2013         && profile->trc[0].table_entries == 0
2014         && profile->trc[1].table_entries == 0
2015         && profile->trc[2].table_entries == 0
2016         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2017         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2018         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2019         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2020 }
2021 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2022 bool skcms_Transform(const void*             src,
2023                      skcms_PixelFormat       srcFmt,
2024                      skcms_AlphaFormat       srcAlpha,
2025                      const skcms_ICCProfile* srcProfile,
2026                      void*                   dst,
2027                      skcms_PixelFormat       dstFmt,
2028                      skcms_AlphaFormat       dstAlpha,
2029                      const skcms_ICCProfile* dstProfile,
2030                      size_t                  npixels) {
2031     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2032                                       dst, dstFmt, dstAlpha, dstProfile,
2033                                       npixels, nullptr);
2034 }
2035 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2036 bool skcms_TransformWithPalette(const void*             src,
2037                                 skcms_PixelFormat       srcFmt,
2038                                 skcms_AlphaFormat       srcAlpha,
2039                                 const skcms_ICCProfile* srcProfile,
2040                                 void*                   dst,
2041                                 skcms_PixelFormat       dstFmt,
2042                                 skcms_AlphaFormat       dstAlpha,
2043                                 const skcms_ICCProfile* dstProfile,
2044                                 size_t                  nz,
2045                                 const void*             palette) {
2046     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2047                  src_bpp = bytes_per_pixel(srcFmt);
2048     // Let's just refuse if the request is absurdly big.
2049     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2050         return false;
2051     }
2052     int n = (int)nz;
2053 
2054     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2055     if (!srcProfile) {
2056         srcProfile = skcms_sRGB_profile();
2057     }
2058     if (!dstProfile) {
2059         dstProfile = skcms_sRGB_profile();
2060     }
2061 
2062     // We can't transform in place unless the PixelFormats are the same size.
2063     if (dst == src && dst_bpp != src_bpp) {
2064         return false;
2065     }
2066     // TODO: more careful alias rejection (like, dst == src + 1)?
2067 
2068     if (needs_palette(srcFmt) && !palette) {
2069         return false;
2070     }
2071 
2072     Op          program  [32];
2073     const void* arguments[32];
2074 
2075     Op*          ops  = program;
2076     const void** args = arguments;
2077 
2078     skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b;
2079     skcms_Matrix3x3        from_xyz;
2080 
2081     switch (srcFmt >> 1) {
2082         default: return false;
2083         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2084         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2085         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2086         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2087         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2088         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2089         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2090         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2091         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2092         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2093         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2094         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2095         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2096         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2097         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2098 
2099         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2100                                                         *args++ = palette;
2101                                                         break;
2102     }
2103     if (srcFmt & 1) {
2104         *ops++ = Op_swap_rb;
2105     }
2106     skcms_ICCProfile gray_dst_profile;
2107     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2108         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2109         // luminance (Y) by the destination transfer function.
2110         gray_dst_profile = *dstProfile;
2111         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2112         dstProfile = &gray_dst_profile;
2113     }
2114 
2115     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2116         // Photoshop creates CMYK images as inverse CMYK.
2117         // These happen to be the only ones we've _ever_ seen.
2118         *ops++ = Op_invert;
2119         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2120         srcAlpha = skcms_AlphaFormat_Unpremul;
2121     }
2122 
2123     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2124         *ops++ = Op_force_opaque;
2125     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2126         *ops++ = Op_unpremul;
2127     }
2128 
2129     if (dstProfile != srcProfile) {
2130 
2131         if (!prep_for_destination(dstProfile,
2132                                   &from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) {
2133             return false;
2134         }
2135 
2136         if (srcProfile->has_A2B) {
2137             if (srcProfile->A2B.input_channels) {
2138                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2139                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2140                     if (oa.arg) {
2141                         *ops++  = oa.op;
2142                         *args++ = oa.arg;
2143                     }
2144                 }
2145                 *ops++ = Op_clamp;
2146                 *ops++ = Op_clut;
2147                 *args++ = &srcProfile->A2B;
2148             }
2149 
2150             if (srcProfile->A2B.matrix_channels == 3) {
2151                 for (int i = 0; i < 3; i++) {
2152                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2153                     if (oa.arg) {
2154                         *ops++  = oa.op;
2155                         *args++ = oa.arg;
2156                     }
2157                 }
2158 
2159                 static const skcms_Matrix3x4 I = {{
2160                     {1,0,0,0},
2161                     {0,1,0,0},
2162                     {0,0,1,0},
2163                 }};
2164                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2165                     *ops++  = Op_matrix_3x4;
2166                     *args++ = &srcProfile->A2B.matrix;
2167                 }
2168             }
2169 
2170             if (srcProfile->A2B.output_channels == 3) {
2171                 for (int i = 0; i < 3; i++) {
2172                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2173                     if (oa.arg) {
2174                         *ops++  = oa.op;
2175                         *args++ = oa.arg;
2176                     }
2177                 }
2178             }
2179 
2180             if (srcProfile->pcs == skcms_Signature_Lab) {
2181                 *ops++ = Op_lab_to_xyz;
2182             }
2183 
2184         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2185             for (int i = 0; i < 3; i++) {
2186                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2187                 if (oa.arg) {
2188                     *ops++  = oa.op;
2189                     *args++ = oa.arg;
2190                 }
2191             }
2192         } else {
2193             return false;
2194         }
2195 
2196         // A2B sources should already be in XYZD50 at this point.
2197         // Others still need to be transformed using their toXYZD50 matrix.
2198         // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2199         // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2200         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2201         static const skcms_Matrix3x3 I = {{
2202             { 1.0f, 0.0f, 0.0f },
2203             { 0.0f, 1.0f, 0.0f },
2204             { 0.0f, 0.0f, 1.0f },
2205         }};
2206         const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2207 
2208         // There's a chance the source and destination gamuts are identical,
2209         // in which case we can skip the gamut transform.
2210         if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2211             // Concat the entire gamut transform into from_xyz,
2212             // now slightly misnamed but it's a handy spot to stash the result.
2213             from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2214             *ops++  = Op_matrix_3x3;
2215             *args++ = &from_xyz;
2216         }
2217 
2218         // Encode back to dst RGB using its parametric transfer functions.
2219         if (!is_identity_tf(&inv_dst_tf_r)) { *ops++ = Op_tf_r; *args++ = &inv_dst_tf_r; }
2220         if (!is_identity_tf(&inv_dst_tf_g)) { *ops++ = Op_tf_g; *args++ = &inv_dst_tf_g; }
2221         if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; }
2222     }
2223 
2224     // Clamp here before premul to make sure we're clamping to fixed-point values _and_ gamut,
2225     // not just to values that fit in the fixed point representation.
2226     //
2227     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2228     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2229     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2230         *ops++ = Op_clamp;
2231     }
2232     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2233         *ops++ = Op_force_opaque;
2234     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2235         *ops++ = Op_premul;
2236     }
2237     if (dstFmt & 1) {
2238         *ops++ = Op_swap_rb;
2239     }
2240     switch (dstFmt >> 1) {
2241         default: return false;
2242         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2243         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2244         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2245         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2246         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2247         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2248         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2249         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2250         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2251         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2252         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2253         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2254         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2255         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2256         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2257     }
2258 
2259     auto run = baseline::run_program;
2260 #if defined(TEST_FOR_HSW)
2261     if (hsw_ok()) { run = hsw::run_program; }
2262 #endif
2263     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2264     return true;
2265 }
2266 
assert_usable_as_destination(const skcms_ICCProfile * profile)2267 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2268 #if defined(NDEBUG)
2269     (void)profile;
2270 #else
2271     skcms_Matrix3x3 fromXYZD50;
2272     skcms_TransferFunction invR, invG, invB;
2273     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2274 #endif
2275 }
2276 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2277 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2278     skcms_Matrix3x3 fromXYZD50;
2279     if (!profile->has_trc || !profile->has_toXYZD50
2280         || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2281         return false;
2282     }
2283 
2284     skcms_TransferFunction tf[3];
2285     for (int i = 0; i < 3; i++) {
2286         skcms_TransferFunction inv;
2287         if (profile->trc[i].table_entries == 0
2288             && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2289             tf[i] = profile->trc[i].parametric;
2290             continue;
2291         }
2292 
2293         float max_error;
2294         // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2295         if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2296             return false;
2297         }
2298     }
2299 
2300     for (int i = 0; i < 3; ++i) {
2301         profile->trc[i].table_entries = 0;
2302         profile->trc[i].parametric = tf[i];
2303     }
2304 
2305     assert_usable_as_destination(profile);
2306     return true;
2307 }
2308 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2309 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2310     // Operate on a copy of profile, so we can choose the best TF for the original curves
2311     skcms_ICCProfile result = *profile;
2312     if (!skcms_MakeUsableAsDestination(&result)) {
2313         return false;
2314     }
2315 
2316     int best_tf = 0;
2317     float min_max_error = INFINITY_;
2318     for (int i = 0; i < 3; i++) {
2319         skcms_TransferFunction inv;
2320         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2321             return false;
2322         }
2323 
2324         float err = 0;
2325         for (int j = 0; j < 3; ++j) {
2326             err = fmaxf_(err, max_roundtrip_error(&profile->trc[j], &inv));
2327         }
2328         if (min_max_error > err) {
2329             min_max_error = err;
2330             best_tf = i;
2331         }
2332     }
2333 
2334     for (int i = 0; i < 3; i++) {
2335         result.trc[i].parametric = result.trc[best_tf].parametric;
2336     }
2337 
2338     *profile = result;
2339     assert_usable_as_destination(profile);
2340     return true;
2341 }
2342