• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 
21     #if defined(__clang__)
22         // That #include <immintrin.h> is usually enough, but Clang's headers
23         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24         // defined, because lots of programs on Windows would include that and
25         // it'd be a lot slower.  But we want all those headers included so we
26         // can use their features after runtime checks later.
27         #include <smmintrin.h>
28         #include <avxintrin.h>
29         #include <avx2intrin.h>
30         #include <avx512fintrin.h>
31         #include <avx512dqintrin.h>
32     #endif
33 #endif
34 
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37     runtime_cpu_detection = false;
38 }
39 
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46 
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49 
50 static const union {
51     uint32_t bits;
52     float    f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55 
56 #if defined(__clang__) || defined(__GNUC__)
57     #define small_memcpy __builtin_memcpy
58 #else
59     #define small_memcpy memcpy
60 #endif
61 
log2f_(float x)62 static float log2f_(float x) {
63     // The first approximation of log2(x) is its exponent 'e', minus 127.
64     int32_t bits;
65     small_memcpy(&bits, &x, sizeof(bits));
66 
67     float e = (float)bits * (1.0f / (1<<23));
68 
69     // If we use the mantissa too we can refine the error signficantly.
70     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71     float m;
72     small_memcpy(&m, &m_bits, sizeof(m));
73 
74     return (e - 124.225514990f
75               -   1.498030302f*m
76               -   1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79     const float ln2 = 0.69314718f;
80     return ln2*log2f_(x);
81 }
82 
exp2f_(float x)83 static float exp2f_(float x) {
84     float fract = x - floorf_(x);
85 
86     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87                                         -   1.490129070f*fract
88                                         +  27.728023300f/(4.84252568f - fract));
89 
90     // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91     // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92     // Negative values are effectively underflow - we'll end up returning a (different) negative
93     // value, which makes no sense. So clamp to zero.
94     if (fbits >= (float)INT_MAX) {
95         return INFINITY_;
96     } else if (fbits < 0) {
97         return 0;
98     }
99 
100     int32_t bits = (int32_t)fbits;
101     small_memcpy(&x, &bits, sizeof(x));
102     return x;
103 }
104 
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107     assert (x >= 0);
108     return (x == 0) || (x == 1) ? x
109                                 : exp2f_(log2f_(x) * y);
110 }
111 
expf_(float x)112 static float expf_(float x) {
113     const float log2_e = 1.4426950408889634074f;
114     return exp2f_(log2_e * x);
115 }
116 
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119 
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121 
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123     int32_t bits;
124     memcpy(&bits, &x, sizeof(bits));
125     bits = bits - 1;
126     memcpy(&x, &bits, sizeof(bits));
127     return x;
128 }
129 
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 struct TF_PQish  { float A,B,C,D,E,F; };
134 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
135 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
136 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
137 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
138 
TFKind_marker(skcms_TFType kind)139 static float TFKind_marker(skcms_TFType kind) {
140     // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
141     return -(float)kind;
142 }
143 
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)144 static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
145                                                              , TF_HLGish* hlg = nullptr) {
146     if (tf.g < 0 && static_cast<float>(static_cast<int>(tf.g)) == tf.g) {
147         // TODO: soundness checks for PQ/HLG like we do for sRGBish?
148         switch ((int)tf.g) {
149             case -skcms_TFType_PQish:
150                 if (pq) {
151                     memcpy(pq , &tf.a, sizeof(*pq ));
152                 }
153                 return skcms_TFType_PQish;
154             case -skcms_TFType_HLGish:
155                 if (hlg) {
156                     memcpy(hlg, &tf.a, sizeof(*hlg));
157                 }
158                 return skcms_TFType_HLGish;
159             case -skcms_TFType_HLGinvish:
160                 if (hlg) {
161                     memcpy(hlg, &tf.a, sizeof(*hlg));
162                 }
163                 return skcms_TFType_HLGinvish;
164         }
165         return skcms_TFType_Invalid;
166     }
167 
168     // Basic soundness checks for sRGBish transfer functions.
169     if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
170             // a,c,d,g should be non-negative to make any sense.
171             && tf.a >= 0
172             && tf.c >= 0
173             && tf.d >= 0
174             && tf.g >= 0
175             // Raising a negative value to a fractional tf->g produces complex numbers.
176             && tf.a * tf.d + tf.b >= 0) {
177         return skcms_TFType_sRGBish;
178     }
179 
180     return skcms_TFType_Invalid;
181 }
182 
skcms_TransferFunction_getType(const skcms_TransferFunction * tf)183 skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
184     return classify(*tf);
185 }
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)186 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
187     return classify(*tf) == skcms_TFType_sRGBish;
188 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)189 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
190     return classify(*tf) == skcms_TFType_PQish;
191 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)192 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
193     return classify(*tf) == skcms_TFType_HLGish;
194 }
195 
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)196 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
197                                       float A, float B, float C,
198                                       float D, float E, float F) {
199     *tf = { TFKind_marker(skcms_TFType_PQish), A,B,C,D,E,F };
200     assert(skcms_TransferFunction_isPQish(tf));
201     return true;
202 }
203 
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)204 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
205                                              float K, float R, float G,
206                                              float a, float b, float c) {
207     *tf = { TFKind_marker(skcms_TFType_HLGish), R,G, a,b,c, K-1.0f };
208     assert(skcms_TransferFunction_isHLGish(tf));
209     return true;
210 }
211 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)212 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
213     float sign = x < 0 ? -1.0f : 1.0f;
214     x *= sign;
215 
216     TF_PQish  pq;
217     TF_HLGish hlg;
218     switch (classify(*tf, &pq, &hlg)) {
219         case skcms_TFType_Invalid: break;
220 
221         case skcms_TFType_HLGish: {
222             const float K = hlg.K_minus_1 + 1.0f;
223             return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
224                                             : expf_((x-hlg.c)*hlg.a) + hlg.b);
225         }
226 
227         // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
228         case skcms_TFType_HLGinvish: {
229             const float K = hlg.K_minus_1 + 1.0f;
230             x /= K;
231             return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
232                                   : hlg.a * logf_(x - hlg.b) + hlg.c);
233         }
234 
235         case skcms_TFType_sRGBish:
236             return sign * (x < tf->d ?       tf->c * x + tf->f
237                                      : powf_(tf->a * x + tf->b, tf->g) + tf->e);
238 
239         case skcms_TFType_PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
240                                                          / (pq.D + pq.E * powf_(x, pq.C)),
241                                                      pq.F);
242     }
243     return 0;
244 }
245 
246 
eval_curve(const skcms_Curve * curve,float x)247 static float eval_curve(const skcms_Curve* curve, float x) {
248     if (curve->table_entries == 0) {
249         return skcms_TransferFunction_eval(&curve->parametric, x);
250     }
251 
252     float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
253     int   lo = (int)                   ix        ,
254           hi = (int)(float)minus_1_ulp(ix + 1.0f);
255     float t = ix - (float)lo;
256 
257     float l, h;
258     if (curve->table_8) {
259         l = curve->table_8[lo] * (1/255.0f);
260         h = curve->table_8[hi] * (1/255.0f);
261     } else {
262         uint16_t be_l, be_h;
263         memcpy(&be_l, curve->table_16 + 2*lo, 2);
264         memcpy(&be_h, curve->table_16 + 2*hi, 2);
265         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
266         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
267         l = le_l * (1/65535.0f);
268         h = le_h * (1/65535.0f);
269     }
270     return l + (h-l)*t;
271 }
272 
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)273 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
274     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
275     const float dx = 1.0f / static_cast<float>(N - 1);
276     float err = 0;
277     for (uint32_t i = 0; i < N; i++) {
278         float x = static_cast<float>(i) * dx,
279               y = eval_curve(curve, x);
280         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
281     }
282     return err;
283 }
284 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)285 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
286     return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
287 }
288 
289 // Additional ICC signature values that are only used internally
290 enum {
291     // File signature
292     skcms_Signature_acsp = 0x61637370,
293 
294     // Tag signatures
295     skcms_Signature_rTRC = 0x72545243,
296     skcms_Signature_gTRC = 0x67545243,
297     skcms_Signature_bTRC = 0x62545243,
298     skcms_Signature_kTRC = 0x6B545243,
299 
300     skcms_Signature_rXYZ = 0x7258595A,
301     skcms_Signature_gXYZ = 0x6758595A,
302     skcms_Signature_bXYZ = 0x6258595A,
303 
304     skcms_Signature_A2B0 = 0x41324230,
305     skcms_Signature_B2A0 = 0x42324130,
306 
307     skcms_Signature_CHAD = 0x63686164,
308     skcms_Signature_WTPT = 0x77747074,
309 
310     skcms_Signature_CICP = 0x63696370,
311 
312     // Type signatures
313     skcms_Signature_curv = 0x63757276,
314     skcms_Signature_mft1 = 0x6D667431,
315     skcms_Signature_mft2 = 0x6D667432,
316     skcms_Signature_mAB  = 0x6D414220,
317     skcms_Signature_mBA  = 0x6D424120,
318     skcms_Signature_para = 0x70617261,
319     skcms_Signature_sf32 = 0x73663332,
320     // XYZ is also a PCS signature, so it's defined in skcms.h
321     // skcms_Signature_XYZ = 0x58595A20,
322 };
323 
read_big_u16(const uint8_t * ptr)324 static uint16_t read_big_u16(const uint8_t* ptr) {
325     uint16_t be;
326     memcpy(&be, ptr, sizeof(be));
327 #if defined(_MSC_VER)
328     return _byteswap_ushort(be);
329 #else
330     return __builtin_bswap16(be);
331 #endif
332 }
333 
read_big_u32(const uint8_t * ptr)334 static uint32_t read_big_u32(const uint8_t* ptr) {
335     uint32_t be;
336     memcpy(&be, ptr, sizeof(be));
337 #if defined(_MSC_VER)
338     return _byteswap_ulong(be);
339 #else
340     return __builtin_bswap32(be);
341 #endif
342 }
343 
read_big_i32(const uint8_t * ptr)344 static int32_t read_big_i32(const uint8_t* ptr) {
345     return (int32_t)read_big_u32(ptr);
346 }
347 
read_big_fixed(const uint8_t * ptr)348 static float read_big_fixed(const uint8_t* ptr) {
349     return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
350 }
351 
352 // Maps to an in-memory profile so that fields line up to the locations specified
353 // in ICC.1:2010, section 7.2
354 typedef struct {
355     uint8_t size                [ 4];
356     uint8_t cmm_type            [ 4];
357     uint8_t version             [ 4];
358     uint8_t profile_class       [ 4];
359     uint8_t data_color_space    [ 4];
360     uint8_t pcs                 [ 4];
361     uint8_t creation_date_time  [12];
362     uint8_t signature           [ 4];
363     uint8_t platform            [ 4];
364     uint8_t flags               [ 4];
365     uint8_t device_manufacturer [ 4];
366     uint8_t device_model        [ 4];
367     uint8_t device_attributes   [ 8];
368     uint8_t rendering_intent    [ 4];
369     uint8_t illuminant_X        [ 4];
370     uint8_t illuminant_Y        [ 4];
371     uint8_t illuminant_Z        [ 4];
372     uint8_t creator             [ 4];
373     uint8_t profile_id          [16];
374     uint8_t reserved            [28];
375     uint8_t tag_count           [ 4]; // Technically not part of header, but required
376 } header_Layout;
377 
378 typedef struct {
379     uint8_t signature [4];
380     uint8_t offset    [4];
381     uint8_t size      [4];
382 } tag_Layout;
383 
get_tag_table(const skcms_ICCProfile * profile)384 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
385     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
386 }
387 
388 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
389 // use of the type is for the CHAD tag that stores exactly nine values.
390 typedef struct {
391     uint8_t type     [ 4];
392     uint8_t reserved [ 4];
393     uint8_t values   [36];
394 } sf32_Layout;
395 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)396 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
397     skcms_ICCTag tag;
398     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
399         return false;
400     }
401 
402     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
403         return false;
404     }
405 
406     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
407     const uint8_t* values = sf32Tag->values;
408     for (int r = 0; r < 3; ++r)
409     for (int c = 0; c < 3; ++c, values += 4) {
410         m->vals[r][c] = read_big_fixed(values);
411     }
412     return true;
413 }
414 
415 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
416 // the type are for tags/data that store exactly one triple.
417 typedef struct {
418     uint8_t type     [4];
419     uint8_t reserved [4];
420     uint8_t X        [4];
421     uint8_t Y        [4];
422     uint8_t Z        [4];
423 } XYZ_Layout;
424 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)425 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
426     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
427         return false;
428     }
429 
430     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
431 
432     *x = read_big_fixed(xyzTag->X);
433     *y = read_big_fixed(xyzTag->Y);
434     *z = read_big_fixed(xyzTag->Z);
435     return true;
436 }
437 
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])438 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
439     skcms_ICCTag tag;
440     return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
441            read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
442 }
443 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)444 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
445                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
446     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
447            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
448            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
449 }
450 
451 typedef struct {
452     uint8_t type          [4];
453     uint8_t reserved_a    [4];
454     uint8_t function_type [2];
455     uint8_t reserved_b    [2];
456     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
457 } para_Layout;
458 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)459 static bool read_curve_para(const uint8_t* buf, uint32_t size,
460                             skcms_Curve* curve, uint32_t* curve_size) {
461     if (size < SAFE_FIXED_SIZE(para_Layout)) {
462         return false;
463     }
464 
465     const para_Layout* paraTag = (const para_Layout*)buf;
466 
467     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
468     uint16_t function_type = read_big_u16(paraTag->function_type);
469     if (function_type > kGABCDEF) {
470         return false;
471     }
472 
473     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
474     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
475         return false;
476     }
477 
478     if (curve_size) {
479         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
480     }
481 
482     curve->table_entries = 0;
483     curve->parametric.a  = 1.0f;
484     curve->parametric.b  = 0.0f;
485     curve->parametric.c  = 0.0f;
486     curve->parametric.d  = 0.0f;
487     curve->parametric.e  = 0.0f;
488     curve->parametric.f  = 0.0f;
489     curve->parametric.g  = read_big_fixed(paraTag->variable);
490 
491     switch (function_type) {
492         case kGAB:
493             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
494             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
495             if (curve->parametric.a == 0) {
496                 return false;
497             }
498             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
499             break;
500         case kGABC:
501             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
502             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
503             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
504             if (curve->parametric.a == 0) {
505                 return false;
506             }
507             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
508             curve->parametric.f = curve->parametric.e;
509             break;
510         case kGABCD:
511             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
512             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
513             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
514             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
515             break;
516         case kGABCDEF:
517             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
518             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
519             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
520             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
521             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
522             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
523             break;
524     }
525     return skcms_TransferFunction_isSRGBish(&curve->parametric);
526 }
527 
528 typedef struct {
529     uint8_t type          [4];
530     uint8_t reserved      [4];
531     uint8_t value_count   [4];
532     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
533 } curv_Layout;
534 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)535 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
536                             skcms_Curve* curve, uint32_t* curve_size) {
537     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
538         return false;
539     }
540 
541     const curv_Layout* curvTag = (const curv_Layout*)buf;
542 
543     uint32_t value_count = read_big_u32(curvTag->value_count);
544     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
545         return false;
546     }
547 
548     if (curve_size) {
549         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
550     }
551 
552     if (value_count < 2) {
553         curve->table_entries = 0;
554         curve->parametric.a  = 1.0f;
555         curve->parametric.b  = 0.0f;
556         curve->parametric.c  = 0.0f;
557         curve->parametric.d  = 0.0f;
558         curve->parametric.e  = 0.0f;
559         curve->parametric.f  = 0.0f;
560         if (value_count == 0) {
561             // Empty tables are a shorthand for an identity curve
562             curve->parametric.g = 1.0f;
563         } else {
564             // Single entry tables are a shorthand for simple gamma
565             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
566         }
567     } else {
568         curve->table_8       = nullptr;
569         curve->table_16      = curvTag->variable;
570         curve->table_entries = value_count;
571     }
572 
573     return true;
574 }
575 
576 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
577 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)578 static bool read_curve(const uint8_t* buf, uint32_t size,
579                        skcms_Curve* curve, uint32_t* curve_size) {
580     if (!buf || size < 4 || !curve) {
581         return false;
582     }
583 
584     uint32_t type = read_big_u32(buf);
585     if (type == skcms_Signature_para) {
586         return read_curve_para(buf, size, curve, curve_size);
587     } else if (type == skcms_Signature_curv) {
588         return read_curve_curv(buf, size, curve, curve_size);
589     }
590 
591     return false;
592 }
593 
594 // mft1 and mft2 share a large chunk of data
595 typedef struct {
596     uint8_t type                 [ 4];
597     uint8_t reserved_a           [ 4];
598     uint8_t input_channels       [ 1];
599     uint8_t output_channels      [ 1];
600     uint8_t grid_points          [ 1];
601     uint8_t reserved_b           [ 1];
602     uint8_t matrix               [36];
603 } mft_CommonLayout;
604 
605 typedef struct {
606     mft_CommonLayout common      [1];
607 
608     uint8_t variable             [1/*variable*/];
609 } mft1_Layout;
610 
611 typedef struct {
612     mft_CommonLayout common      [1];
613 
614     uint8_t input_table_entries  [2];
615     uint8_t output_table_entries [2];
616     uint8_t variable             [1/*variable*/];
617 } mft2_Layout;
618 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)619 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
620     // MFT matrices are applied before the first set of curves, but must be identity unless the
621     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
622     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
623     // field/flag.
624     a2b->matrix_channels = 0;
625     a2b-> input_channels = mftTag-> input_channels[0];
626     a2b->output_channels = mftTag->output_channels[0];
627 
628     // We require exactly three (ie XYZ/Lab/RGB) output channels
629     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
630         return false;
631     }
632     // We require at least one, and no more than four (ie CMYK) input channels
633     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
634         return false;
635     }
636 
637     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
638         a2b->grid_points[i] = mftTag->grid_points[0];
639     }
640     // The grid only makes sense with at least two points along each axis
641     if (a2b->grid_points[0] < 2) {
642         return false;
643     }
644     return true;
645 }
646 
647 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)648 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
649     // Same as A2B.
650     b2a->matrix_channels = 0;
651     b2a-> input_channels = mftTag-> input_channels[0];
652     b2a->output_channels = mftTag->output_channels[0];
653 
654 
655     // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
656     if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
657         return false;
658     }
659     if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
660         return false;
661     }
662 
663     // Same as A2B.
664     for (uint32_t i = 0; i < b2a->input_channels; ++i) {
665         b2a->grid_points[i] = mftTag->grid_points[0];
666     }
667     if (b2a->grid_points[0] < 2) {
668         return false;
669     }
670     return true;
671 }
672 
673 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)674 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
675                         uint32_t input_table_entries, uint32_t output_table_entries,
676                         A2B_or_B2A* out) {
677     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
678     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
679     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
680 
681     // [input|output]_channels are <= 4, so still no overflow
682     uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
683     uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
684 
685     uint64_t grid_size = out->output_channels * byte_width;
686     for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
687         grid_size *= out->grid_points[axis];
688     }
689 
690     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
691         return false;
692     }
693 
694     for (uint32_t i = 0; i < out->input_channels; ++i) {
695         out->input_curves[i].table_entries = input_table_entries;
696         if (byte_width == 1) {
697             out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
698             out->input_curves[i].table_16 = nullptr;
699         } else {
700             out->input_curves[i].table_8  = nullptr;
701             out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
702         }
703     }
704 
705     if (byte_width == 1) {
706         out->grid_8  = table_base + byte_len_all_input_tables;
707         out->grid_16 = nullptr;
708     } else {
709         out->grid_8  = nullptr;
710         out->grid_16 = table_base + byte_len_all_input_tables;
711     }
712 
713     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
714     for (uint32_t i = 0; i < out->output_channels; ++i) {
715         out->output_curves[i].table_entries = output_table_entries;
716         if (byte_width == 1) {
717             out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
718             out->output_curves[i].table_16 = nullptr;
719         } else {
720             out->output_curves[i].table_8  = nullptr;
721             out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
722         }
723     }
724 
725     return true;
726 }
727 
728 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)729 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
730     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
731         return false;
732     }
733 
734     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
735     if (!read_mft_common(mftTag->common, out)) {
736         return false;
737     }
738 
739     uint32_t input_table_entries  = 256;
740     uint32_t output_table_entries = 256;
741 
742     return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
743                        input_table_entries, output_table_entries, out);
744 }
745 
746 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)747 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
748     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
749         return false;
750     }
751 
752     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
753     if (!read_mft_common(mftTag->common, out)) {
754         return false;
755     }
756 
757     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
758     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
759 
760     // ICC spec mandates that 2 <= table_entries <= 4096
761     if (input_table_entries < 2 || input_table_entries > 4096 ||
762         output_table_entries < 2 || output_table_entries > 4096) {
763         return false;
764     }
765 
766     return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
767                        input_table_entries, output_table_entries, out);
768 }
769 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)770 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
771                         uint32_t num_curves, skcms_Curve* curves) {
772     for (uint32_t i = 0; i < num_curves; ++i) {
773         if (curve_offset > size) {
774             return false;
775         }
776 
777         uint32_t curve_bytes;
778         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
779             return false;
780         }
781 
782         if (curve_bytes > UINT32_MAX - 3) {
783             return false;
784         }
785         curve_bytes = (curve_bytes + 3) & ~3U;
786 
787         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
788         curve_offset = (uint32_t)new_offset_64;
789         if (new_offset_64 != curve_offset) {
790             return false;
791         }
792     }
793 
794     return true;
795 }
796 
797 // mAB and mBA tags use the same encoding, including color lookup tables.
798 typedef struct {
799     uint8_t type                 [ 4];
800     uint8_t reserved_a           [ 4];
801     uint8_t input_channels       [ 1];
802     uint8_t output_channels      [ 1];
803     uint8_t reserved_b           [ 2];
804     uint8_t b_curve_offset       [ 4];
805     uint8_t matrix_offset        [ 4];
806     uint8_t m_curve_offset       [ 4];
807     uint8_t clut_offset          [ 4];
808     uint8_t a_curve_offset       [ 4];
809 } mAB_or_mBA_Layout;
810 
811 typedef struct {
812     uint8_t grid_points          [16];
813     uint8_t grid_byte_width      [ 1];
814     uint8_t reserved             [ 3];
815     uint8_t variable             [1/*variable*/];
816 } CLUT_Layout;
817 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)818 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
819     if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
820         return false;
821     }
822 
823     const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
824 
825     a2b->input_channels  = mABTag->input_channels[0];
826     a2b->output_channels = mABTag->output_channels[0];
827 
828     // We require exactly three (ie XYZ/Lab/RGB) output channels
829     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
830         return false;
831     }
832     // We require no more than four (ie CMYK) input channels
833     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
834         return false;
835     }
836 
837     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
838     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
839     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
840     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
841     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
842 
843     // "B" curves must be present
844     if (0 == b_curve_offset) {
845         return false;
846     }
847 
848     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
849                      a2b->output_curves)) {
850         return false;
851     }
852 
853     // "M" curves and Matrix must be used together
854     if (0 != m_curve_offset) {
855         if (0 == matrix_offset) {
856             return false;
857         }
858         a2b->matrix_channels = a2b->output_channels;
859         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
860                          a2b->matrix_curves)) {
861             return false;
862         }
863 
864         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
865         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
866             return false;
867         }
868         float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
869         const uint8_t* mtx_buf = tag->buf + matrix_offset;
870         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
871         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
872         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
873         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
874         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
875         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
876         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
877         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
878         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
879         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
880         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
881         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
882     } else {
883         if (0 != matrix_offset) {
884             return false;
885         }
886         a2b->matrix_channels = 0;
887     }
888 
889     // "A" curves and CLUT must be used together
890     if (0 != a_curve_offset) {
891         if (0 == clut_offset) {
892             return false;
893         }
894         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
895                          a2b->input_curves)) {
896             return false;
897         }
898 
899         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
900             return false;
901         }
902         const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
903 
904         if (clut->grid_byte_width[0] == 1) {
905             a2b->grid_8  = clut->variable;
906             a2b->grid_16 = nullptr;
907         } else if (clut->grid_byte_width[0] == 2) {
908             a2b->grid_8  = nullptr;
909             a2b->grid_16 = clut->variable;
910         } else {
911             return false;
912         }
913 
914         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];  // the payload
915         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
916             a2b->grid_points[i] = clut->grid_points[i];
917             // The grid only makes sense with at least two points along each axis
918             if (a2b->grid_points[i] < 2) {
919                 return false;
920             }
921             grid_size *= a2b->grid_points[i];
922         }
923         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
924             return false;
925         }
926     } else {
927         if (0 != clut_offset) {
928             return false;
929         }
930 
931         // If there is no CLUT, the number of input and output channels must match
932         if (a2b->input_channels != a2b->output_channels) {
933             return false;
934         }
935 
936         // Zero out the number of input channels to signal that we're skipping this stage
937         a2b->input_channels = 0;
938     }
939 
940     return true;
941 }
942 
943 // Exactly the same as read_tag_mab(), except where there are comments.
944 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)945 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
946     if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
947         return false;
948     }
949 
950     const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
951 
952     b2a->input_channels  = mBATag->input_channels[0];
953     b2a->output_channels = mBATag->output_channels[0];
954 
955     // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
956     if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
957         return false;
958     }
959     if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
960         return false;
961     }
962 
963     uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
964     uint32_t matrix_offset  = read_big_u32(mBATag->matrix_offset);
965     uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
966     uint32_t clut_offset    = read_big_u32(mBATag->clut_offset);
967     uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
968 
969     if (0 == b_curve_offset) {
970         return false;
971     }
972 
973     // "B" curves are our inputs, not outputs.
974     if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
975                      b2a->input_curves)) {
976         return false;
977     }
978 
979     if (0 != m_curve_offset) {
980         if (0 == matrix_offset) {
981             return false;
982         }
983         // Matrix channels is tied to input_channels (3), not output_channels.
984         b2a->matrix_channels = b2a->input_channels;
985 
986         if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
987                          b2a->matrix_curves)) {
988             return false;
989         }
990 
991         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
992             return false;
993         }
994         float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f;  // TODO: understand
995         const uint8_t* mtx_buf = tag->buf + matrix_offset;
996         b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
997         b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
998         b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
999         b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
1000         b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
1001         b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
1002         b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
1003         b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
1004         b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
1005         b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
1006         b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
1007         b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
1008     } else {
1009         if (0 != matrix_offset) {
1010             return false;
1011         }
1012         b2a->matrix_channels = 0;
1013     }
1014 
1015     if (0 != a_curve_offset) {
1016         if (0 == clut_offset) {
1017             return false;
1018         }
1019 
1020         // "A" curves are our output, not input.
1021         if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1022                          b2a->output_curves)) {
1023             return false;
1024         }
1025 
1026         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1027             return false;
1028         }
1029         const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1030 
1031         if (clut->grid_byte_width[0] == 1) {
1032             b2a->grid_8  = clut->variable;
1033             b2a->grid_16 = nullptr;
1034         } else if (clut->grid_byte_width[0] == 2) {
1035             b2a->grid_8  = nullptr;
1036             b2a->grid_16 = clut->variable;
1037         } else {
1038             return false;
1039         }
1040 
1041         uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1042         for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1043             b2a->grid_points[i] = clut->grid_points[i];
1044             if (b2a->grid_points[i] < 2) {
1045                 return false;
1046             }
1047             grid_size *= b2a->grid_points[i];
1048         }
1049         if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1050             return false;
1051         }
1052     } else {
1053         if (0 != clut_offset) {
1054             return false;
1055         }
1056 
1057         if (b2a->input_channels != b2a->output_channels) {
1058             return false;
1059         }
1060 
1061         // Zero out *output* channels to skip this stage.
1062         b2a->output_channels = 0;
1063     }
1064     return true;
1065 }
1066 
1067 // If you pass f, we'll fit a possibly-non-zero value for *f.
1068 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1069 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1070                       float* c, float* d, float* f = nullptr) {
1071     assert(N > 1);
1072     // We iteratively fit the first points to the TF's linear piece.
1073     // We want the cx + f line to pass through the first and last points we fit exactly.
1074     //
1075     // As we walk along the points we find the minimum and maximum slope of the line before the
1076     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
1077     // emtpy, when we definitely can't add any more points.
1078     //
1079     // Some points' error intervals may intersect the running interval but not lie fully
1080     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
1081     // and once the search is done, back up to build the line through *that* point.
1082     const float dx = 1.0f / static_cast<float>(N - 1);
1083 
1084     int lin_points = 1;
1085 
1086     float f_zero = 0.0f;
1087     if (f) {
1088         *f = eval_curve(curve, 0);
1089     } else {
1090         f = &f_zero;
1091     }
1092 
1093 
1094     float slope_min = -INFINITY_;
1095     float slope_max = +INFINITY_;
1096     for (int i = 1; i < N; ++i) {
1097         float x = static_cast<float>(i) * dx;
1098         float y = eval_curve(curve, x);
1099 
1100         float slope_max_i = (y + tol - *f) / x,
1101               slope_min_i = (y - tol - *f) / x;
1102         if (slope_max_i < slope_min || slope_max < slope_min_i) {
1103             // Slope intervals would no longer overlap.
1104             break;
1105         }
1106         slope_max = fminf_(slope_max, slope_max_i);
1107         slope_min = fmaxf_(slope_min, slope_min_i);
1108 
1109         float cur_slope = (y - *f) / x;
1110         if (slope_min <= cur_slope && cur_slope <= slope_max) {
1111             lin_points = i + 1;
1112             *c = cur_slope;
1113         }
1114     }
1115 
1116     // Set D to the last point that met our tolerance.
1117     *d = static_cast<float>(lin_points - 1) * dx;
1118     return lin_points;
1119 }
1120 
1121 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1122 static void canonicalize_identity(skcms_Curve* curve) {
1123     if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1124         int N = (int)curve->table_entries;
1125 
1126         float c = 0.0f, d = 0.0f, f = 0.0f;
1127         if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
1128             && c == 1.0f
1129             && f == 0.0f) {
1130             curve->table_entries = 0;
1131             curve->table_8       = nullptr;
1132             curve->table_16      = nullptr;
1133             curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
1134         }
1135     }
1136 }
1137 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1138 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1139     bool ok = false;
1140     if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1141     if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1142     if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1143     if (!ok) {
1144         return false;
1145     }
1146 
1147     if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1148     if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1149     if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1150     if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1151 
1152     if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1153     if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1154     if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1155 
1156     if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1157     if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1158     if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1159 
1160     return true;
1161 }
1162 
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1163 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1164     bool ok = false;
1165     if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1166     if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1167     if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1168     if (!ok) {
1169         return false;
1170     }
1171 
1172     if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1173     if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1174     if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1175 
1176     if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1177     if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1178     if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1179 
1180     if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1181     if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1182     if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1183     if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1184 
1185     return true;
1186 }
1187 
1188 typedef struct {
1189     uint8_t type                     [4];
1190     uint8_t reserved                 [4];
1191     uint8_t color_primaries          [1];
1192     uint8_t transfer_characteristics [1];
1193     uint8_t matrix_coefficients      [1];
1194     uint8_t video_full_range_flag    [1];
1195 } CICP_Layout;
1196 
read_cicp(const skcms_ICCTag * tag,skcms_CICP * cicp)1197 static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1198     if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1199         return false;
1200     }
1201 
1202     const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1203 
1204     cicp->color_primaries          = cicpTag->color_primaries[0];
1205     cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1206     cicp->matrix_coefficients      = cicpTag->matrix_coefficients[0];
1207     cicp->video_full_range_flag    = cicpTag->video_full_range_flag[0];
1208     return true;
1209 }
1210 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1211 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1212     if (!profile || !profile->buffer || !tag) { return; }
1213     if (idx > profile->tag_count) { return; }
1214     const tag_Layout* tags = get_tag_table(profile);
1215     tag->signature = read_big_u32(tags[idx].signature);
1216     tag->size      = read_big_u32(tags[idx].size);
1217     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
1218     tag->type      = read_big_u32(tag->buf);
1219 }
1220 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1221 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1222     if (!profile || !profile->buffer || !tag) { return false; }
1223     const tag_Layout* tags = get_tag_table(profile);
1224     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1225         if (read_big_u32(tags[i].signature) == sig) {
1226             tag->signature = sig;
1227             tag->size      = read_big_u32(tags[i].size);
1228             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1229             tag->type      = read_big_u32(tag->buf);
1230             return true;
1231         }
1232     }
1233     return false;
1234 }
1235 
usable_as_src(const skcms_ICCProfile * profile)1236 static bool usable_as_src(const skcms_ICCProfile* profile) {
1237     return profile->has_A2B
1238        || (profile->has_trc && profile->has_toXYZD50);
1239 }
1240 
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1241 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1242                                 const int priority[], const int priorities,
1243                                 skcms_ICCProfile* profile) {
1244     assert(SAFE_SIZEOF(header_Layout) == 132);
1245 
1246     if (!profile) {
1247         return false;
1248     }
1249     memset(profile, 0, SAFE_SIZEOF(*profile));
1250 
1251     if (len < SAFE_SIZEOF(header_Layout)) {
1252         return false;
1253     }
1254 
1255     // Byte-swap all header fields
1256     const header_Layout* header  = (const header_Layout*)buf;
1257     profile->buffer              = (const uint8_t*)buf;
1258     profile->size                = read_big_u32(header->size);
1259     uint32_t version             = read_big_u32(header->version);
1260     profile->data_color_space    = read_big_u32(header->data_color_space);
1261     profile->pcs                 = read_big_u32(header->pcs);
1262     uint32_t signature           = read_big_u32(header->signature);
1263     float illuminant_X           = read_big_fixed(header->illuminant_X);
1264     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1265     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1266     profile->tag_count           = read_big_u32(header->tag_count);
1267 
1268     // Validate signature, size (smaller than buffer, large enough to hold tag table),
1269     // and major version
1270     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1271     if (signature != skcms_Signature_acsp ||
1272         profile->size > len ||
1273         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1274         (version >> 24) > 4) {
1275         return false;
1276     }
1277 
1278     // Validate that illuminant is D50 white
1279     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1280         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1281         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1282         return false;
1283     }
1284 
1285     // Validate that all tag entries have sane offset + size
1286     const tag_Layout* tags = get_tag_table(profile);
1287     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1288         uint32_t tag_offset = read_big_u32(tags[i].offset);
1289         uint32_t tag_size   = read_big_u32(tags[i].size);
1290         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1291         if (tag_size < 4 || tag_end > profile->size) {
1292             return false;
1293         }
1294     }
1295 
1296     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1297         return false;
1298     }
1299 
1300     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1301 
1302     // Pre-parse commonly used tags.
1303     skcms_ICCTag kTRC;
1304     if (profile->data_color_space == skcms_Signature_Gray &&
1305         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1306         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1307             // Malformed tag
1308             return false;
1309         }
1310         profile->trc[1] = profile->trc[0];
1311         profile->trc[2] = profile->trc[0];
1312         profile->has_trc = true;
1313 
1314         if (pcs_is_xyz) {
1315             profile->toXYZD50.vals[0][0] = illuminant_X;
1316             profile->toXYZD50.vals[1][1] = illuminant_Y;
1317             profile->toXYZD50.vals[2][2] = illuminant_Z;
1318             profile->has_toXYZD50 = true;
1319         }
1320     } else {
1321         skcms_ICCTag rTRC, gTRC, bTRC;
1322         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1323             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1324             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1325             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1326                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1327                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1328                 // Malformed TRC tags
1329                 return false;
1330             }
1331             profile->has_trc = true;
1332         }
1333 
1334         skcms_ICCTag rXYZ, gXYZ, bXYZ;
1335         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1336             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1337             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1338             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1339                 // Malformed XYZ tags
1340                 return false;
1341             }
1342             profile->has_toXYZD50 = true;
1343         }
1344     }
1345 
1346     for (int i = 0; i < priorities; i++) {
1347         // enum { perceptual, relative_colormetric, saturation }
1348         if (priority[i] < 0 || priority[i] > 2) {
1349             return false;
1350         }
1351         uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1352         skcms_ICCTag tag;
1353         if (skcms_GetTagBySignature(profile, sig, &tag)) {
1354             if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1355                 // Malformed A2B tag
1356                 return false;
1357             }
1358             profile->has_A2B = true;
1359             break;
1360         }
1361     }
1362 
1363     for (int i = 0; i < priorities; i++) {
1364         // enum { perceptual, relative_colormetric, saturation }
1365         if (priority[i] < 0 || priority[i] > 2) {
1366             return false;
1367         }
1368         uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1369         skcms_ICCTag tag;
1370         if (skcms_GetTagBySignature(profile, sig, &tag)) {
1371             if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1372                 // Malformed B2A tag
1373                 return false;
1374             }
1375             profile->has_B2A = true;
1376             break;
1377         }
1378     }
1379 
1380     skcms_ICCTag cicp_tag;
1381     if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
1382         if (!read_cicp(&cicp_tag, &profile->CICP)) {
1383             // Malformed CICP tag
1384             return false;
1385         }
1386         profile->has_CICP = true;
1387     }
1388 
1389     return usable_as_src(profile);
1390 }
1391 
1392 
skcms_sRGB_profile()1393 const skcms_ICCProfile* skcms_sRGB_profile() {
1394     static const skcms_ICCProfile sRGB_profile = {
1395         nullptr,               // buffer, moot here
1396 
1397         0,                     // size, moot here
1398         skcms_Signature_RGB,   // data_color_space
1399         skcms_Signature_XYZ,   // pcs
1400         0,                     // tag count, moot here
1401 
1402         // We choose to represent sRGB with its canonical transfer function,
1403         // and with its canonical XYZD50 gamut matrix.
1404         true,  // has_trc, followed by the 3 trc curves
1405         {
1406             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1407             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1408             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1409         },
1410 
1411         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1412         {{
1413             { 0.436065674f, 0.385147095f, 0.143066406f },
1414             { 0.222488403f, 0.716873169f, 0.060607910f },
1415             { 0.013916016f, 0.097076416f, 0.714096069f },
1416         }},
1417 
1418         false, // has_A2B, followed by A2B itself, which we don't care about.
1419         {
1420             0,
1421             {
1422                 {{0, {0,0, 0,0,0,0,0}}},
1423                 {{0, {0,0, 0,0,0,0,0}}},
1424                 {{0, {0,0, 0,0,0,0,0}}},
1425                 {{0, {0,0, 0,0,0,0,0}}},
1426             },
1427             {0,0,0,0},
1428             nullptr,
1429             nullptr,
1430 
1431             0,
1432             {
1433                 {{0, {0,0, 0,0,0,0,0}}},
1434                 {{0, {0,0, 0,0,0,0,0}}},
1435                 {{0, {0,0, 0,0,0,0,0}}},
1436             },
1437             {{
1438                 { 0,0,0,0 },
1439                 { 0,0,0,0 },
1440                 { 0,0,0,0 },
1441             }},
1442 
1443             0,
1444             {
1445                 {{0, {0,0, 0,0,0,0,0}}},
1446                 {{0, {0,0, 0,0,0,0,0}}},
1447                 {{0, {0,0, 0,0,0,0,0}}},
1448             },
1449         },
1450 
1451         false, // has_B2A, followed by B2A itself, which we also don't care about.
1452         {
1453             0,
1454             {
1455                 {{0, {0,0, 0,0,0,0,0}}},
1456                 {{0, {0,0, 0,0,0,0,0}}},
1457                 {{0, {0,0, 0,0,0,0,0}}},
1458             },
1459 
1460             0,
1461             {{
1462                 { 0,0,0,0 },
1463                 { 0,0,0,0 },
1464                 { 0,0,0,0 },
1465             }},
1466             {
1467                 {{0, {0,0, 0,0,0,0,0}}},
1468                 {{0, {0,0, 0,0,0,0,0}}},
1469                 {{0, {0,0, 0,0,0,0,0}}},
1470             },
1471 
1472             0,
1473             {0,0,0,0},
1474             nullptr,
1475             nullptr,
1476             {
1477                 {{0, {0,0, 0,0,0,0,0}}},
1478                 {{0, {0,0, 0,0,0,0,0}}},
1479                 {{0, {0,0, 0,0,0,0,0}}},
1480                 {{0, {0,0, 0,0,0,0,0}}},
1481             },
1482         },
1483 
1484         false, // has_CICP, followed by cicp itself which we don't care about.
1485         { 0, 0, 0, 0 },
1486     };
1487     return &sRGB_profile;
1488 }
1489 
skcms_XYZD50_profile()1490 const skcms_ICCProfile* skcms_XYZD50_profile() {
1491     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1492     static const skcms_ICCProfile XYZD50_profile = {
1493         nullptr,               // buffer, moot here
1494 
1495         0,                     // size, moot here
1496         skcms_Signature_RGB,   // data_color_space
1497         skcms_Signature_XYZ,   // pcs
1498         0,                     // tag count, moot here
1499 
1500         true,  // has_trc, followed by the 3 trc curves
1501         {
1502             {{0, {1,1, 0,0,0,0,0}}},
1503             {{0, {1,1, 0,0,0,0,0}}},
1504             {{0, {1,1, 0,0,0,0,0}}},
1505         },
1506 
1507         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1508         {{
1509             { 1,0,0 },
1510             { 0,1,0 },
1511             { 0,0,1 },
1512         }},
1513 
1514         false, // has_A2B, followed by A2B itself, which we don't care about.
1515         {
1516             0,
1517             {
1518                 {{0, {0,0, 0,0,0,0,0}}},
1519                 {{0, {0,0, 0,0,0,0,0}}},
1520                 {{0, {0,0, 0,0,0,0,0}}},
1521                 {{0, {0,0, 0,0,0,0,0}}},
1522             },
1523             {0,0,0,0},
1524             nullptr,
1525             nullptr,
1526 
1527             0,
1528             {
1529                 {{0, {0,0, 0,0,0,0,0}}},
1530                 {{0, {0,0, 0,0,0,0,0}}},
1531                 {{0, {0,0, 0,0,0,0,0}}},
1532             },
1533             {{
1534                 { 0,0,0,0 },
1535                 { 0,0,0,0 },
1536                 { 0,0,0,0 },
1537             }},
1538 
1539             0,
1540             {
1541                 {{0, {0,0, 0,0,0,0,0}}},
1542                 {{0, {0,0, 0,0,0,0,0}}},
1543                 {{0, {0,0, 0,0,0,0,0}}},
1544             },
1545         },
1546 
1547         false, // has_B2A, followed by B2A itself, which we also don't care about.
1548         {
1549             0,
1550             {
1551                 {{0, {0,0, 0,0,0,0,0}}},
1552                 {{0, {0,0, 0,0,0,0,0}}},
1553                 {{0, {0,0, 0,0,0,0,0}}},
1554             },
1555 
1556             0,
1557             {{
1558                 { 0,0,0,0 },
1559                 { 0,0,0,0 },
1560                 { 0,0,0,0 },
1561             }},
1562             {
1563                 {{0, {0,0, 0,0,0,0,0}}},
1564                 {{0, {0,0, 0,0,0,0,0}}},
1565                 {{0, {0,0, 0,0,0,0,0}}},
1566             },
1567 
1568             0,
1569             {0,0,0,0},
1570             nullptr,
1571             nullptr,
1572             {
1573                 {{0, {0,0, 0,0,0,0,0}}},
1574                 {{0, {0,0, 0,0,0,0,0}}},
1575                 {{0, {0,0, 0,0,0,0,0}}},
1576                 {{0, {0,0, 0,0,0,0,0}}},
1577             },
1578         },
1579 
1580         false, // has_CICP, followed by cicp itself which we don't care about.
1581         { 0, 0, 0, 0 },
1582     };
1583 
1584     return &XYZD50_profile;
1585 }
1586 
skcms_sRGB_TransferFunction()1587 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1588     return &skcms_sRGB_profile()->trc[0].parametric;
1589 }
1590 
skcms_sRGB_Inverse_TransferFunction()1591 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1592     static const skcms_TransferFunction sRGB_inv =
1593         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1594     return &sRGB_inv;
1595 }
1596 
skcms_Identity_TransferFunction()1597 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1598     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1599     return &identity;
1600 }
1601 
1602 const uint8_t skcms_252_random_bytes[] = {
1603     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1604     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1605     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1606     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1607     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1608     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1609     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1610     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1611     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1612     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1613     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1614     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1615     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1616     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1617     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1618     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1619 };
1620 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1621 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1622     // Test for exactly equal profiles first.
1623     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1624         return true;
1625     }
1626 
1627     // For now this is the essentially the same strategy we use in test_only.c
1628     // for our skcms_Transform() smoke tests:
1629     //    1) transform A to XYZD50
1630     //    2) transform B to XYZD50
1631     //    3) return true if they're similar enough
1632     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1633 
1634     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1635     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1636 
1637     // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1638     // to be treated as equal.  But CMYK profiles are a totally different ballgame.
1639     const auto CMYK = skcms_Signature_CMYK;
1640     if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1641         return false;
1642     }
1643 
1644     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1645     // TODO: working with RGBA_8888 either way is probably fastest.
1646     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1647     size_t npixels = 84;
1648     if (A->data_color_space == skcms_Signature_CMYK) {
1649         fmt = skcms_PixelFormat_RGBA_8888;
1650         npixels = 63;
1651     }
1652 
1653     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1654     // use pre-canned results and skip that skcms_Transform() call?
1655     uint8_t dstA[252],
1656             dstB[252];
1657     if (!skcms_Transform(
1658                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1659                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1660                 npixels)) {
1661         return false;
1662     }
1663     if (!skcms_Transform(
1664                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1665                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1666                 npixels)) {
1667         return false;
1668     }
1669 
1670     // TODO: make sure this final check has reasonable codegen.
1671     for (size_t i = 0; i < 252; i++) {
1672         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1673             return false;
1674         }
1675     }
1676     return true;
1677 }
1678 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1679 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1680                                       const skcms_TransferFunction* inv_tf) {
1681     if (!profile || !profile->has_trc) {
1682         return false;
1683     }
1684 
1685     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1686            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1687            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1688 }
1689 
is_zero_to_one(float x)1690 static bool is_zero_to_one(float x) {
1691     return 0 <= x && x <= 1;
1692 }
1693 
1694 typedef struct { float vals[3]; } skcms_Vector3;
1695 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1696 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1697     skcms_Vector3 dst = {{0,0,0}};
1698     for (int row = 0; row < 3; ++row) {
1699         dst.vals[row] = m->vals[row][0] * v->vals[0]
1700                       + m->vals[row][1] * v->vals[1]
1701                       + m->vals[row][2] * v->vals[2];
1702     }
1703     return dst;
1704 }
1705 
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1706 bool skcms_AdaptToXYZD50(float wx, float wy,
1707                          skcms_Matrix3x3* toXYZD50) {
1708     if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1709         !toXYZD50) {
1710         return false;
1711     }
1712 
1713     // Assumes that Y is 1.0f.
1714     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1715 
1716     // Now convert toXYZ matrix to toXYZD50.
1717     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1718 
1719     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1720     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1721     // to be the best.
1722     skcms_Matrix3x3 xyz_to_lms = {{
1723         {  0.8951f,  0.2664f, -0.1614f },
1724         { -0.7502f,  1.7135f,  0.0367f },
1725         {  0.0389f, -0.0685f,  1.0296f },
1726     }};
1727     skcms_Matrix3x3 lms_to_xyz = {{
1728         {  0.9869929f, -0.1470543f, 0.1599627f },
1729         {  0.4323053f,  0.5183603f, 0.0492912f },
1730         { -0.0085287f,  0.0400428f, 0.9684867f },
1731     }};
1732 
1733     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1734     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1735 
1736     *toXYZD50 = {{
1737         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1738         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1739         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1740     }};
1741     *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1742     *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1743 
1744     return true;
1745 }
1746 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1747 bool skcms_PrimariesToXYZD50(float rx, float ry,
1748                              float gx, float gy,
1749                              float bx, float by,
1750                              float wx, float wy,
1751                              skcms_Matrix3x3* toXYZD50) {
1752     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1753         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1754         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1755         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1756         !toXYZD50) {
1757         return false;
1758     }
1759 
1760     // First, we need to convert xy values (primaries) to XYZ.
1761     skcms_Matrix3x3 primaries = {{
1762         { rx, gx, bx },
1763         { ry, gy, by },
1764         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1765     }};
1766     skcms_Matrix3x3 primaries_inv;
1767     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1768         return false;
1769     }
1770 
1771     // Assumes that Y is 1.0f.
1772     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1773     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1774 
1775     skcms_Matrix3x3 toXYZ = {{
1776         { XYZ.vals[0],           0,           0 },
1777         {           0, XYZ.vals[1],           0 },
1778         {           0,           0, XYZ.vals[2] },
1779     }};
1780     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1781 
1782     skcms_Matrix3x3 DXtoD50;
1783     if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1784         return false;
1785     }
1786 
1787     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1788     return true;
1789 }
1790 
1791 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1792 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1793     double a00 = src->vals[0][0],
1794            a01 = src->vals[1][0],
1795            a02 = src->vals[2][0],
1796            a10 = src->vals[0][1],
1797            a11 = src->vals[1][1],
1798            a12 = src->vals[2][1],
1799            a20 = src->vals[0][2],
1800            a21 = src->vals[1][2],
1801            a22 = src->vals[2][2];
1802 
1803     double b0 = a00*a11 - a01*a10,
1804            b1 = a00*a12 - a02*a10,
1805            b2 = a01*a12 - a02*a11,
1806            b3 = a20,
1807            b4 = a21,
1808            b5 = a22;
1809 
1810     double determinant = b0*b5
1811                        - b1*b4
1812                        + b2*b3;
1813 
1814     if (determinant == 0) {
1815         return false;
1816     }
1817 
1818     double invdet = 1.0 / determinant;
1819     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1820         return false;
1821     }
1822 
1823     b0 *= invdet;
1824     b1 *= invdet;
1825     b2 *= invdet;
1826     b3 *= invdet;
1827     b4 *= invdet;
1828     b5 *= invdet;
1829 
1830     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1831     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1832     dst->vals[2][0] = (float)(        +     b2 );
1833     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1834     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1835     dst->vals[2][1] = (float)(        -     b1 );
1836     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1837     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1838     dst->vals[2][2] = (float)(        +     b0 );
1839 
1840     for (int r = 0; r < 3; ++r)
1841     for (int c = 0; c < 3; ++c) {
1842         if (!isfinitef_(dst->vals[r][c])) {
1843             return false;
1844         }
1845     }
1846     return true;
1847 }
1848 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1849 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1850     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1851     for (int r = 0; r < 3; r++)
1852         for (int c = 0; c < 3; c++) {
1853             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1854                          + A->vals[r][1] * B->vals[1][c]
1855                          + A->vals[r][2] * B->vals[2][c];
1856         }
1857     return m;
1858 }
1859 
1860 #if defined(__clang__)
1861     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1862 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1863 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1864     TF_PQish  pq;
1865     TF_HLGish hlg;
1866     switch (classify(*src, &pq, &hlg)) {
1867         case skcms_TFType_Invalid: return false;
1868         case skcms_TFType_sRGBish: break;  // handled below
1869 
1870         case skcms_TFType_PQish:
1871             *dst = { TFKind_marker(skcms_TFType_PQish), -pq.A,  pq.D, 1.0f/pq.F
1872                                                       ,  pq.B, -pq.E, 1.0f/pq.C};
1873             return true;
1874 
1875         case skcms_TFType_HLGish:
1876             *dst = { TFKind_marker(skcms_TFType_HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1877                                                           , 1.0f/hlg.a, hlg.b, hlg.c
1878                                                           , hlg.K_minus_1 };
1879             return true;
1880 
1881         case skcms_TFType_HLGinvish:
1882             *dst = { TFKind_marker(skcms_TFType_HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1883                                                        , 1.0f/hlg.a, hlg.b, hlg.c
1884                                                        , hlg.K_minus_1 };
1885             return true;
1886     }
1887 
1888     assert (classify(*src) == skcms_TFType_sRGBish);
1889 
1890     // We're inverting this function, solving for x in terms of y.
1891     //   y = (cx + f)         x < d
1892     //       (ax + b)^g + e   x ≥ d
1893     // The inverse of this function can be expressed in the same piecewise form.
1894     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1895 
1896     // We'll start by finding the new threshold inv.d.
1897     // In principle we should be able to find that by solving for y at x=d from either side.
1898     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1899     float d_l =       src->c * src->d + src->f,
1900           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1901     if (fabsf_(d_l - d_r) > 1/512.0f) {
1902         return false;
1903     }
1904     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1905 
1906     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1907     if (inv.d > 0) {
1908         // Inverting the linear section is pretty straightfoward:
1909         //        y       = cx + f
1910         //        y - f   = cx
1911         //   (1/c)y - f/c = x
1912         inv.c =    1.0f/src->c;
1913         inv.f = -src->f/src->c;
1914     }
1915 
1916     // The interesting part is inverting the nonlinear section:
1917     //         y                = (ax + b)^g + e.
1918     //         y - e            = (ax + b)^g
1919     //        (y - e)^1/g       =  ax + b
1920     //        (y - e)^1/g - b   =  ax
1921     //   (1/a)(y - e)^1/g - b/a =   x
1922     //
1923     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1924     //   let k = (1/a)^g
1925     //   (1/a)( y -  e)^1/g - b/a = x
1926     //        (ky - ke)^1/g - b/a = x
1927 
1928     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1929     inv.g = 1.0f / src->g;
1930     inv.a = k;
1931     inv.b = -k * src->e;
1932     inv.e = -src->b / src->a;
1933 
1934     // We need to enforce the same constraints here that we do when fitting a curve,
1935     // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
1936     // of the source function if we're here.
1937 
1938     // Just like when fitting the curve, there's really no way to rescue a < 0.
1939     if (inv.a < 0) {
1940         return false;
1941     }
1942     // On the other hand we can rescue an ad+b that's gone slightly negative here.
1943     if (inv.a * inv.d + inv.b < 0) {
1944         inv.b = -inv.a * inv.d;
1945     }
1946 
1947     // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1948     // where we might still fail here, like non-finite parameter values.
1949     if (classify(inv) != skcms_TFType_sRGBish) {
1950         return false;
1951     }
1952 
1953     assert (inv.a >= 0);
1954     assert (inv.a * inv.d + inv.b >= 0);
1955 
1956     // Now in principle we're done.
1957     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1958     // e or f of the inverse, depending on which segment contains src(1.0f).
1959     float s = skcms_TransferFunction_eval(src, 1.0f);
1960     if (!isfinitef_(s)) {
1961         return false;
1962     }
1963 
1964     float sign = s < 0 ? -1.0f : 1.0f;
1965     s *= sign;
1966     if (s < inv.d) {
1967         inv.f = 1.0f - sign * inv.c * s;
1968     } else {
1969         inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1970     }
1971 
1972     *dst = inv;
1973     return classify(*dst) == skcms_TFType_sRGBish;
1974 }
1975 
1976 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1977 
1978 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1979 //
1980 //   tf(x) =  cx + f          x < d
1981 //   tf(x) = (ax + b)^g + e   x ≥ d
1982 //
1983 // When fitting, we add the additional constraint that both pieces meet at d:
1984 //
1985 //   cd + f = (ad + b)^g + e
1986 //
1987 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1988 //
1989 //   tf(x) =                           cx + f   x < d
1990 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1991 //
1992 // Our overall strategy is then:
1993 //    For a couple tolerances,
1994 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1995 //       - invert c,d,f
1996 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1997 //                          (and by constraint, inverted e) to the inverse of the table.
1998 //    Return the parameters with least maximum error.
1999 //
2000 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2001 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2002 //
2003 //    let y = Table(x)
2004 //    r(x) = x - f_inv(y)
2005 //
2006 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
2007 //          - ln(ad + b)*(ad + b)^g
2008 //    ∂r/∂a = yg(ay + b)^(g-1)
2009 //          - dg(ad + b)^(g-1)
2010 //    ∂r/∂b =  g(ay + b)^(g-1)
2011 //          -  g(ad + b)^(g-1)
2012 
2013 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2014 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])2015 static float rg_nonlinear(float x,
2016                           const skcms_Curve* curve,
2017                           const skcms_TransferFunction* tf,
2018                           float dfdP[3]) {
2019     const float y = eval_curve(curve, x);
2020 
2021     const float g = tf->g, a = tf->a, b = tf->b,
2022                 c = tf->c, d = tf->d, f = tf->f;
2023 
2024     const float Y = fmaxf_(a*y + b, 0.0f),
2025                 D =        a*d + b;
2026     assert (D >= 0);
2027 
2028     // The gradient.
2029     dfdP[0] = logf_(Y)*powf_(Y, g)
2030             - logf_(D)*powf_(D, g);
2031     dfdP[1] = y*g*powf_(Y, g-1)
2032             - d*g*powf_(D, g-1);
2033     dfdP[2] =   g*powf_(Y, g-1)
2034             -   g*powf_(D, g-1);
2035 
2036     // The residual.
2037     const float f_inv = powf_(Y, g)
2038                       - powf_(D, g)
2039                       + c*d + f;
2040     return x - f_inv;
2041 }
2042 
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)2043 static bool gauss_newton_step(const skcms_Curve* curve,
2044                                     skcms_TransferFunction* tf,
2045                               float x0, float dx, int N) {
2046     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2047     //
2048     // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2049     //
2050     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2051     //   where r(P) is the residual vector
2052     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
2053     //
2054     // Let's review the shape of each of these expressions:
2055     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
2056     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
2057     //   Jf^T   is [3 x N], the transpose of Jf
2058     //
2059     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2060     //                                              and so is its inverse (Jf^T Jf)^-1
2061     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2062     //
2063     // Our implementation strategy to get to the final ∆P is
2064     //   1) evaluate Jf^T Jf,   call that lhs
2065     //   2) evaluate Jf^T r(P), call that rhs
2066     //   3) invert lhs
2067     //   4) multiply inverse lhs by rhs
2068     //
2069     // This is a friendly implementation strategy because we don't have to have any
2070     // buffers that scale with N, and equally nice don't have to perform any matrix
2071     // operations that are variable size.
2072     //
2073     // Other implementation strategies could trade this off, e.g. evaluating the
2074     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2075     // the residuals.  That would probably require implementing singular value
2076     // decomposition, and would create a [3 x N] matrix to be multiplied by the
2077     // [N x 1] residual vector, but on the upside I think that'd eliminate the
2078     // possibility of this gauss_newton_step() function ever failing.
2079 
2080     // 0) start off with lhs and rhs safely zeroed.
2081     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2082     skcms_Vector3   rhs = {  {0,0,0} };
2083 
2084     // 1,2) evaluate lhs and evaluate rhs
2085     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2086     //   so we'll have to update lhs and rhs at the same time.
2087     for (int i = 0; i < N; i++) {
2088         float x = x0 + static_cast<float>(i)*dx;
2089 
2090         float dfdP[3] = {0,0,0};
2091         float resid = rg_nonlinear(x,curve,tf, dfdP);
2092 
2093         for (int r = 0; r < 3; r++) {
2094             for (int c = 0; c < 3; c++) {
2095                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2096             }
2097             rhs.vals[r] += dfdP[r] * resid;
2098         }
2099     }
2100 
2101     // If any of the 3 P parameters are unused, this matrix will be singular.
2102     // Detect those cases and fix them up to indentity instead, so we can invert.
2103     for (int k = 0; k < 3; k++) {
2104         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2105             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2106             lhs.vals[k][k] = 1;
2107         }
2108     }
2109 
2110     // 3) invert lhs
2111     skcms_Matrix3x3 lhs_inv;
2112     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2113         return false;
2114     }
2115 
2116     // 4) multiply inverse lhs by rhs
2117     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2118     tf->g += dP.vals[0];
2119     tf->a += dP.vals[1];
2120     tf->b += dP.vals[2];
2121     return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2122 }
2123 
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2124 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2125                                          const skcms_TransferFunction* tf_inv) {
2126     skcms_TransferFunction tf;
2127     if (!skcms_TransferFunction_invert(tf_inv, &tf) || skcms_TFType_sRGBish != classify(tf)) {
2128         return INFINITY_;
2129     }
2130 
2131     skcms_TransferFunction tf_inv_again;
2132     if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2133         return INFINITY_;
2134     }
2135 
2136     return skcms_MaxRoundtripError(curve, &tf_inv_again);
2137 }
2138 
2139 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2140 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2141     // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2142     auto fixup_tf = [tf]() {
2143         // a must be non-negative. That ensures the function is monotonically increasing.
2144         // We don't really know how to fix up a if it goes negative.
2145         if (tf->a < 0) {
2146             return false;
2147         }
2148         // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2149         // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2150         if (tf->a * tf->d + tf->b < 0) {
2151             tf->b = -tf->a * tf->d;
2152         }
2153         assert (tf->a >= 0 &&
2154                 tf->a * tf->d + tf->b >= 0);
2155 
2156         // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2157         // parameter so we can guarantee this.
2158         tf->e =   tf->c*tf->d + tf->f
2159           - powf_(tf->a*tf->d + tf->b, tf->g);
2160 
2161         return true;
2162     };
2163 
2164     if (!fixup_tf()) {
2165         return false;
2166     }
2167 
2168     // No matter where we start, dx should always represent N even steps from 0 to 1.
2169     const float dx = 1.0f / static_cast<float>(N-1);
2170 
2171     skcms_TransferFunction best_tf = *tf;
2172     float best_max_error = INFINITY_;
2173 
2174     // Need this or several curves get worse... *sigh*
2175     float init_error = max_roundtrip_error_checked(curve, tf);
2176     if (init_error < best_max_error) {
2177         best_max_error = init_error;
2178         best_tf = *tf;
2179     }
2180 
2181     // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2182     for (int j = 0; j < 8; j++) {
2183         if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
2184             *tf = best_tf;
2185             return isfinitef_(best_max_error);
2186         }
2187 
2188         float max_error = max_roundtrip_error_checked(curve, tf);
2189         if (max_error < best_max_error) {
2190             best_max_error = max_error;
2191             best_tf = *tf;
2192         }
2193     }
2194 
2195     *tf = best_tf;
2196     return isfinitef_(best_max_error);
2197 }
2198 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2199 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2200                             skcms_TransferFunction* approx,
2201                             float* max_error) {
2202     if (!curve || !approx || !max_error) {
2203         return false;
2204     }
2205 
2206     if (curve->table_entries == 0) {
2207         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2208         return false;
2209     }
2210 
2211     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2212         // We need at least two points, and must put some reasonable cap on the maximum number.
2213         return false;
2214     }
2215 
2216     int N = (int)curve->table_entries;
2217     const float dx = 1.0f / static_cast<float>(N - 1);
2218 
2219     *max_error = INFINITY_;
2220     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2221     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2222         skcms_TransferFunction tf,
2223                                tf_inv;
2224 
2225         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2226         tf.f = 0.0f;
2227         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2228 
2229         if (L == N) {
2230             // If the entire data set was linear, move the coefficients to the nonlinear portion
2231             // with G == 1.  This lets use a canonical representation with d == 0.
2232             tf.g = 1;
2233             tf.a = tf.c;
2234             tf.b = tf.f;
2235             tf.c = tf.d = tf.e = tf.f = 0;
2236         } else if (L == N - 1) {
2237             // Degenerate case with only two points in the nonlinear segment. Solve directly.
2238             tf.g = 1;
2239             tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
2240                     eval_curve(curve, static_cast<float>(N-2)*dx))
2241                  / dx;
2242             tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
2243                  - tf.a * static_cast<float>(N-2)*dx;
2244             tf.e = 0;
2245         } else {
2246             // Start by guessing a gamma-only curve through the midpoint.
2247             int mid = (L + N) / 2;
2248             float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2249             float mid_y = eval_curve(curve, mid_x);
2250             tf.g = log2f_(mid_y) / log2f_(mid_x);
2251             tf.a = 1;
2252             tf.b = 0;
2253             tf.e =    tf.c*tf.d + tf.f
2254               - powf_(tf.a*tf.d + tf.b, tf.g);
2255 
2256 
2257             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2258                 !fit_nonlinear(curve, L,N, &tf_inv)) {
2259                 continue;
2260             }
2261 
2262             // We fit tf_inv, so calculate tf to keep in sync.
2263             // fit_nonlinear() should guarantee invertibility.
2264             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2265                 assert(false);
2266                 continue;
2267             }
2268         }
2269 
2270         // We'd better have a sane, sRGB-ish TF by now.
2271         // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2272         // anything else is just some accident of math and the way we pun tf.g as a type flag.
2273         // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2274         if (skcms_TFType_sRGBish != classify(tf)) {
2275             continue;
2276         }
2277 
2278         // We find our error by roundtripping the table through tf_inv.
2279         //
2280         // (The most likely use case for this approximation is to be inverted and
2281         // used as the transfer function for a destination color space.)
2282         //
2283         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2284         // invertible, so re-verify that here (and use the new inverse for testing).
2285         // fit_nonlinear() should guarantee this, but the special cases that don't use
2286         // it may fail this test.
2287         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2288             continue;
2289         }
2290 
2291         float err = skcms_MaxRoundtripError(curve, &tf_inv);
2292         if (*max_error > err) {
2293             *max_error = err;
2294             *approx    = tf;
2295         }
2296     }
2297     return isfinitef_(*max_error);
2298 }
2299 
2300 // ~~~~ Impl. of skcms_Transform() ~~~~
2301 
2302 typedef enum {
2303     Op_load_a8,
2304     Op_load_g8,
2305     Op_load_8888_palette8,
2306     Op_load_4444,
2307     Op_load_565,
2308     Op_load_888,
2309     Op_load_8888,
2310     Op_load_1010102,
2311     Op_load_101010x_XR,
2312     Op_load_161616LE,
2313     Op_load_16161616LE,
2314     Op_load_161616BE,
2315     Op_load_16161616BE,
2316     Op_load_hhh,
2317     Op_load_hhhh,
2318     Op_load_fff,
2319     Op_load_ffff,
2320 
2321     Op_swap_rb,
2322     Op_clamp,
2323     Op_invert,
2324     Op_force_opaque,
2325     Op_premul,
2326     Op_unpremul,
2327     Op_matrix_3x3,
2328     Op_matrix_3x4,
2329 
2330     Op_lab_to_xyz,
2331     Op_xyz_to_lab,
2332 
2333     Op_tf_r,
2334     Op_tf_g,
2335     Op_tf_b,
2336     Op_tf_a,
2337 
2338     Op_pq_r,
2339     Op_pq_g,
2340     Op_pq_b,
2341     Op_pq_a,
2342 
2343     Op_hlg_r,
2344     Op_hlg_g,
2345     Op_hlg_b,
2346     Op_hlg_a,
2347 
2348     Op_hlginv_r,
2349     Op_hlginv_g,
2350     Op_hlginv_b,
2351     Op_hlginv_a,
2352 
2353     Op_table_r,
2354     Op_table_g,
2355     Op_table_b,
2356     Op_table_a,
2357 
2358     Op_clut_A2B,
2359     Op_clut_B2A,
2360 
2361     Op_store_a8,
2362     Op_store_g8,
2363     Op_store_4444,
2364     Op_store_565,
2365     Op_store_888,
2366     Op_store_8888,
2367     Op_store_1010102,
2368     Op_store_161616LE,
2369     Op_store_16161616LE,
2370     Op_store_161616BE,
2371     Op_store_16161616BE,
2372     Op_store_101010x_XR,
2373     Op_store_hhh,
2374     Op_store_hhhh,
2375     Op_store_fff,
2376     Op_store_ffff,
2377 } Op;
2378 
2379 #if defined(__clang__)
2380     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2381 #elif defined(__GNUC__)
2382     // For some reason GCC accepts this nonsense, but not the more straightforward version,
2383     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2384     template <int N, typename T>
2385     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2386 
2387     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2388 #endif
2389 
2390 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2391 
2392 namespace baseline {
2393 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2394                             || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2395     #define N 1
2396     template <typename T> using V = T;
2397     using Color = float;
2398 #elif defined(__AVX512F__) && defined(__AVX512DQ__)
2399     #define N 16
2400     template <typename T> using V = Vec<N,T>;
2401     using Color = float;
2402 #elif defined(__AVX__)
2403     #define N 8
2404     template <typename T> using V = Vec<N,T>;
2405     using Color = float;
2406 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2407     #define N 8
2408     template <typename T> using V = Vec<N,T>;
2409     using Color = _Float16;
2410 #else
2411     #define N 4
2412     template <typename T> using V = Vec<N,T>;
2413     using Color = float;
2414 #endif
2415 
2416     #include "src/Transform_inl.h"
2417     #undef N
2418 }
2419 
2420 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2421 #if !defined(SKCMS_PORTABLE) &&                           \
2422     !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
2423         (( defined(__clang__) && __clang_major__ >= 5) || \
2424          (!defined(__clang__) && defined(__GNUC__)))      \
2425      && defined(__x86_64__)
2426 
2427     #if !defined(__AVX2__)
2428         #if defined(__clang__)
2429             #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2430         #elif defined(__GNUC__)
2431             #pragma GCC push_options
2432             #pragma GCC target("avx2,f16c")
2433         #endif
2434 
2435         namespace hsw {
2436             #define USING_AVX
2437             #define USING_AVX_F16C
2438             #define USING_AVX2
2439             #define N 8
2440             template <typename T> using V = Vec<N,T>;
2441             using Color = float;
2442 
2443             #include "src/Transform_inl.h"
2444 
2445             // src/Transform_inl.h will undefine USING_* for us.
2446             #undef N
2447         }
2448 
2449         #if defined(__clang__)
2450             #pragma clang attribute pop
2451         #elif defined(__GNUC__)
2452             #pragma GCC pop_options
2453         #endif
2454 
2455         #define TEST_FOR_HSW
2456     #endif
2457 
2458     #if !defined(__AVX512F__) || !defined(__AVX512DQ__)
2459         #if defined(__clang__)
2460             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2461         #elif defined(__GNUC__)
2462             #pragma GCC push_options
2463             #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2464         #endif
2465 
2466         namespace skx {
2467             #define USING_AVX512F
2468             #define N 16
2469             template <typename T> using V = Vec<N,T>;
2470             using Color = float;
2471 
2472             #include "src/Transform_inl.h"
2473 
2474             // src/Transform_inl.h will undefine USING_* for us.
2475             #undef N
2476         }
2477 
2478         #if defined(__clang__)
2479             #pragma clang attribute pop
2480         #elif defined(__GNUC__)
2481             #pragma GCC pop_options
2482         #endif
2483 
2484         #define TEST_FOR_SKX
2485     #endif
2486 
2487     #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2488         enum class CpuType { None, HSW, SKX };
cpu_type()2489         static CpuType cpu_type() {
2490             static const CpuType type = []{
2491                 if (!runtime_cpu_detection) {
2492                     return CpuType::None;
2493                 }
2494                 // See http://www.sandpile.org/x86/cpuid.htm
2495 
2496                 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2497                 uint32_t eax, ebx, ecx, edx;
2498                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2499                                              : "0"(1), "2"(0));
2500                 if ((edx & (1u<<25)) &&  // SSE
2501                     (edx & (1u<<26)) &&  // SSE2
2502                     (ecx & (1u<< 0)) &&  // SSE3
2503                     (ecx & (1u<< 9)) &&  // SSSE3
2504                     (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2505                     (ecx & (1u<<19)) &&  // SSE4.1
2506                     (ecx & (1u<<20)) &&  // SSE4.2
2507                     (ecx & (1u<<26)) &&  // XSAVE
2508                     (ecx & (1u<<27)) &&  // OSXSAVE
2509                     (ecx & (1u<<28)) &&  // AVX
2510                     (ecx & (1u<<29))) {  // F16C
2511 
2512                     // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2513                     __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2514                                                  : "0"(7), "2"(0));
2515                     // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2516                     uint32_t xcr0, dont_need_edx;
2517                     __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2518 
2519                     if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2520                         (xcr0 & (1u<<2)) &&  // YMM register state saved?
2521                         (ebx  & (1u<<5))) {  // AVX2
2522                         // At this point we're at least HSW.  Continue checking for SKX.
2523                         if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2524                             (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2525                             (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2526                             (ebx  & (1u<<16)) && // AVX512F
2527                             (ebx  & (1u<<17)) && // AVX512DQ
2528                             (ebx  & (1u<<28)) && // AVX512CD
2529                             (ebx  & (1u<<30)) && // AVX512BW
2530                             (ebx  & (1u<<31))) { // AVX512VL
2531                             return CpuType::SKX;
2532                         }
2533                         return CpuType::HSW;
2534                     }
2535                 }
2536                 return CpuType::None;
2537             }();
2538             return type;
2539         }
2540     #endif
2541 
2542 #endif
2543 
2544 typedef struct {
2545     Op          op;
2546     const void* arg;
2547 } OpAndArg;
2548 
select_curve_op(const skcms_Curve * curve,int channel)2549 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2550     static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2551         { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2552         { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2553         { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2554         { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2555     };
2556     const auto& op = ops[channel];
2557 
2558     if (curve->table_entries == 0) {
2559         const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2560 
2561         const skcms_TransferFunction& tf = curve->parametric;
2562 
2563         if (tf.g == 1 && tf.a == 1 &&
2564             tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2565             return noop;
2566         }
2567 
2568         switch (classify(tf)) {
2569             case skcms_TFType_Invalid:    return noop;
2570             case skcms_TFType_sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2571             case skcms_TFType_PQish:      return OpAndArg{op.PQish,     &tf};
2572             case skcms_TFType_HLGish:     return OpAndArg{op.HLGish,    &tf};
2573             case skcms_TFType_HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2574         }
2575     }
2576     return OpAndArg{op.table, curve};
2577 }
2578 
bytes_per_pixel(skcms_PixelFormat fmt)2579 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2580     switch (fmt >> 1) {   // ignore rgb/bgr
2581         case skcms_PixelFormat_A_8                >> 1: return  1;
2582         case skcms_PixelFormat_G_8                >> 1: return  1;
2583         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
2584         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
2585         case skcms_PixelFormat_RGB_565            >> 1: return  2;
2586         case skcms_PixelFormat_RGB_888            >> 1: return  3;
2587         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
2588         case skcms_PixelFormat_RGBA_8888_sRGB     >> 1: return  4;
2589         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
2590         case skcms_PixelFormat_RGB_101010x_XR     >> 1: return  4;
2591         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
2592         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
2593         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
2594         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
2595         case skcms_PixelFormat_RGB_hhh_Norm       >> 1: return  6;
2596         case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1: return  8;
2597         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
2598         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
2599         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
2600         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
2601     }
2602     assert(false);
2603     return 0;
2604 }
2605 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2606 static bool prep_for_destination(const skcms_ICCProfile* profile,
2607                                  skcms_Matrix3x3* fromXYZD50,
2608                                  skcms_TransferFunction* invR,
2609                                  skcms_TransferFunction* invG,
2610                                  skcms_TransferFunction* invB) {
2611     // skcms_Transform() supports B2A destinations...
2612     if (profile->has_B2A) { return true; }
2613     // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2614     return profile->has_trc
2615         && profile->has_toXYZD50
2616         && profile->trc[0].table_entries == 0
2617         && profile->trc[1].table_entries == 0
2618         && profile->trc[2].table_entries == 0
2619         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2620         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2621         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2622         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2623 }
2624 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2625 bool skcms_Transform(const void*             src,
2626                      skcms_PixelFormat       srcFmt,
2627                      skcms_AlphaFormat       srcAlpha,
2628                      const skcms_ICCProfile* srcProfile,
2629                      void*                   dst,
2630                      skcms_PixelFormat       dstFmt,
2631                      skcms_AlphaFormat       dstAlpha,
2632                      const skcms_ICCProfile* dstProfile,
2633                      size_t                  npixels) {
2634     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2635                                       dst, dstFmt, dstAlpha, dstProfile,
2636                                       npixels, nullptr);
2637 }
2638 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2639 bool skcms_TransformWithPalette(const void*             src,
2640                                 skcms_PixelFormat       srcFmt,
2641                                 skcms_AlphaFormat       srcAlpha,
2642                                 const skcms_ICCProfile* srcProfile,
2643                                 void*                   dst,
2644                                 skcms_PixelFormat       dstFmt,
2645                                 skcms_AlphaFormat       dstAlpha,
2646                                 const skcms_ICCProfile* dstProfile,
2647                                 size_t                  nz,
2648                                 const void*             palette) {
2649     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2650                  src_bpp = bytes_per_pixel(srcFmt);
2651     // Let's just refuse if the request is absurdly big.
2652     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2653         return false;
2654     }
2655     int n = (int)nz;
2656 
2657     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2658     if (!srcProfile) {
2659         srcProfile = skcms_sRGB_profile();
2660     }
2661     if (!dstProfile) {
2662         dstProfile = skcms_sRGB_profile();
2663     }
2664 
2665     // We can't transform in place unless the PixelFormats are the same size.
2666     if (dst == src && dst_bpp != src_bpp) {
2667         return false;
2668     }
2669     // TODO: more careful alias rejection (like, dst == src + 1)?
2670 
2671     if (needs_palette(srcFmt) && !palette) {
2672         return false;
2673     }
2674 
2675     Op          program  [32];
2676     const void* arguments[32];
2677 
2678     Op*          ops  = program;
2679     const void** args = arguments;
2680 
2681     // These are always parametric curves of some sort.
2682     skcms_Curve dst_curves[3];
2683     dst_curves[0].table_entries =
2684     dst_curves[1].table_entries =
2685     dst_curves[2].table_entries = 0;
2686 
2687     skcms_Matrix3x3        from_xyz;
2688 
2689     switch (srcFmt >> 1) {
2690         default: return false;
2691         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2692         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2693         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2694         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2695         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2696         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2697         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2698         case skcms_PixelFormat_RGB_101010x_XR  >> 1: *ops++ = Op_load_101010x_XR; break;
2699         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2700         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2701         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2702         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2703         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
2704         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
2705         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2706         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2707         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2708         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2709 
2710         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2711                                                         *args++ = palette;
2712                                                         break;
2713         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2714             *ops++ = Op_load_8888;
2715             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_TransferFunction();
2716             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_TransferFunction();
2717             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_TransferFunction();
2718             break;
2719     }
2720     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2721         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2722         *ops++ = Op_clamp;
2723     }
2724     if (srcFmt & 1) {
2725         *ops++ = Op_swap_rb;
2726     }
2727     skcms_ICCProfile gray_dst_profile;
2728     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2729         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2730         // luminance (Y) by the destination transfer function.
2731         gray_dst_profile = *dstProfile;
2732         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2733         dstProfile = &gray_dst_profile;
2734     }
2735 
2736     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2737         // Photoshop creates CMYK images as inverse CMYK.
2738         // These happen to be the only ones we've _ever_ seen.
2739         *ops++ = Op_invert;
2740         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2741         srcAlpha = skcms_AlphaFormat_Unpremul;
2742     }
2743 
2744     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2745         *ops++ = Op_force_opaque;
2746     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2747         *ops++ = Op_unpremul;
2748     }
2749 
2750     if (dstProfile != srcProfile) {
2751 
2752         if (!prep_for_destination(dstProfile,
2753                                   &from_xyz,
2754                                   &dst_curves[0].parametric,
2755                                   &dst_curves[1].parametric,
2756                                   &dst_curves[2].parametric)) {
2757             return false;
2758         }
2759 
2760         if (srcProfile->has_A2B) {
2761             if (srcProfile->A2B.input_channels) {
2762                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2763                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2764                     if (oa.arg) {
2765                         *ops++  = oa.op;
2766                         *args++ = oa.arg;
2767                     }
2768                 }
2769                 *ops++  = Op_clamp;
2770                 *ops++  = Op_clut_A2B;
2771                 *args++ = &srcProfile->A2B;
2772             }
2773 
2774             if (srcProfile->A2B.matrix_channels == 3) {
2775                 for (int i = 0; i < 3; i++) {
2776                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2777                     if (oa.arg) {
2778                         *ops++  = oa.op;
2779                         *args++ = oa.arg;
2780                     }
2781                 }
2782 
2783                 static const skcms_Matrix3x4 I = {{
2784                     {1,0,0,0},
2785                     {0,1,0,0},
2786                     {0,0,1,0},
2787                 }};
2788                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2789                     *ops++  = Op_matrix_3x4;
2790                     *args++ = &srcProfile->A2B.matrix;
2791                 }
2792             }
2793 
2794             if (srcProfile->A2B.output_channels == 3) {
2795                 for (int i = 0; i < 3; i++) {
2796                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2797                     if (oa.arg) {
2798                         *ops++  = oa.op;
2799                         *args++ = oa.arg;
2800                     }
2801                 }
2802             }
2803 
2804             if (srcProfile->pcs == skcms_Signature_Lab) {
2805                 *ops++ = Op_lab_to_xyz;
2806             }
2807 
2808         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2809             for (int i = 0; i < 3; i++) {
2810                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2811                 if (oa.arg) {
2812                     *ops++  = oa.op;
2813                     *args++ = oa.arg;
2814                 }
2815             }
2816         } else {
2817             return false;
2818         }
2819 
2820         // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2821         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2822 
2823         if (dstProfile->has_B2A) {
2824             // B2A needs its input in XYZD50, so transform TRC sources now.
2825             if (!srcProfile->has_A2B) {
2826                 *ops++  = Op_matrix_3x3;
2827                 *args++ = &srcProfile->toXYZD50;
2828             }
2829 
2830             if (dstProfile->pcs == skcms_Signature_Lab) {
2831                 *ops++ = Op_xyz_to_lab;
2832             }
2833 
2834             if (dstProfile->B2A.input_channels == 3) {
2835                 for (int i = 0; i < 3; i++) {
2836                     OpAndArg oa = select_curve_op(&dstProfile->B2A.input_curves[i], i);
2837                     if (oa.arg) {
2838                         *ops++  = oa.op;
2839                         *args++ = oa.arg;
2840                     }
2841                 }
2842             }
2843 
2844             if (dstProfile->B2A.matrix_channels == 3) {
2845                 static const skcms_Matrix3x4 I = {{
2846                     {1,0,0,0},
2847                     {0,1,0,0},
2848                     {0,0,1,0},
2849                 }};
2850                 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2851                     *ops++  = Op_matrix_3x4;
2852                     *args++ = &dstProfile->B2A.matrix;
2853                 }
2854 
2855                 for (int i = 0; i < 3; i++) {
2856                     OpAndArg oa = select_curve_op(&dstProfile->B2A.matrix_curves[i], i);
2857                     if (oa.arg) {
2858                         *ops++  = oa.op;
2859                         *args++ = oa.arg;
2860                     }
2861                 }
2862             }
2863 
2864             if (dstProfile->B2A.output_channels) {
2865                 *ops++  = Op_clamp;
2866                 *ops++  = Op_clut_B2A;
2867                 *args++ = &dstProfile->B2A;
2868                 for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
2869                     OpAndArg oa = select_curve_op(&dstProfile->B2A.output_curves[i], i);
2870                     if (oa.arg) {
2871                         *ops++  = oa.op;
2872                         *args++ = oa.arg;
2873                     }
2874                 }
2875             }
2876         } else {
2877             // This is a TRC destination.
2878             // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2879             // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2880             static const skcms_Matrix3x3 I = {{
2881                 { 1.0f, 0.0f, 0.0f },
2882                 { 0.0f, 1.0f, 0.0f },
2883                 { 0.0f, 0.0f, 1.0f },
2884             }};
2885             const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2886 
2887             // There's a chance the source and destination gamuts are identical,
2888             // in which case we can skip the gamut transform.
2889             if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2890                 // Concat the entire gamut transform into from_xyz,
2891                 // now slightly misnamed but it's a handy spot to stash the result.
2892                 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2893                 *ops++  = Op_matrix_3x3;
2894                 *args++ = &from_xyz;
2895             }
2896 
2897             // Encode back to dst RGB using its parametric transfer functions.
2898             for (int i = 0; i < 3; i++) {
2899                 OpAndArg oa = select_curve_op(dst_curves+i, i);
2900                 if (oa.arg) {
2901                     assert (oa.op != Op_table_r &&
2902                             oa.op != Op_table_g &&
2903                             oa.op != Op_table_b &&
2904                             oa.op != Op_table_a);
2905                     *ops++  = oa.op;
2906                     *args++ = oa.arg;
2907                 }
2908             }
2909         }
2910     }
2911 
2912     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2913     // not just to values that fit in [0,1].
2914     //
2915     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2916     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2917     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2918         *ops++ = Op_clamp;
2919     }
2920 
2921     if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2922         // Photoshop creates CMYK images as inverse CMYK.
2923         // These happen to be the only ones we've _ever_ seen.
2924         *ops++ = Op_invert;
2925 
2926         // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2927         dstAlpha = skcms_AlphaFormat_Unpremul;
2928     }
2929 
2930     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2931         *ops++ = Op_force_opaque;
2932     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2933         *ops++ = Op_premul;
2934     }
2935     if (dstFmt & 1) {
2936         *ops++ = Op_swap_rb;
2937     }
2938     switch (dstFmt >> 1) {
2939         default: return false;
2940         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2941         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2942         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2943         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2944         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2945         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2946         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2947         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2948         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2949         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2950         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2951         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
2952         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
2953         case skcms_PixelFormat_RGB_101010x_XR  >> 1: *ops++ = Op_store_101010x_XR; break;
2954         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2955         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2956         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2957         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2958 
2959         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2960             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2961             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2962             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2963             *ops++ = Op_store_8888;
2964             break;
2965     }
2966 
2967     auto run = baseline::run_program;
2968 #if defined(TEST_FOR_HSW)
2969     switch (cpu_type()) {
2970         case CpuType::None:                        break;
2971         case CpuType::HSW: run = hsw::run_program; break;
2972         case CpuType::SKX: run = hsw::run_program; break;
2973     }
2974 #endif
2975 #if defined(TEST_FOR_SKX)
2976     switch (cpu_type()) {
2977         case CpuType::None:                        break;
2978         case CpuType::HSW:                         break;
2979         case CpuType::SKX: run = skx::run_program; break;
2980     }
2981 #endif
2982     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2983     return true;
2984 }
2985 
assert_usable_as_destination(const skcms_ICCProfile * profile)2986 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2987 #if defined(NDEBUG)
2988     (void)profile;
2989 #else
2990     skcms_Matrix3x3 fromXYZD50;
2991     skcms_TransferFunction invR, invG, invB;
2992     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2993 #endif
2994 }
2995 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2996 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2997     if (!profile->has_B2A) {
2998         skcms_Matrix3x3 fromXYZD50;
2999         if (!profile->has_trc || !profile->has_toXYZD50
3000             || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
3001             return false;
3002         }
3003 
3004         skcms_TransferFunction tf[3];
3005         for (int i = 0; i < 3; i++) {
3006             skcms_TransferFunction inv;
3007             if (profile->trc[i].table_entries == 0
3008                 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
3009                 tf[i] = profile->trc[i].parametric;
3010                 continue;
3011             }
3012 
3013             float max_error;
3014             // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
3015             if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
3016                 return false;
3017             }
3018         }
3019 
3020         for (int i = 0; i < 3; ++i) {
3021             profile->trc[i].table_entries = 0;
3022             profile->trc[i].parametric = tf[i];
3023         }
3024     }
3025     assert_usable_as_destination(profile);
3026     return true;
3027 }
3028 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)3029 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
3030     // Call skcms_MakeUsableAsDestination() with B2A disabled;
3031     // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
3032     skcms_ICCProfile result = *profile;
3033     result.has_B2A = false;
3034     if (!skcms_MakeUsableAsDestination(&result)) {
3035         return false;
3036     }
3037 
3038     // Of the three, pick the transfer function that best fits the other two.
3039     int best_tf = 0;
3040     float min_max_error = INFINITY_;
3041     for (int i = 0; i < 3; i++) {
3042         skcms_TransferFunction inv;
3043         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
3044             return false;
3045         }
3046 
3047         float err = 0;
3048         for (int j = 0; j < 3; ++j) {
3049             err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
3050         }
3051         if (min_max_error > err) {
3052             min_max_error = err;
3053             best_tf = i;
3054         }
3055     }
3056 
3057     for (int i = 0; i < 3; i++) {
3058         result.trc[i].parametric = result.trc[best_tf].parametric;
3059     }
3060 
3061     *profile = result;
3062     assert_usable_as_destination(profile);
3063     return true;
3064 }
3065