1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33 #endif
34
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37 runtime_cpu_detection = false;
38 }
39
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49
50 static const union {
51 uint32_t bits;
52 float f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55
56 #if defined(__clang__) || defined(__GNUC__)
57 #define small_memcpy __builtin_memcpy
58 #else
59 #define small_memcpy memcpy
60 #endif
61
log2f_(float x)62 static float log2f_(float x) {
63 // The first approximation of log2(x) is its exponent 'e', minus 127.
64 int32_t bits;
65 small_memcpy(&bits, &x, sizeof(bits));
66
67 float e = (float)bits * (1.0f / (1<<23));
68
69 // If we use the mantissa too we can refine the error signficantly.
70 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71 float m;
72 small_memcpy(&m, &m_bits, sizeof(m));
73
74 return (e - 124.225514990f
75 - 1.498030302f*m
76 - 1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79 const float ln2 = 0.69314718f;
80 return ln2*log2f_(x);
81 }
82
exp2f_(float x)83 static float exp2f_(float x) {
84 float fract = x - floorf_(x);
85
86 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87 - 1.490129070f*fract
88 + 27.728023300f/(4.84252568f - fract));
89
90 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92 // Negative values are effectively underflow - we'll end up returning a (different) negative
93 // value, which makes no sense. So clamp to zero.
94 if (fbits >= (float)INT_MAX) {
95 return INFINITY_;
96 } else if (fbits < 0) {
97 return 0;
98 }
99
100 int32_t bits = (int32_t)fbits;
101 small_memcpy(&x, &bits, sizeof(x));
102 return x;
103 }
104
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107 assert (x >= 0);
108 return (x == 0) || (x == 1) ? x
109 : exp2f_(log2f_(x) * y);
110 }
111
expf_(float x)112 static float expf_(float x) {
113 const float log2_e = 1.4426950408889634074f;
114 return exp2f_(log2_e * x);
115 }
116
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123 int32_t bits;
124 memcpy(&bits, &x, sizeof(bits));
125 bits = bits - 1;
126 memcpy(&x, &bits, sizeof(bits));
127 return x;
128 }
129
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
134 struct TF_PQish { float A,B,C,D,E,F; };
135 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
136 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
137 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
138 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
139
TFKind_marker(TFKind kind)140 static float TFKind_marker(TFKind kind) {
141 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
142 return -(float)kind;
143 }
144
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)145 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
146 , TF_HLGish* hlg = nullptr) {
147 if (tf.g < 0 && (int)tf.g == tf.g) {
148 // TODO: soundness checks for PQ/HLG like we do for sRGBish?
149 switch ((int)tf.g) {
150 case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
151 case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
152 case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
153 }
154 return Bad;
155 }
156
157 // Basic soundness checks for sRGBish transfer functions.
158 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
159 // a,c,d,g should be non-negative to make any sense.
160 && tf.a >= 0
161 && tf.c >= 0
162 && tf.d >= 0
163 && tf.g >= 0
164 // Raising a negative value to a fractional tf->g produces complex numbers.
165 && tf.a * tf.d + tf.b >= 0) {
166 return sRGBish;
167 }
168
169 return Bad;
170 }
171
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)172 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
173 return classify(*tf) == sRGBish;
174 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)175 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
176 return classify(*tf) == PQish;
177 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)178 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
179 return classify(*tf) == HLGish;
180 }
181
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)182 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
183 float A, float B, float C,
184 float D, float E, float F) {
185 *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
186 assert(skcms_TransferFunction_isPQish(tf));
187 return true;
188 }
189
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)190 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
191 float K, float R, float G,
192 float a, float b, float c) {
193 *tf = { TFKind_marker(HLGish), R,G, a,b,c, K-1.0f };
194 assert(skcms_TransferFunction_isHLGish(tf));
195 return true;
196 }
197
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)198 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
199 float sign = x < 0 ? -1.0f : 1.0f;
200 x *= sign;
201
202 TF_PQish pq;
203 TF_HLGish hlg;
204 switch (classify(*tf, &pq, &hlg)) {
205 case Bad: break;
206
207 case HLGish: {
208 const float K = hlg.K_minus_1 + 1.0f;
209 return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
210 : expf_((x-hlg.c)*hlg.a) + hlg.b);
211 }
212
213 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
214 case HLGinvish: {
215 const float K = hlg.K_minus_1 + 1.0f;
216 x /= K;
217 return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
218 : hlg.a * logf_(x - hlg.b) + hlg.c);
219 }
220
221
222 case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
223 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
224
225 case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
226 / (pq.D + pq.E * powf_(x, pq.C)),
227 pq.F);
228 }
229 return 0;
230 }
231
232
eval_curve(const skcms_Curve * curve,float x)233 static float eval_curve(const skcms_Curve* curve, float x) {
234 if (curve->table_entries == 0) {
235 return skcms_TransferFunction_eval(&curve->parametric, x);
236 }
237
238 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
239 int lo = (int) ix ,
240 hi = (int)(float)minus_1_ulp(ix + 1.0f);
241 float t = ix - (float)lo;
242
243 float l, h;
244 if (curve->table_8) {
245 l = curve->table_8[lo] * (1/255.0f);
246 h = curve->table_8[hi] * (1/255.0f);
247 } else {
248 uint16_t be_l, be_h;
249 memcpy(&be_l, curve->table_16 + 2*lo, 2);
250 memcpy(&be_h, curve->table_16 + 2*hi, 2);
251 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
252 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
253 l = le_l * (1/65535.0f);
254 h = le_h * (1/65535.0f);
255 }
256 return l + (h-l)*t;
257 }
258
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)259 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
260 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
261 const float dx = 1.0f / (N - 1);
262 float err = 0;
263 for (uint32_t i = 0; i < N; i++) {
264 float x = i * dx,
265 y = eval_curve(curve, x);
266 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
267 }
268 return err;
269 }
270
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)271 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
272 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
273 }
274
275 // Additional ICC signature values that are only used internally
276 enum {
277 // File signature
278 skcms_Signature_acsp = 0x61637370,
279
280 // Tag signatures
281 skcms_Signature_rTRC = 0x72545243,
282 skcms_Signature_gTRC = 0x67545243,
283 skcms_Signature_bTRC = 0x62545243,
284 skcms_Signature_kTRC = 0x6B545243,
285
286 skcms_Signature_rXYZ = 0x7258595A,
287 skcms_Signature_gXYZ = 0x6758595A,
288 skcms_Signature_bXYZ = 0x6258595A,
289
290 skcms_Signature_A2B0 = 0x41324230,
291 skcms_Signature_B2A0 = 0x42324130,
292
293 skcms_Signature_CHAD = 0x63686164,
294 skcms_Signature_WTPT = 0x77747074,
295
296 // Type signatures
297 skcms_Signature_curv = 0x63757276,
298 skcms_Signature_mft1 = 0x6D667431,
299 skcms_Signature_mft2 = 0x6D667432,
300 skcms_Signature_mAB = 0x6D414220,
301 skcms_Signature_mBA = 0x6D424120,
302 skcms_Signature_para = 0x70617261,
303 skcms_Signature_sf32 = 0x73663332,
304 // XYZ is also a PCS signature, so it's defined in skcms.h
305 // skcms_Signature_XYZ = 0x58595A20,
306 };
307
read_big_u16(const uint8_t * ptr)308 static uint16_t read_big_u16(const uint8_t* ptr) {
309 uint16_t be;
310 memcpy(&be, ptr, sizeof(be));
311 #if defined(_MSC_VER)
312 return _byteswap_ushort(be);
313 #else
314 return __builtin_bswap16(be);
315 #endif
316 }
317
read_big_u32(const uint8_t * ptr)318 static uint32_t read_big_u32(const uint8_t* ptr) {
319 uint32_t be;
320 memcpy(&be, ptr, sizeof(be));
321 #if defined(_MSC_VER)
322 return _byteswap_ulong(be);
323 #else
324 return __builtin_bswap32(be);
325 #endif
326 }
327
read_big_i32(const uint8_t * ptr)328 static int32_t read_big_i32(const uint8_t* ptr) {
329 return (int32_t)read_big_u32(ptr);
330 }
331
read_big_fixed(const uint8_t * ptr)332 static float read_big_fixed(const uint8_t* ptr) {
333 return read_big_i32(ptr) * (1.0f / 65536.0f);
334 }
335
336 // Maps to an in-memory profile so that fields line up to the locations specified
337 // in ICC.1:2010, section 7.2
338 typedef struct {
339 uint8_t size [ 4];
340 uint8_t cmm_type [ 4];
341 uint8_t version [ 4];
342 uint8_t profile_class [ 4];
343 uint8_t data_color_space [ 4];
344 uint8_t pcs [ 4];
345 uint8_t creation_date_time [12];
346 uint8_t signature [ 4];
347 uint8_t platform [ 4];
348 uint8_t flags [ 4];
349 uint8_t device_manufacturer [ 4];
350 uint8_t device_model [ 4];
351 uint8_t device_attributes [ 8];
352 uint8_t rendering_intent [ 4];
353 uint8_t illuminant_X [ 4];
354 uint8_t illuminant_Y [ 4];
355 uint8_t illuminant_Z [ 4];
356 uint8_t creator [ 4];
357 uint8_t profile_id [16];
358 uint8_t reserved [28];
359 uint8_t tag_count [ 4]; // Technically not part of header, but required
360 } header_Layout;
361
362 typedef struct {
363 uint8_t signature [4];
364 uint8_t offset [4];
365 uint8_t size [4];
366 } tag_Layout;
367
get_tag_table(const skcms_ICCProfile * profile)368 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
369 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
370 }
371
372 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
373 // use of the type is for the CHAD tag that stores exactly nine values.
374 typedef struct {
375 uint8_t type [ 4];
376 uint8_t reserved [ 4];
377 uint8_t values [36];
378 } sf32_Layout;
379
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)380 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
381 skcms_ICCTag tag;
382 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
383 return false;
384 }
385
386 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
387 return false;
388 }
389
390 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
391 const uint8_t* values = sf32Tag->values;
392 for (int r = 0; r < 3; ++r)
393 for (int c = 0; c < 3; ++c, values += 4) {
394 m->vals[r][c] = read_big_fixed(values);
395 }
396 return true;
397 }
398
399 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
400 // the type are for tags/data that store exactly one triple.
401 typedef struct {
402 uint8_t type [4];
403 uint8_t reserved [4];
404 uint8_t X [4];
405 uint8_t Y [4];
406 uint8_t Z [4];
407 } XYZ_Layout;
408
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)409 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
410 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
411 return false;
412 }
413
414 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
415
416 *x = read_big_fixed(xyzTag->X);
417 *y = read_big_fixed(xyzTag->Y);
418 *z = read_big_fixed(xyzTag->Z);
419 return true;
420 }
421
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])422 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
423 skcms_ICCTag tag;
424 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
425 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
426 }
427
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)428 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
429 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
430 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
431 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
432 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
433 }
434
435 typedef struct {
436 uint8_t type [4];
437 uint8_t reserved_a [4];
438 uint8_t function_type [2];
439 uint8_t reserved_b [2];
440 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
441 } para_Layout;
442
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)443 static bool read_curve_para(const uint8_t* buf, uint32_t size,
444 skcms_Curve* curve, uint32_t* curve_size) {
445 if (size < SAFE_FIXED_SIZE(para_Layout)) {
446 return false;
447 }
448
449 const para_Layout* paraTag = (const para_Layout*)buf;
450
451 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
452 uint16_t function_type = read_big_u16(paraTag->function_type);
453 if (function_type > kGABCDEF) {
454 return false;
455 }
456
457 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
458 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
459 return false;
460 }
461
462 if (curve_size) {
463 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
464 }
465
466 curve->table_entries = 0;
467 curve->parametric.a = 1.0f;
468 curve->parametric.b = 0.0f;
469 curve->parametric.c = 0.0f;
470 curve->parametric.d = 0.0f;
471 curve->parametric.e = 0.0f;
472 curve->parametric.f = 0.0f;
473 curve->parametric.g = read_big_fixed(paraTag->variable);
474
475 switch (function_type) {
476 case kGAB:
477 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
478 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
479 if (curve->parametric.a == 0) {
480 return false;
481 }
482 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
483 break;
484 case kGABC:
485 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
486 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
487 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
488 if (curve->parametric.a == 0) {
489 return false;
490 }
491 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
492 curve->parametric.f = curve->parametric.e;
493 break;
494 case kGABCD:
495 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
496 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
497 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
498 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
499 break;
500 case kGABCDEF:
501 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
502 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
503 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
504 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
505 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
506 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
507 break;
508 }
509 return skcms_TransferFunction_isSRGBish(&curve->parametric);
510 }
511
512 typedef struct {
513 uint8_t type [4];
514 uint8_t reserved [4];
515 uint8_t value_count [4];
516 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
517 } curv_Layout;
518
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)519 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
520 skcms_Curve* curve, uint32_t* curve_size) {
521 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
522 return false;
523 }
524
525 const curv_Layout* curvTag = (const curv_Layout*)buf;
526
527 uint32_t value_count = read_big_u32(curvTag->value_count);
528 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
529 return false;
530 }
531
532 if (curve_size) {
533 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
534 }
535
536 if (value_count < 2) {
537 curve->table_entries = 0;
538 curve->parametric.a = 1.0f;
539 curve->parametric.b = 0.0f;
540 curve->parametric.c = 0.0f;
541 curve->parametric.d = 0.0f;
542 curve->parametric.e = 0.0f;
543 curve->parametric.f = 0.0f;
544 if (value_count == 0) {
545 // Empty tables are a shorthand for an identity curve
546 curve->parametric.g = 1.0f;
547 } else {
548 // Single entry tables are a shorthand for simple gamma
549 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
550 }
551 } else {
552 curve->table_8 = nullptr;
553 curve->table_16 = curvTag->variable;
554 curve->table_entries = value_count;
555 }
556
557 return true;
558 }
559
560 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
561 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)562 static bool read_curve(const uint8_t* buf, uint32_t size,
563 skcms_Curve* curve, uint32_t* curve_size) {
564 if (!buf || size < 4 || !curve) {
565 return false;
566 }
567
568 uint32_t type = read_big_u32(buf);
569 if (type == skcms_Signature_para) {
570 return read_curve_para(buf, size, curve, curve_size);
571 } else if (type == skcms_Signature_curv) {
572 return read_curve_curv(buf, size, curve, curve_size);
573 }
574
575 return false;
576 }
577
578 // mft1 and mft2 share a large chunk of data
579 typedef struct {
580 uint8_t type [ 4];
581 uint8_t reserved_a [ 4];
582 uint8_t input_channels [ 1];
583 uint8_t output_channels [ 1];
584 uint8_t grid_points [ 1];
585 uint8_t reserved_b [ 1];
586 uint8_t matrix [36];
587 } mft_CommonLayout;
588
589 typedef struct {
590 mft_CommonLayout common [1];
591
592 uint8_t variable [1/*variable*/];
593 } mft1_Layout;
594
595 typedef struct {
596 mft_CommonLayout common [1];
597
598 uint8_t input_table_entries [2];
599 uint8_t output_table_entries [2];
600 uint8_t variable [1/*variable*/];
601 } mft2_Layout;
602
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)603 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
604 // MFT matrices are applied before the first set of curves, but must be identity unless the
605 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
606 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
607 // field/flag.
608 a2b->matrix_channels = 0;
609 a2b-> input_channels = mftTag-> input_channels[0];
610 a2b->output_channels = mftTag->output_channels[0];
611
612 // We require exactly three (ie XYZ/Lab/RGB) output channels
613 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
614 return false;
615 }
616 // We require at least one, and no more than four (ie CMYK) input channels
617 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
618 return false;
619 }
620
621 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
622 a2b->grid_points[i] = mftTag->grid_points[0];
623 }
624 // The grid only makes sense with at least two points along each axis
625 if (a2b->grid_points[0] < 2) {
626 return false;
627 }
628 return true;
629 }
630
631 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)632 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
633 // Same as A2B.
634 b2a->matrix_channels = 0;
635 b2a-> input_channels = mftTag-> input_channels[0];
636 b2a->output_channels = mftTag->output_channels[0];
637
638
639 // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
640 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
641 return false;
642 }
643 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
644 return false;
645 }
646
647 // Same as A2B.
648 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
649 b2a->grid_points[i] = mftTag->grid_points[0];
650 }
651 if (b2a->grid_points[0] < 2) {
652 return false;
653 }
654 return true;
655 }
656
657 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)658 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
659 uint32_t input_table_entries, uint32_t output_table_entries,
660 A2B_or_B2A* out) {
661 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
662 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
663 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
664
665 // [input|output]_channels are <= 4, so still no overflow
666 uint32_t byte_len_all_input_tables = out->input_channels * byte_len_per_input_table;
667 uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
668
669 uint64_t grid_size = out->output_channels * byte_width;
670 for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
671 grid_size *= out->grid_points[axis];
672 }
673
674 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
675 return false;
676 }
677
678 for (uint32_t i = 0; i < out->input_channels; ++i) {
679 out->input_curves[i].table_entries = input_table_entries;
680 if (byte_width == 1) {
681 out->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
682 out->input_curves[i].table_16 = nullptr;
683 } else {
684 out->input_curves[i].table_8 = nullptr;
685 out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
686 }
687 }
688
689 if (byte_width == 1) {
690 out->grid_8 = table_base + byte_len_all_input_tables;
691 out->grid_16 = nullptr;
692 } else {
693 out->grid_8 = nullptr;
694 out->grid_16 = table_base + byte_len_all_input_tables;
695 }
696
697 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
698 for (uint32_t i = 0; i < out->output_channels; ++i) {
699 out->output_curves[i].table_entries = output_table_entries;
700 if (byte_width == 1) {
701 out->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
702 out->output_curves[i].table_16 = nullptr;
703 } else {
704 out->output_curves[i].table_8 = nullptr;
705 out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
706 }
707 }
708
709 return true;
710 }
711
712 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)713 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
714 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
715 return false;
716 }
717
718 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
719 if (!read_mft_common(mftTag->common, out)) {
720 return false;
721 }
722
723 uint32_t input_table_entries = 256;
724 uint32_t output_table_entries = 256;
725
726 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
727 input_table_entries, output_table_entries, out);
728 }
729
730 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)731 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
732 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
733 return false;
734 }
735
736 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
737 if (!read_mft_common(mftTag->common, out)) {
738 return false;
739 }
740
741 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
742 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
743
744 // ICC spec mandates that 2 <= table_entries <= 4096
745 if (input_table_entries < 2 || input_table_entries > 4096 ||
746 output_table_entries < 2 || output_table_entries > 4096) {
747 return false;
748 }
749
750 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
751 input_table_entries, output_table_entries, out);
752 }
753
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)754 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
755 uint32_t num_curves, skcms_Curve* curves) {
756 for (uint32_t i = 0; i < num_curves; ++i) {
757 if (curve_offset > size) {
758 return false;
759 }
760
761 uint32_t curve_bytes;
762 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
763 return false;
764 }
765
766 if (curve_bytes > UINT32_MAX - 3) {
767 return false;
768 }
769 curve_bytes = (curve_bytes + 3) & ~3U;
770
771 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
772 curve_offset = (uint32_t)new_offset_64;
773 if (new_offset_64 != curve_offset) {
774 return false;
775 }
776 }
777
778 return true;
779 }
780
781 // mAB and mBA tags use the same encoding, including color lookup tables.
782 typedef struct {
783 uint8_t type [ 4];
784 uint8_t reserved_a [ 4];
785 uint8_t input_channels [ 1];
786 uint8_t output_channels [ 1];
787 uint8_t reserved_b [ 2];
788 uint8_t b_curve_offset [ 4];
789 uint8_t matrix_offset [ 4];
790 uint8_t m_curve_offset [ 4];
791 uint8_t clut_offset [ 4];
792 uint8_t a_curve_offset [ 4];
793 } mAB_or_mBA_Layout;
794
795 typedef struct {
796 uint8_t grid_points [16];
797 uint8_t grid_byte_width [ 1];
798 uint8_t reserved [ 3];
799 uint8_t variable [1/*variable*/];
800 } CLUT_Layout;
801
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)802 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
803 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
804 return false;
805 }
806
807 const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
808
809 a2b->input_channels = mABTag->input_channels[0];
810 a2b->output_channels = mABTag->output_channels[0];
811
812 // We require exactly three (ie XYZ/Lab/RGB) output channels
813 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
814 return false;
815 }
816 // We require no more than four (ie CMYK) input channels
817 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
818 return false;
819 }
820
821 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
822 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
823 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
824 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
825 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
826
827 // "B" curves must be present
828 if (0 == b_curve_offset) {
829 return false;
830 }
831
832 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
833 a2b->output_curves)) {
834 return false;
835 }
836
837 // "M" curves and Matrix must be used together
838 if (0 != m_curve_offset) {
839 if (0 == matrix_offset) {
840 return false;
841 }
842 a2b->matrix_channels = a2b->output_channels;
843 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
844 a2b->matrix_curves)) {
845 return false;
846 }
847
848 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
849 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
850 return false;
851 }
852 float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
853 const uint8_t* mtx_buf = tag->buf + matrix_offset;
854 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
855 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
856 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
857 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
858 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
859 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
860 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
861 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
862 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
863 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
864 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
865 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
866 } else {
867 if (0 != matrix_offset) {
868 return false;
869 }
870 a2b->matrix_channels = 0;
871 }
872
873 // "A" curves and CLUT must be used together
874 if (0 != a_curve_offset) {
875 if (0 == clut_offset) {
876 return false;
877 }
878 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
879 a2b->input_curves)) {
880 return false;
881 }
882
883 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
884 return false;
885 }
886 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
887
888 if (clut->grid_byte_width[0] == 1) {
889 a2b->grid_8 = clut->variable;
890 a2b->grid_16 = nullptr;
891 } else if (clut->grid_byte_width[0] == 2) {
892 a2b->grid_8 = nullptr;
893 a2b->grid_16 = clut->variable;
894 } else {
895 return false;
896 }
897
898 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; // the payload
899 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
900 a2b->grid_points[i] = clut->grid_points[i];
901 // The grid only makes sense with at least two points along each axis
902 if (a2b->grid_points[i] < 2) {
903 return false;
904 }
905 grid_size *= a2b->grid_points[i];
906 }
907 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
908 return false;
909 }
910 } else {
911 if (0 != clut_offset) {
912 return false;
913 }
914
915 // If there is no CLUT, the number of input and output channels must match
916 if (a2b->input_channels != a2b->output_channels) {
917 return false;
918 }
919
920 // Zero out the number of input channels to signal that we're skipping this stage
921 a2b->input_channels = 0;
922 }
923
924 return true;
925 }
926
927 // Exactly the same as read_tag_mab(), except where there are comments.
928 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)929 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
930 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
931 return false;
932 }
933
934 const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
935
936 b2a->input_channels = mBATag->input_channels[0];
937 b2a->output_channels = mBATag->output_channels[0];
938
939 // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
940 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
941 return false;
942 }
943 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
944 return false;
945 }
946
947 uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
948 uint32_t matrix_offset = read_big_u32(mBATag->matrix_offset);
949 uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
950 uint32_t clut_offset = read_big_u32(mBATag->clut_offset);
951 uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
952
953 if (0 == b_curve_offset) {
954 return false;
955 }
956
957 // "B" curves are our inputs, not outputs.
958 if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
959 b2a->input_curves)) {
960 return false;
961 }
962
963 if (0 != m_curve_offset) {
964 if (0 == matrix_offset) {
965 return false;
966 }
967 // Matrix channels is tied to input_channels (3), not output_channels.
968 b2a->matrix_channels = b2a->input_channels;
969
970 if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
971 b2a->matrix_curves)) {
972 return false;
973 }
974
975 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
976 return false;
977 }
978 float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f; // TODO: understand
979 const uint8_t* mtx_buf = tag->buf + matrix_offset;
980 b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
981 b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
982 b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
983 b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
984 b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
985 b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
986 b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
987 b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
988 b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
989 b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
990 b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
991 b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
992 } else {
993 if (0 != matrix_offset) {
994 return false;
995 }
996 b2a->matrix_channels = 0;
997 }
998
999 if (0 != a_curve_offset) {
1000 if (0 == clut_offset) {
1001 return false;
1002 }
1003
1004 // "A" curves are our output, not input.
1005 if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1006 b2a->output_curves)) {
1007 return false;
1008 }
1009
1010 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1011 return false;
1012 }
1013 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1014
1015 if (clut->grid_byte_width[0] == 1) {
1016 b2a->grid_8 = clut->variable;
1017 b2a->grid_16 = nullptr;
1018 } else if (clut->grid_byte_width[0] == 2) {
1019 b2a->grid_8 = nullptr;
1020 b2a->grid_16 = clut->variable;
1021 } else {
1022 return false;
1023 }
1024
1025 uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1026 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1027 b2a->grid_points[i] = clut->grid_points[i];
1028 if (b2a->grid_points[i] < 2) {
1029 return false;
1030 }
1031 grid_size *= b2a->grid_points[i];
1032 }
1033 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1034 return false;
1035 }
1036 } else {
1037 if (0 != clut_offset) {
1038 return false;
1039 }
1040
1041 if (b2a->input_channels != b2a->output_channels) {
1042 return false;
1043 }
1044
1045 // Zero out *output* channels to skip this stage.
1046 b2a->output_channels = 0;
1047 }
1048 return true;
1049 }
1050
1051 // If you pass f, we'll fit a possibly-non-zero value for *f.
1052 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1053 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1054 float* c, float* d, float* f = nullptr) {
1055 assert(N > 1);
1056 // We iteratively fit the first points to the TF's linear piece.
1057 // We want the cx + f line to pass through the first and last points we fit exactly.
1058 //
1059 // As we walk along the points we find the minimum and maximum slope of the line before the
1060 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
1061 // emtpy, when we definitely can't add any more points.
1062 //
1063 // Some points' error intervals may intersect the running interval but not lie fully
1064 // within it. So we keep track of the last point we saw that is a valid end point candidate,
1065 // and once the search is done, back up to build the line through *that* point.
1066 const float dx = 1.0f / (N - 1);
1067
1068 int lin_points = 1;
1069
1070 float f_zero = 0.0f;
1071 if (f) {
1072 *f = eval_curve(curve, 0);
1073 } else {
1074 f = &f_zero;
1075 }
1076
1077
1078 float slope_min = -INFINITY_;
1079 float slope_max = +INFINITY_;
1080 for (int i = 1; i < N; ++i) {
1081 float x = i * dx;
1082 float y = eval_curve(curve, x);
1083
1084 float slope_max_i = (y + tol - *f) / x,
1085 slope_min_i = (y - tol - *f) / x;
1086 if (slope_max_i < slope_min || slope_max < slope_min_i) {
1087 // Slope intervals would no longer overlap.
1088 break;
1089 }
1090 slope_max = fminf_(slope_max, slope_max_i);
1091 slope_min = fmaxf_(slope_min, slope_min_i);
1092
1093 float cur_slope = (y - *f) / x;
1094 if (slope_min <= cur_slope && cur_slope <= slope_max) {
1095 lin_points = i + 1;
1096 *c = cur_slope;
1097 }
1098 }
1099
1100 // Set D to the last point that met our tolerance.
1101 *d = (lin_points - 1) * dx;
1102 return lin_points;
1103 }
1104
1105 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1106 static void canonicalize_identity(skcms_Curve* curve) {
1107 if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1108 int N = (int)curve->table_entries;
1109
1110 float c = 0.0f, d = 0.0f, f = 0.0f;
1111 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
1112 && c == 1.0f
1113 && f == 0.0f) {
1114 curve->table_entries = 0;
1115 curve->table_8 = nullptr;
1116 curve->table_16 = nullptr;
1117 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
1118 }
1119 }
1120 }
1121
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1122 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1123 bool ok = false;
1124 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1125 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1126 if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1127 if (!ok) {
1128 return false;
1129 }
1130
1131 if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1132 if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1133 if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1134 if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1135
1136 if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1137 if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1138 if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1139
1140 if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1141 if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1142 if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1143
1144 return true;
1145 }
1146
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1147 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1148 bool ok = false;
1149 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1150 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1151 if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1152 if (!ok) {
1153 return false;
1154 }
1155
1156 if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1157 if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1158 if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1159
1160 if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1161 if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1162 if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1163
1164 if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1165 if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1166 if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1167 if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1168
1169 return true;
1170 }
1171
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1172 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1173 if (!profile || !profile->buffer || !tag) { return; }
1174 if (idx > profile->tag_count) { return; }
1175 const tag_Layout* tags = get_tag_table(profile);
1176 tag->signature = read_big_u32(tags[idx].signature);
1177 tag->size = read_big_u32(tags[idx].size);
1178 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
1179 tag->type = read_big_u32(tag->buf);
1180 }
1181
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1182 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1183 if (!profile || !profile->buffer || !tag) { return false; }
1184 const tag_Layout* tags = get_tag_table(profile);
1185 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1186 if (read_big_u32(tags[i].signature) == sig) {
1187 tag->signature = sig;
1188 tag->size = read_big_u32(tags[i].size);
1189 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
1190 tag->type = read_big_u32(tag->buf);
1191 return true;
1192 }
1193 }
1194 return false;
1195 }
1196
usable_as_src(const skcms_ICCProfile * profile)1197 static bool usable_as_src(const skcms_ICCProfile* profile) {
1198 return profile->has_A2B
1199 || (profile->has_trc && profile->has_toXYZD50);
1200 }
1201
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1202 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1203 const int priority[], const int priorities,
1204 skcms_ICCProfile* profile) {
1205 assert(SAFE_SIZEOF(header_Layout) == 132);
1206
1207 if (!profile) {
1208 return false;
1209 }
1210 memset(profile, 0, SAFE_SIZEOF(*profile));
1211
1212 if (len < SAFE_SIZEOF(header_Layout)) {
1213 return false;
1214 }
1215
1216 // Byte-swap all header fields
1217 const header_Layout* header = (const header_Layout*)buf;
1218 profile->buffer = (const uint8_t*)buf;
1219 profile->size = read_big_u32(header->size);
1220 uint32_t version = read_big_u32(header->version);
1221 profile->data_color_space = read_big_u32(header->data_color_space);
1222 profile->pcs = read_big_u32(header->pcs);
1223 uint32_t signature = read_big_u32(header->signature);
1224 float illuminant_X = read_big_fixed(header->illuminant_X);
1225 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1226 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1227 profile->tag_count = read_big_u32(header->tag_count);
1228
1229 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1230 // and major version
1231 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1232 if (signature != skcms_Signature_acsp ||
1233 profile->size > len ||
1234 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1235 (version >> 24) > 4) {
1236 return false;
1237 }
1238
1239 // Validate that illuminant is D50 white
1240 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1241 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1242 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1243 return false;
1244 }
1245
1246 // Validate that all tag entries have sane offset + size
1247 const tag_Layout* tags = get_tag_table(profile);
1248 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1249 uint32_t tag_offset = read_big_u32(tags[i].offset);
1250 uint32_t tag_size = read_big_u32(tags[i].size);
1251 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1252 if (tag_size < 4 || tag_end > profile->size) {
1253 return false;
1254 }
1255 }
1256
1257 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1258 return false;
1259 }
1260
1261 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1262
1263 // Pre-parse commonly used tags.
1264 skcms_ICCTag kTRC;
1265 if (profile->data_color_space == skcms_Signature_Gray &&
1266 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1267 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1268 // Malformed tag
1269 return false;
1270 }
1271 profile->trc[1] = profile->trc[0];
1272 profile->trc[2] = profile->trc[0];
1273 profile->has_trc = true;
1274
1275 if (pcs_is_xyz) {
1276 profile->toXYZD50.vals[0][0] = illuminant_X;
1277 profile->toXYZD50.vals[1][1] = illuminant_Y;
1278 profile->toXYZD50.vals[2][2] = illuminant_Z;
1279 profile->has_toXYZD50 = true;
1280 }
1281 } else {
1282 skcms_ICCTag rTRC, gTRC, bTRC;
1283 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1284 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1285 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1286 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1287 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1288 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1289 // Malformed TRC tags
1290 return false;
1291 }
1292 profile->has_trc = true;
1293 }
1294
1295 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1296 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1297 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1298 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1299 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1300 // Malformed XYZ tags
1301 return false;
1302 }
1303 profile->has_toXYZD50 = true;
1304 }
1305 }
1306
1307 for (int i = 0; i < priorities; i++) {
1308 // enum { perceptual, relative_colormetric, saturation }
1309 if (priority[i] < 0 || priority[i] > 2) {
1310 return false;
1311 }
1312 uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1313 skcms_ICCTag tag;
1314 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1315 if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1316 // Malformed A2B tag
1317 return false;
1318 }
1319 profile->has_A2B = true;
1320 break;
1321 }
1322 }
1323
1324 for (int i = 0; i < priorities; i++) {
1325 // enum { perceptual, relative_colormetric, saturation }
1326 if (priority[i] < 0 || priority[i] > 2) {
1327 return false;
1328 }
1329 uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1330 skcms_ICCTag tag;
1331 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1332 if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1333 // Malformed B2A tag
1334 return false;
1335 }
1336 profile->has_B2A = true;
1337 break;
1338 }
1339 }
1340
1341 return usable_as_src(profile);
1342 }
1343
1344
skcms_sRGB_profile()1345 const skcms_ICCProfile* skcms_sRGB_profile() {
1346 static const skcms_ICCProfile sRGB_profile = {
1347 nullptr, // buffer, moot here
1348
1349 0, // size, moot here
1350 skcms_Signature_RGB, // data_color_space
1351 skcms_Signature_XYZ, // pcs
1352 0, // tag count, moot here
1353
1354 // We choose to represent sRGB with its canonical transfer function,
1355 // and with its canonical XYZD50 gamut matrix.
1356 true, // has_trc, followed by the 3 trc curves
1357 {
1358 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1359 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1360 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1361 },
1362
1363 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1364 {{
1365 { 0.436065674f, 0.385147095f, 0.143066406f },
1366 { 0.222488403f, 0.716873169f, 0.060607910f },
1367 { 0.013916016f, 0.097076416f, 0.714096069f },
1368 }},
1369
1370 false, // has_A2B, followed by A2B itself, which we don't care about.
1371 {
1372 0,
1373 {
1374 {{0, {0,0, 0,0,0,0,0}}},
1375 {{0, {0,0, 0,0,0,0,0}}},
1376 {{0, {0,0, 0,0,0,0,0}}},
1377 {{0, {0,0, 0,0,0,0,0}}},
1378 },
1379 {0,0,0,0},
1380 nullptr,
1381 nullptr,
1382
1383 0,
1384 {
1385 {{0, {0,0, 0,0,0,0,0}}},
1386 {{0, {0,0, 0,0,0,0,0}}},
1387 {{0, {0,0, 0,0,0,0,0}}},
1388 },
1389 {{
1390 { 0,0,0,0 },
1391 { 0,0,0,0 },
1392 { 0,0,0,0 },
1393 }},
1394
1395 0,
1396 {
1397 {{0, {0,0, 0,0,0,0,0}}},
1398 {{0, {0,0, 0,0,0,0,0}}},
1399 {{0, {0,0, 0,0,0,0,0}}},
1400 },
1401 },
1402
1403 false, // has_B2A, followed by B2A itself, which we also don't care about.
1404 {
1405 0,
1406 {
1407 {{0, {0,0, 0,0,0,0,0}}},
1408 {{0, {0,0, 0,0,0,0,0}}},
1409 {{0, {0,0, 0,0,0,0,0}}},
1410 },
1411
1412 0,
1413 {{
1414 { 0,0,0,0 },
1415 { 0,0,0,0 },
1416 { 0,0,0,0 },
1417 }},
1418 {
1419 {{0, {0,0, 0,0,0,0,0}}},
1420 {{0, {0,0, 0,0,0,0,0}}},
1421 {{0, {0,0, 0,0,0,0,0}}},
1422 },
1423
1424 0,
1425 {0,0,0,0},
1426 nullptr,
1427 nullptr,
1428 {
1429 {{0, {0,0, 0,0,0,0,0}}},
1430 {{0, {0,0, 0,0,0,0,0}}},
1431 {{0, {0,0, 0,0,0,0,0}}},
1432 {{0, {0,0, 0,0,0,0,0}}},
1433 },
1434 },
1435 };
1436 return &sRGB_profile;
1437 }
1438
skcms_XYZD50_profile()1439 const skcms_ICCProfile* skcms_XYZD50_profile() {
1440 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1441 static const skcms_ICCProfile XYZD50_profile = {
1442 nullptr, // buffer, moot here
1443
1444 0, // size, moot here
1445 skcms_Signature_RGB, // data_color_space
1446 skcms_Signature_XYZ, // pcs
1447 0, // tag count, moot here
1448
1449 true, // has_trc, followed by the 3 trc curves
1450 {
1451 {{0, {1,1, 0,0,0,0,0}}},
1452 {{0, {1,1, 0,0,0,0,0}}},
1453 {{0, {1,1, 0,0,0,0,0}}},
1454 },
1455
1456 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1457 {{
1458 { 1,0,0 },
1459 { 0,1,0 },
1460 { 0,0,1 },
1461 }},
1462
1463 false, // has_A2B, followed by A2B itself, which we don't care about.
1464 {
1465 0,
1466 {
1467 {{0, {0,0, 0,0,0,0,0}}},
1468 {{0, {0,0, 0,0,0,0,0}}},
1469 {{0, {0,0, 0,0,0,0,0}}},
1470 {{0, {0,0, 0,0,0,0,0}}},
1471 },
1472 {0,0,0,0},
1473 nullptr,
1474 nullptr,
1475
1476 0,
1477 {
1478 {{0, {0,0, 0,0,0,0,0}}},
1479 {{0, {0,0, 0,0,0,0,0}}},
1480 {{0, {0,0, 0,0,0,0,0}}},
1481 },
1482 {{
1483 { 0,0,0,0 },
1484 { 0,0,0,0 },
1485 { 0,0,0,0 },
1486 }},
1487
1488 0,
1489 {
1490 {{0, {0,0, 0,0,0,0,0}}},
1491 {{0, {0,0, 0,0,0,0,0}}},
1492 {{0, {0,0, 0,0,0,0,0}}},
1493 },
1494 },
1495
1496 false, // has_B2A, followed by B2A itself, which we also don't care about.
1497 {
1498 0,
1499 {
1500 {{0, {0,0, 0,0,0,0,0}}},
1501 {{0, {0,0, 0,0,0,0,0}}},
1502 {{0, {0,0, 0,0,0,0,0}}},
1503 },
1504
1505 0,
1506 {{
1507 { 0,0,0,0 },
1508 { 0,0,0,0 },
1509 { 0,0,0,0 },
1510 }},
1511 {
1512 {{0, {0,0, 0,0,0,0,0}}},
1513 {{0, {0,0, 0,0,0,0,0}}},
1514 {{0, {0,0, 0,0,0,0,0}}},
1515 },
1516
1517 0,
1518 {0,0,0,0},
1519 nullptr,
1520 nullptr,
1521 {
1522 {{0, {0,0, 0,0,0,0,0}}},
1523 {{0, {0,0, 0,0,0,0,0}}},
1524 {{0, {0,0, 0,0,0,0,0}}},
1525 {{0, {0,0, 0,0,0,0,0}}},
1526 },
1527 },
1528 };
1529
1530 return &XYZD50_profile;
1531 }
1532
skcms_sRGB_TransferFunction()1533 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1534 return &skcms_sRGB_profile()->trc[0].parametric;
1535 }
1536
skcms_sRGB_Inverse_TransferFunction()1537 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1538 static const skcms_TransferFunction sRGB_inv =
1539 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1540 return &sRGB_inv;
1541 }
1542
skcms_Identity_TransferFunction()1543 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1544 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1545 return &identity;
1546 }
1547
1548 const uint8_t skcms_252_random_bytes[] = {
1549 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1550 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1551 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1552 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1553 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1554 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1555 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1556 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1557 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1558 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1559 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1560 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1561 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1562 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1563 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1564 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1565 };
1566
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1567 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1568 // Test for exactly equal profiles first.
1569 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1570 return true;
1571 }
1572
1573 // For now this is the essentially the same strategy we use in test_only.c
1574 // for our skcms_Transform() smoke tests:
1575 // 1) transform A to XYZD50
1576 // 2) transform B to XYZD50
1577 // 3) return true if they're similar enough
1578 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1579
1580 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1581 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1582
1583 // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1584 // to be treated as equal. But CMYK profiles are a totally different ballgame.
1585 const auto CMYK = skcms_Signature_CMYK;
1586 if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1587 return false;
1588 }
1589
1590 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1591 // TODO: working with RGBA_8888 either way is probably fastest.
1592 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1593 size_t npixels = 84;
1594 if (A->data_color_space == skcms_Signature_CMYK) {
1595 fmt = skcms_PixelFormat_RGBA_8888;
1596 npixels = 63;
1597 }
1598
1599 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1600 // use pre-canned results and skip that skcms_Transform() call?
1601 uint8_t dstA[252],
1602 dstB[252];
1603 if (!skcms_Transform(
1604 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1605 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1606 npixels)) {
1607 return false;
1608 }
1609 if (!skcms_Transform(
1610 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1611 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1612 npixels)) {
1613 return false;
1614 }
1615
1616 // TODO: make sure this final check has reasonable codegen.
1617 for (size_t i = 0; i < 252; i++) {
1618 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1619 return false;
1620 }
1621 }
1622 return true;
1623 }
1624
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1625 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1626 const skcms_TransferFunction* inv_tf) {
1627 if (!profile || !profile->has_trc) {
1628 return false;
1629 }
1630
1631 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1632 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1633 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1634 }
1635
is_zero_to_one(float x)1636 static bool is_zero_to_one(float x) {
1637 return 0 <= x && x <= 1;
1638 }
1639
1640 typedef struct { float vals[3]; } skcms_Vector3;
1641
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1642 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1643 skcms_Vector3 dst = {{0,0,0}};
1644 for (int row = 0; row < 3; ++row) {
1645 dst.vals[row] = m->vals[row][0] * v->vals[0]
1646 + m->vals[row][1] * v->vals[1]
1647 + m->vals[row][2] * v->vals[2];
1648 }
1649 return dst;
1650 }
1651
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1652 bool skcms_AdaptToXYZD50(float wx, float wy,
1653 skcms_Matrix3x3* toXYZD50) {
1654 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1655 !toXYZD50) {
1656 return false;
1657 }
1658
1659 // Assumes that Y is 1.0f.
1660 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1661
1662 // Now convert toXYZ matrix to toXYZD50.
1663 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1664
1665 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1666 // the matrices below. The Bradford method is used by Adobe and is widely considered
1667 // to be the best.
1668 skcms_Matrix3x3 xyz_to_lms = {{
1669 { 0.8951f, 0.2664f, -0.1614f },
1670 { -0.7502f, 1.7135f, 0.0367f },
1671 { 0.0389f, -0.0685f, 1.0296f },
1672 }};
1673 skcms_Matrix3x3 lms_to_xyz = {{
1674 { 0.9869929f, -0.1470543f, 0.1599627f },
1675 { 0.4323053f, 0.5183603f, 0.0492912f },
1676 { -0.0085287f, 0.0400428f, 0.9684867f },
1677 }};
1678
1679 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1680 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1681
1682 *toXYZD50 = {{
1683 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1684 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1685 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1686 }};
1687 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1688 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1689
1690 return true;
1691 }
1692
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1693 bool skcms_PrimariesToXYZD50(float rx, float ry,
1694 float gx, float gy,
1695 float bx, float by,
1696 float wx, float wy,
1697 skcms_Matrix3x3* toXYZD50) {
1698 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1699 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1700 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1701 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1702 !toXYZD50) {
1703 return false;
1704 }
1705
1706 // First, we need to convert xy values (primaries) to XYZ.
1707 skcms_Matrix3x3 primaries = {{
1708 { rx, gx, bx },
1709 { ry, gy, by },
1710 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1711 }};
1712 skcms_Matrix3x3 primaries_inv;
1713 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1714 return false;
1715 }
1716
1717 // Assumes that Y is 1.0f.
1718 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1719 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1720
1721 skcms_Matrix3x3 toXYZ = {{
1722 { XYZ.vals[0], 0, 0 },
1723 { 0, XYZ.vals[1], 0 },
1724 { 0, 0, XYZ.vals[2] },
1725 }};
1726 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1727
1728 skcms_Matrix3x3 DXtoD50;
1729 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1730 return false;
1731 }
1732
1733 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1734 return true;
1735 }
1736
1737
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1738 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1739 double a00 = src->vals[0][0],
1740 a01 = src->vals[1][0],
1741 a02 = src->vals[2][0],
1742 a10 = src->vals[0][1],
1743 a11 = src->vals[1][1],
1744 a12 = src->vals[2][1],
1745 a20 = src->vals[0][2],
1746 a21 = src->vals[1][2],
1747 a22 = src->vals[2][2];
1748
1749 double b0 = a00*a11 - a01*a10,
1750 b1 = a00*a12 - a02*a10,
1751 b2 = a01*a12 - a02*a11,
1752 b3 = a20,
1753 b4 = a21,
1754 b5 = a22;
1755
1756 double determinant = b0*b5
1757 - b1*b4
1758 + b2*b3;
1759
1760 if (determinant == 0) {
1761 return false;
1762 }
1763
1764 double invdet = 1.0 / determinant;
1765 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1766 return false;
1767 }
1768
1769 b0 *= invdet;
1770 b1 *= invdet;
1771 b2 *= invdet;
1772 b3 *= invdet;
1773 b4 *= invdet;
1774 b5 *= invdet;
1775
1776 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1777 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1778 dst->vals[2][0] = (float)( + b2 );
1779 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1780 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1781 dst->vals[2][1] = (float)( - b1 );
1782 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1783 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1784 dst->vals[2][2] = (float)( + b0 );
1785
1786 for (int r = 0; r < 3; ++r)
1787 for (int c = 0; c < 3; ++c) {
1788 if (!isfinitef_(dst->vals[r][c])) {
1789 return false;
1790 }
1791 }
1792 return true;
1793 }
1794
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1795 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1796 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1797 for (int r = 0; r < 3; r++)
1798 for (int c = 0; c < 3; c++) {
1799 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1800 + A->vals[r][1] * B->vals[1][c]
1801 + A->vals[r][2] * B->vals[2][c];
1802 }
1803 return m;
1804 }
1805
1806 #if defined(__clang__)
1807 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1808 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1809 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1810 TF_PQish pq;
1811 TF_HLGish hlg;
1812 switch (classify(*src, &pq, &hlg)) {
1813 case Bad: return false;
1814 case sRGBish: break; // handled below
1815
1816 case PQish:
1817 *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F
1818 , pq.B, -pq.E, 1.0f/pq.C};
1819 return true;
1820
1821 case HLGish:
1822 *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1823 , 1.0f/hlg.a, hlg.b, hlg.c
1824 , hlg.K_minus_1 };
1825 return true;
1826
1827 case HLGinvish:
1828 *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1829 , 1.0f/hlg.a, hlg.b, hlg.c
1830 , hlg.K_minus_1 };
1831 return true;
1832 }
1833
1834 assert (classify(*src) == sRGBish);
1835
1836 // We're inverting this function, solving for x in terms of y.
1837 // y = (cx + f) x < d
1838 // (ax + b)^g + e x ≥ d
1839 // The inverse of this function can be expressed in the same piecewise form.
1840 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1841
1842 // We'll start by finding the new threshold inv.d.
1843 // In principle we should be able to find that by solving for y at x=d from either side.
1844 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1845 float d_l = src->c * src->d + src->f,
1846 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1847 if (fabsf_(d_l - d_r) > 1/512.0f) {
1848 return false;
1849 }
1850 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1851
1852 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1853 if (inv.d > 0) {
1854 // Inverting the linear section is pretty straightfoward:
1855 // y = cx + f
1856 // y - f = cx
1857 // (1/c)y - f/c = x
1858 inv.c = 1.0f/src->c;
1859 inv.f = -src->f/src->c;
1860 }
1861
1862 // The interesting part is inverting the nonlinear section:
1863 // y = (ax + b)^g + e.
1864 // y - e = (ax + b)^g
1865 // (y - e)^1/g = ax + b
1866 // (y - e)^1/g - b = ax
1867 // (1/a)(y - e)^1/g - b/a = x
1868 //
1869 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1870 // let k = (1/a)^g
1871 // (1/a)( y - e)^1/g - b/a = x
1872 // (ky - ke)^1/g - b/a = x
1873
1874 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1875 inv.g = 1.0f / src->g;
1876 inv.a = k;
1877 inv.b = -k * src->e;
1878 inv.e = -src->b / src->a;
1879
1880 // We need to enforce the same constraints here that we do when fitting a curve,
1881 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1882 // of the source function if we're here.
1883
1884 // Just like when fitting the curve, there's really no way to rescue a < 0.
1885 if (inv.a < 0) {
1886 return false;
1887 }
1888 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1889 if (inv.a * inv.d + inv.b < 0) {
1890 inv.b = -inv.a * inv.d;
1891 }
1892
1893 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1894 // where we might still fail here, like non-finite parameter values.
1895 if (classify(inv) != sRGBish) {
1896 return false;
1897 }
1898
1899 assert (inv.a >= 0);
1900 assert (inv.a * inv.d + inv.b >= 0);
1901
1902 // Now in principle we're done.
1903 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1904 // e or f of the inverse, depending on which segment contains src(1.0f).
1905 float s = skcms_TransferFunction_eval(src, 1.0f);
1906 if (!isfinitef_(s)) {
1907 return false;
1908 }
1909
1910 float sign = s < 0 ? -1.0f : 1.0f;
1911 s *= sign;
1912 if (s < inv.d) {
1913 inv.f = 1.0f - sign * inv.c * s;
1914 } else {
1915 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1916 }
1917
1918 *dst = inv;
1919 return classify(*dst) == sRGBish;
1920 }
1921
1922 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1923
1924 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1925 //
1926 // tf(x) = cx + f x < d
1927 // tf(x) = (ax + b)^g + e x ≥ d
1928 //
1929 // When fitting, we add the additional constraint that both pieces meet at d:
1930 //
1931 // cd + f = (ad + b)^g + e
1932 //
1933 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1934 //
1935 // tf(x) = cx + f x < d
1936 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1937 //
1938 // Our overall strategy is then:
1939 // For a couple tolerances,
1940 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1941 // - invert c,d,f
1942 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1943 // (and by constraint, inverted e) to the inverse of the table.
1944 // Return the parameters with least maximum error.
1945 //
1946 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1947 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1948 //
1949 // let y = Table(x)
1950 // r(x) = x - f_inv(y)
1951 //
1952 // ∂r/∂g = ln(ay + b)*(ay + b)^g
1953 // - ln(ad + b)*(ad + b)^g
1954 // ∂r/∂a = yg(ay + b)^(g-1)
1955 // - dg(ad + b)^(g-1)
1956 // ∂r/∂b = g(ay + b)^(g-1)
1957 // - g(ad + b)^(g-1)
1958
1959 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1960 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1961 static float rg_nonlinear(float x,
1962 const skcms_Curve* curve,
1963 const skcms_TransferFunction* tf,
1964 float dfdP[3]) {
1965 const float y = eval_curve(curve, x);
1966
1967 const float g = tf->g, a = tf->a, b = tf->b,
1968 c = tf->c, d = tf->d, f = tf->f;
1969
1970 const float Y = fmaxf_(a*y + b, 0.0f),
1971 D = a*d + b;
1972 assert (D >= 0);
1973
1974 // The gradient.
1975 dfdP[0] = logf_(Y)*powf_(Y, g)
1976 - logf_(D)*powf_(D, g);
1977 dfdP[1] = y*g*powf_(Y, g-1)
1978 - d*g*powf_(D, g-1);
1979 dfdP[2] = g*powf_(Y, g-1)
1980 - g*powf_(D, g-1);
1981
1982 // The residual.
1983 const float f_inv = powf_(Y, g)
1984 - powf_(D, g)
1985 + c*d + f;
1986 return x - f_inv;
1987 }
1988
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)1989 static bool gauss_newton_step(const skcms_Curve* curve,
1990 skcms_TransferFunction* tf,
1991 float x0, float dx, int N) {
1992 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1993 //
1994 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1995 //
1996 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1997 // where r(P) is the residual vector
1998 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1999 //
2000 // Let's review the shape of each of these expressions:
2001 // r(P) is [N x 1], a column vector with one entry per value of x tested
2002 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
2003 // Jf^T is [3 x N], the transpose of Jf
2004 //
2005 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2006 // and so is its inverse (Jf^T Jf)^-1
2007 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2008 //
2009 // Our implementation strategy to get to the final ∆P is
2010 // 1) evaluate Jf^T Jf, call that lhs
2011 // 2) evaluate Jf^T r(P), call that rhs
2012 // 3) invert lhs
2013 // 4) multiply inverse lhs by rhs
2014 //
2015 // This is a friendly implementation strategy because we don't have to have any
2016 // buffers that scale with N, and equally nice don't have to perform any matrix
2017 // operations that are variable size.
2018 //
2019 // Other implementation strategies could trade this off, e.g. evaluating the
2020 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2021 // the residuals. That would probably require implementing singular value
2022 // decomposition, and would create a [3 x N] matrix to be multiplied by the
2023 // [N x 1] residual vector, but on the upside I think that'd eliminate the
2024 // possibility of this gauss_newton_step() function ever failing.
2025
2026 // 0) start off with lhs and rhs safely zeroed.
2027 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2028 skcms_Vector3 rhs = { {0,0,0} };
2029
2030 // 1,2) evaluate lhs and evaluate rhs
2031 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2032 // so we'll have to update lhs and rhs at the same time.
2033 for (int i = 0; i < N; i++) {
2034 float x = x0 + i*dx;
2035
2036 float dfdP[3] = {0,0,0};
2037 float resid = rg_nonlinear(x,curve,tf, dfdP);
2038
2039 for (int r = 0; r < 3; r++) {
2040 for (int c = 0; c < 3; c++) {
2041 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2042 }
2043 rhs.vals[r] += dfdP[r] * resid;
2044 }
2045 }
2046
2047 // If any of the 3 P parameters are unused, this matrix will be singular.
2048 // Detect those cases and fix them up to indentity instead, so we can invert.
2049 for (int k = 0; k < 3; k++) {
2050 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2051 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2052 lhs.vals[k][k] = 1;
2053 }
2054 }
2055
2056 // 3) invert lhs
2057 skcms_Matrix3x3 lhs_inv;
2058 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2059 return false;
2060 }
2061
2062 // 4) multiply inverse lhs by rhs
2063 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2064 tf->g += dP.vals[0];
2065 tf->a += dP.vals[1];
2066 tf->b += dP.vals[2];
2067 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2068 }
2069
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2070 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2071 const skcms_TransferFunction* tf_inv) {
2072 skcms_TransferFunction tf;
2073 if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
2074 return INFINITY_;
2075 }
2076
2077 skcms_TransferFunction tf_inv_again;
2078 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2079 return INFINITY_;
2080 }
2081
2082 return skcms_MaxRoundtripError(curve, &tf_inv_again);
2083 }
2084
2085 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2086 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2087 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2088 auto fixup_tf = [tf]() {
2089 // a must be non-negative. That ensures the function is monotonically increasing.
2090 // We don't really know how to fix up a if it goes negative.
2091 if (tf->a < 0) {
2092 return false;
2093 }
2094 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2095 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2096 if (tf->a * tf->d + tf->b < 0) {
2097 tf->b = -tf->a * tf->d;
2098 }
2099 assert (tf->a >= 0 &&
2100 tf->a * tf->d + tf->b >= 0);
2101
2102 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2103 // parameter so we can guarantee this.
2104 tf->e = tf->c*tf->d + tf->f
2105 - powf_(tf->a*tf->d + tf->b, tf->g);
2106
2107 return true;
2108 };
2109
2110 if (!fixup_tf()) {
2111 return false;
2112 }
2113
2114 // No matter where we start, dx should always represent N even steps from 0 to 1.
2115 const float dx = 1.0f / (N-1);
2116
2117 skcms_TransferFunction best_tf = *tf;
2118 float best_max_error = INFINITY_;
2119
2120 // Need this or several curves get worse... *sigh*
2121 float init_error = max_roundtrip_error_checked(curve, tf);
2122 if (init_error < best_max_error) {
2123 best_max_error = init_error;
2124 best_tf = *tf;
2125 }
2126
2127 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2128 for (int j = 0; j < 8; j++) {
2129 if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
2130 *tf = best_tf;
2131 return isfinitef_(best_max_error);
2132 }
2133
2134 float max_error = max_roundtrip_error_checked(curve, tf);
2135 if (max_error < best_max_error) {
2136 best_max_error = max_error;
2137 best_tf = *tf;
2138 }
2139 }
2140
2141 *tf = best_tf;
2142 return isfinitef_(best_max_error);
2143 }
2144
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2145 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2146 skcms_TransferFunction* approx,
2147 float* max_error) {
2148 if (!curve || !approx || !max_error) {
2149 return false;
2150 }
2151
2152 if (curve->table_entries == 0) {
2153 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2154 return false;
2155 }
2156
2157 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2158 // We need at least two points, and must put some reasonable cap on the maximum number.
2159 return false;
2160 }
2161
2162 int N = (int)curve->table_entries;
2163 const float dx = 1.0f / (N - 1);
2164
2165 *max_error = INFINITY_;
2166 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2167 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2168 skcms_TransferFunction tf,
2169 tf_inv;
2170
2171 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2172 tf.f = 0.0f;
2173 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2174
2175 if (L == N) {
2176 // If the entire data set was linear, move the coefficients to the nonlinear portion
2177 // with G == 1. This lets use a canonical representation with d == 0.
2178 tf.g = 1;
2179 tf.a = tf.c;
2180 tf.b = tf.f;
2181 tf.c = tf.d = tf.e = tf.f = 0;
2182 } else if (L == N - 1) {
2183 // Degenerate case with only two points in the nonlinear segment. Solve directly.
2184 tf.g = 1;
2185 tf.a = (eval_curve(curve, (N-1)*dx) -
2186 eval_curve(curve, (N-2)*dx))
2187 / dx;
2188 tf.b = eval_curve(curve, (N-2)*dx)
2189 - tf.a * (N-2)*dx;
2190 tf.e = 0;
2191 } else {
2192 // Start by guessing a gamma-only curve through the midpoint.
2193 int mid = (L + N) / 2;
2194 float mid_x = mid / (N - 1.0f);
2195 float mid_y = eval_curve(curve, mid_x);
2196 tf.g = log2f_(mid_y) / log2f_(mid_x);
2197 tf.a = 1;
2198 tf.b = 0;
2199 tf.e = tf.c*tf.d + tf.f
2200 - powf_(tf.a*tf.d + tf.b, tf.g);
2201
2202
2203 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2204 !fit_nonlinear(curve, L,N, &tf_inv)) {
2205 continue;
2206 }
2207
2208 // We fit tf_inv, so calculate tf to keep in sync.
2209 // fit_nonlinear() should guarantee invertibility.
2210 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2211 assert(false);
2212 continue;
2213 }
2214 }
2215
2216 // We'd better have a sane, sRGB-ish TF by now.
2217 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2218 // anything else is just some accident of math and the way we pun tf.g as a type flag.
2219 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2220 if (sRGBish != classify(tf)) {
2221 continue;
2222 }
2223
2224 // We find our error by roundtripping the table through tf_inv.
2225 //
2226 // (The most likely use case for this approximation is to be inverted and
2227 // used as the transfer function for a destination color space.)
2228 //
2229 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2230 // invertible, so re-verify that here (and use the new inverse for testing).
2231 // fit_nonlinear() should guarantee this, but the special cases that don't use
2232 // it may fail this test.
2233 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2234 continue;
2235 }
2236
2237 float err = skcms_MaxRoundtripError(curve, &tf_inv);
2238 if (*max_error > err) {
2239 *max_error = err;
2240 *approx = tf;
2241 }
2242 }
2243 return isfinitef_(*max_error);
2244 }
2245
2246 // ~~~~ Impl. of skcms_Transform() ~~~~
2247
2248 typedef enum {
2249 Op_load_a8,
2250 Op_load_g8,
2251 Op_load_8888_palette8,
2252 Op_load_4444,
2253 Op_load_565,
2254 Op_load_888,
2255 Op_load_8888,
2256 Op_load_1010102,
2257 Op_load_161616LE,
2258 Op_load_16161616LE,
2259 Op_load_161616BE,
2260 Op_load_16161616BE,
2261 Op_load_hhh,
2262 Op_load_hhhh,
2263 Op_load_fff,
2264 Op_load_ffff,
2265
2266 Op_swap_rb,
2267 Op_clamp,
2268 Op_invert,
2269 Op_force_opaque,
2270 Op_premul,
2271 Op_unpremul,
2272 Op_matrix_3x3,
2273 Op_matrix_3x4,
2274
2275 Op_lab_to_xyz,
2276 Op_xyz_to_lab,
2277
2278 Op_tf_r,
2279 Op_tf_g,
2280 Op_tf_b,
2281 Op_tf_a,
2282
2283 Op_pq_r,
2284 Op_pq_g,
2285 Op_pq_b,
2286 Op_pq_a,
2287
2288 Op_hlg_r,
2289 Op_hlg_g,
2290 Op_hlg_b,
2291 Op_hlg_a,
2292
2293 Op_hlginv_r,
2294 Op_hlginv_g,
2295 Op_hlginv_b,
2296 Op_hlginv_a,
2297
2298 Op_table_r,
2299 Op_table_g,
2300 Op_table_b,
2301 Op_table_a,
2302
2303 Op_clut_A2B,
2304 Op_clut_B2A,
2305
2306 Op_store_a8,
2307 Op_store_g8,
2308 Op_store_4444,
2309 Op_store_565,
2310 Op_store_888,
2311 Op_store_8888,
2312 Op_store_1010102,
2313 Op_store_161616LE,
2314 Op_store_16161616LE,
2315 Op_store_161616BE,
2316 Op_store_16161616BE,
2317 Op_store_hhh,
2318 Op_store_hhhh,
2319 Op_store_fff,
2320 Op_store_ffff,
2321 } Op;
2322
2323 #if defined(__clang__)
2324 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2325 #elif defined(__GNUC__)
2326 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2327 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2328 template <int N, typename T>
2329 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2330
2331 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2332 #endif
2333
2334 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2335
2336 namespace baseline {
2337 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2338 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2339 #define N 1
2340 template <typename T> using V = T;
2341 using Color = float;
2342 #elif defined(__AVX512F__)
2343 #define N 16
2344 template <typename T> using V = Vec<N,T>;
2345 using Color = float;
2346 #elif defined(__AVX__)
2347 #define N 8
2348 template <typename T> using V = Vec<N,T>;
2349 using Color = float;
2350 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2351 #define N 8
2352 template <typename T> using V = Vec<N,T>;
2353 using Color = _Float16;
2354 #else
2355 #define N 4
2356 template <typename T> using V = Vec<N,T>;
2357 using Color = float;
2358 #endif
2359
2360 #include "src/Transform_inl.h"
2361 #undef N
2362 }
2363
2364 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2365 #if !defined(SKCMS_PORTABLE) && \
2366 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2367 (( defined(__clang__) && __clang_major__ >= 5) || \
2368 (!defined(__clang__) && defined(__GNUC__))) \
2369 && defined(__x86_64__)
2370
2371 #if !defined(__AVX2__)
2372 #if defined(__clang__)
2373 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2374 #elif defined(__GNUC__)
2375 #pragma GCC push_options
2376 #pragma GCC target("avx2,f16c")
2377 #endif
2378
2379 namespace hsw {
2380 #define USING_AVX
2381 #define USING_AVX_F16C
2382 #define USING_AVX2
2383 #define N 8
2384 template <typename T> using V = Vec<N,T>;
2385 using Color = float;
2386
2387 #include "src/Transform_inl.h"
2388
2389 // src/Transform_inl.h will undefine USING_* for us.
2390 #undef N
2391 }
2392
2393 #if defined(__clang__)
2394 #pragma clang attribute pop
2395 #elif defined(__GNUC__)
2396 #pragma GCC pop_options
2397 #endif
2398
2399 #define TEST_FOR_HSW
2400 #endif
2401
2402 #if !defined(__AVX512F__)
2403 #if defined(__clang__)
2404 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2405 #elif defined(__GNUC__)
2406 #pragma GCC push_options
2407 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2408 #endif
2409
2410 namespace skx {
2411 #define USING_AVX512F
2412 #define N 16
2413 template <typename T> using V = Vec<N,T>;
2414 using Color = float;
2415
2416 #include "src/Transform_inl.h"
2417
2418 // src/Transform_inl.h will undefine USING_* for us.
2419 #undef N
2420 }
2421
2422 #if defined(__clang__)
2423 #pragma clang attribute pop
2424 #elif defined(__GNUC__)
2425 #pragma GCC pop_options
2426 #endif
2427
2428 #define TEST_FOR_SKX
2429 #endif
2430
2431 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2432 enum class CpuType { None, HSW, SKX };
cpu_type()2433 static CpuType cpu_type() {
2434 static const CpuType type = []{
2435 if (!runtime_cpu_detection) {
2436 return CpuType::None;
2437 }
2438 // See http://www.sandpile.org/x86/cpuid.htm
2439
2440 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2441 uint32_t eax, ebx, ecx, edx;
2442 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2443 : "0"(1), "2"(0));
2444 if ((edx & (1u<<25)) && // SSE
2445 (edx & (1u<<26)) && // SSE2
2446 (ecx & (1u<< 0)) && // SSE3
2447 (ecx & (1u<< 9)) && // SSSE3
2448 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2449 (ecx & (1u<<19)) && // SSE4.1
2450 (ecx & (1u<<20)) && // SSE4.2
2451 (ecx & (1u<<26)) && // XSAVE
2452 (ecx & (1u<<27)) && // OSXSAVE
2453 (ecx & (1u<<28)) && // AVX
2454 (ecx & (1u<<29))) { // F16C
2455
2456 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2457 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2458 : "0"(7), "2"(0));
2459 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2460 uint32_t xcr0, dont_need_edx;
2461 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2462
2463 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2464 (xcr0 & (1u<<2)) && // YMM register state saved?
2465 (ebx & (1u<<5))) { // AVX2
2466 // At this point we're at least HSW. Continue checking for SKX.
2467 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2468 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2469 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2470 (ebx & (1u<<16)) && // AVX512F
2471 (ebx & (1u<<17)) && // AVX512DQ
2472 (ebx & (1u<<28)) && // AVX512CD
2473 (ebx & (1u<<30)) && // AVX512BW
2474 (ebx & (1u<<31))) { // AVX512VL
2475 return CpuType::SKX;
2476 }
2477 return CpuType::HSW;
2478 }
2479 }
2480 return CpuType::None;
2481 }();
2482 return type;
2483 }
2484 #endif
2485
2486 #endif
2487
2488 typedef struct {
2489 Op op;
2490 const void* arg;
2491 } OpAndArg;
2492
select_curve_op(const skcms_Curve * curve,int channel)2493 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2494 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2495 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2496 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2497 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2498 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2499 };
2500 const auto& op = ops[channel];
2501
2502 if (curve->table_entries == 0) {
2503 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2504
2505 const skcms_TransferFunction& tf = curve->parametric;
2506
2507 if (tf.g == 1 && tf.a == 1 &&
2508 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2509 return noop;
2510 }
2511
2512 switch (classify(tf)) {
2513 case Bad: return noop;
2514 case sRGBish: return OpAndArg{op.sRGBish, &tf};
2515 case PQish: return OpAndArg{op.PQish, &tf};
2516 case HLGish: return OpAndArg{op.HLGish, &tf};
2517 case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2518 }
2519 }
2520 return OpAndArg{op.table, curve};
2521 }
2522
bytes_per_pixel(skcms_PixelFormat fmt)2523 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2524 switch (fmt >> 1) { // ignore rgb/bgr
2525 case skcms_PixelFormat_A_8 >> 1: return 1;
2526 case skcms_PixelFormat_G_8 >> 1: return 1;
2527 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2528 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2529 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2530 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2531 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2532 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2533 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2534 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2535 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2536 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2537 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2538 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2539 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2540 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2541 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2542 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2543 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2544 }
2545 assert(false);
2546 return 0;
2547 }
2548
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2549 static bool prep_for_destination(const skcms_ICCProfile* profile,
2550 skcms_Matrix3x3* fromXYZD50,
2551 skcms_TransferFunction* invR,
2552 skcms_TransferFunction* invG,
2553 skcms_TransferFunction* invB) {
2554 // skcms_Transform() supports B2A destinations...
2555 if (profile->has_B2A) { return true; }
2556 // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2557 return profile->has_trc
2558 && profile->has_toXYZD50
2559 && profile->trc[0].table_entries == 0
2560 && profile->trc[1].table_entries == 0
2561 && profile->trc[2].table_entries == 0
2562 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2563 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2564 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2565 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2566 }
2567
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2568 bool skcms_Transform(const void* src,
2569 skcms_PixelFormat srcFmt,
2570 skcms_AlphaFormat srcAlpha,
2571 const skcms_ICCProfile* srcProfile,
2572 void* dst,
2573 skcms_PixelFormat dstFmt,
2574 skcms_AlphaFormat dstAlpha,
2575 const skcms_ICCProfile* dstProfile,
2576 size_t npixels) {
2577 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2578 dst, dstFmt, dstAlpha, dstProfile,
2579 npixels, nullptr);
2580 }
2581
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2582 bool skcms_TransformWithPalette(const void* src,
2583 skcms_PixelFormat srcFmt,
2584 skcms_AlphaFormat srcAlpha,
2585 const skcms_ICCProfile* srcProfile,
2586 void* dst,
2587 skcms_PixelFormat dstFmt,
2588 skcms_AlphaFormat dstAlpha,
2589 const skcms_ICCProfile* dstProfile,
2590 size_t nz,
2591 const void* palette) {
2592 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2593 src_bpp = bytes_per_pixel(srcFmt);
2594 // Let's just refuse if the request is absurdly big.
2595 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2596 return false;
2597 }
2598 int n = (int)nz;
2599
2600 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2601 if (!srcProfile) {
2602 srcProfile = skcms_sRGB_profile();
2603 }
2604 if (!dstProfile) {
2605 dstProfile = skcms_sRGB_profile();
2606 }
2607
2608 // We can't transform in place unless the PixelFormats are the same size.
2609 if (dst == src && dst_bpp != src_bpp) {
2610 return false;
2611 }
2612 // TODO: more careful alias rejection (like, dst == src + 1)?
2613
2614 if (needs_palette(srcFmt) && !palette) {
2615 return false;
2616 }
2617
2618 Op program [32];
2619 const void* arguments[32];
2620
2621 Op* ops = program;
2622 const void** args = arguments;
2623
2624 // These are always parametric curves of some sort.
2625 skcms_Curve dst_curves[3];
2626 dst_curves[0].table_entries =
2627 dst_curves[1].table_entries =
2628 dst_curves[2].table_entries = 0;
2629
2630 skcms_Matrix3x3 from_xyz;
2631
2632 switch (srcFmt >> 1) {
2633 default: return false;
2634 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2635 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2636 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2637 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2638 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2639 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2640 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2641 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2642 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2643 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2644 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2645 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2646 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2647 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2648 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2649 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2650 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2651
2652 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2653 *args++ = palette;
2654 break;
2655 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2656 *ops++ = Op_load_8888;
2657 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2658 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2659 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2660 break;
2661 }
2662 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2663 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2664 *ops++ = Op_clamp;
2665 }
2666 if (srcFmt & 1) {
2667 *ops++ = Op_swap_rb;
2668 }
2669 skcms_ICCProfile gray_dst_profile;
2670 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2671 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2672 // luminance (Y) by the destination transfer function.
2673 gray_dst_profile = *dstProfile;
2674 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2675 dstProfile = &gray_dst_profile;
2676 }
2677
2678 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2679 // Photoshop creates CMYK images as inverse CMYK.
2680 // These happen to be the only ones we've _ever_ seen.
2681 *ops++ = Op_invert;
2682 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2683 srcAlpha = skcms_AlphaFormat_Unpremul;
2684 }
2685
2686 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2687 *ops++ = Op_force_opaque;
2688 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2689 *ops++ = Op_unpremul;
2690 }
2691
2692 if (dstProfile != srcProfile) {
2693
2694 if (!prep_for_destination(dstProfile,
2695 &from_xyz,
2696 &dst_curves[0].parametric,
2697 &dst_curves[1].parametric,
2698 &dst_curves[2].parametric)) {
2699 return false;
2700 }
2701
2702 if (srcProfile->has_A2B) {
2703 if (srcProfile->A2B.input_channels) {
2704 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2705 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2706 if (oa.arg) {
2707 *ops++ = oa.op;
2708 *args++ = oa.arg;
2709 }
2710 }
2711 *ops++ = Op_clamp;
2712 *ops++ = Op_clut_A2B;
2713 *args++ = &srcProfile->A2B;
2714 }
2715
2716 if (srcProfile->A2B.matrix_channels == 3) {
2717 for (int i = 0; i < 3; i++) {
2718 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2719 if (oa.arg) {
2720 *ops++ = oa.op;
2721 *args++ = oa.arg;
2722 }
2723 }
2724
2725 static const skcms_Matrix3x4 I = {{
2726 {1,0,0,0},
2727 {0,1,0,0},
2728 {0,0,1,0},
2729 }};
2730 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2731 *ops++ = Op_matrix_3x4;
2732 *args++ = &srcProfile->A2B.matrix;
2733 }
2734 }
2735
2736 if (srcProfile->A2B.output_channels == 3) {
2737 for (int i = 0; i < 3; i++) {
2738 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2739 if (oa.arg) {
2740 *ops++ = oa.op;
2741 *args++ = oa.arg;
2742 }
2743 }
2744 }
2745
2746 if (srcProfile->pcs == skcms_Signature_Lab) {
2747 *ops++ = Op_lab_to_xyz;
2748 }
2749
2750 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2751 for (int i = 0; i < 3; i++) {
2752 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2753 if (oa.arg) {
2754 *ops++ = oa.op;
2755 *args++ = oa.arg;
2756 }
2757 }
2758 } else {
2759 return false;
2760 }
2761
2762 // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2763 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2764
2765 if (dstProfile->has_B2A) {
2766 // B2A needs its input in XYZD50, so transform TRC sources now.
2767 if (!srcProfile->has_A2B) {
2768 *ops++ = Op_matrix_3x3;
2769 *args++ = &srcProfile->toXYZD50;
2770 }
2771
2772 if (dstProfile->pcs == skcms_Signature_Lab) {
2773 *ops++ = Op_xyz_to_lab;
2774 }
2775
2776 if (dstProfile->B2A.input_channels == 3) {
2777 for (int i = 0; i < 3; i++) {
2778 OpAndArg oa = select_curve_op(&dstProfile->B2A.input_curves[i], i);
2779 if (oa.arg) {
2780 *ops++ = oa.op;
2781 *args++ = oa.arg;
2782 }
2783 }
2784 }
2785
2786 if (dstProfile->B2A.matrix_channels == 3) {
2787 static const skcms_Matrix3x4 I = {{
2788 {1,0,0,0},
2789 {0,1,0,0},
2790 {0,0,1,0},
2791 }};
2792 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2793 *ops++ = Op_matrix_3x4;
2794 *args++ = &dstProfile->B2A.matrix;
2795 }
2796
2797 for (int i = 0; i < 3; i++) {
2798 OpAndArg oa = select_curve_op(&dstProfile->B2A.matrix_curves[i], i);
2799 if (oa.arg) {
2800 *ops++ = oa.op;
2801 *args++ = oa.arg;
2802 }
2803 }
2804 }
2805
2806 if (dstProfile->B2A.output_channels) {
2807 *ops++ = Op_clamp;
2808 *ops++ = Op_clut_B2A;
2809 *args++ = &dstProfile->B2A;
2810 for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
2811 OpAndArg oa = select_curve_op(&dstProfile->B2A.output_curves[i], i);
2812 if (oa.arg) {
2813 *ops++ = oa.op;
2814 *args++ = oa.arg;
2815 }
2816 }
2817 }
2818 } else {
2819 // This is a TRC destination.
2820 // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2821 // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2822 static const skcms_Matrix3x3 I = {{
2823 { 1.0f, 0.0f, 0.0f },
2824 { 0.0f, 1.0f, 0.0f },
2825 { 0.0f, 0.0f, 1.0f },
2826 }};
2827 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2828
2829 // There's a chance the source and destination gamuts are identical,
2830 // in which case we can skip the gamut transform.
2831 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2832 // Concat the entire gamut transform into from_xyz,
2833 // now slightly misnamed but it's a handy spot to stash the result.
2834 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2835 *ops++ = Op_matrix_3x3;
2836 *args++ = &from_xyz;
2837 }
2838
2839 // Encode back to dst RGB using its parametric transfer functions.
2840 for (int i = 0; i < 3; i++) {
2841 OpAndArg oa = select_curve_op(dst_curves+i, i);
2842 if (oa.arg) {
2843 assert (oa.op != Op_table_r &&
2844 oa.op != Op_table_g &&
2845 oa.op != Op_table_b &&
2846 oa.op != Op_table_a);
2847 *ops++ = oa.op;
2848 *args++ = oa.arg;
2849 }
2850 }
2851 }
2852 }
2853
2854 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2855 // not just to values that fit in [0,1].
2856 //
2857 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2858 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2859 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2860 *ops++ = Op_clamp;
2861 }
2862
2863 if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2864 // Photoshop creates CMYK images as inverse CMYK.
2865 // These happen to be the only ones we've _ever_ seen.
2866 *ops++ = Op_invert;
2867
2868 // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2869 dstAlpha = skcms_AlphaFormat_Unpremul;
2870 }
2871
2872 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2873 *ops++ = Op_force_opaque;
2874 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2875 *ops++ = Op_premul;
2876 }
2877 if (dstFmt & 1) {
2878 *ops++ = Op_swap_rb;
2879 }
2880 switch (dstFmt >> 1) {
2881 default: return false;
2882 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2883 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2884 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2885 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2886 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2887 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2888 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2889 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2890 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2891 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2892 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2893 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2894 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2895 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2896 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2897 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2898 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2899
2900 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2901 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2902 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2903 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2904 *ops++ = Op_store_8888;
2905 break;
2906 }
2907
2908 auto run = baseline::run_program;
2909 #if defined(TEST_FOR_HSW)
2910 switch (cpu_type()) {
2911 case CpuType::None: break;
2912 case CpuType::HSW: run = hsw::run_program; break;
2913 case CpuType::SKX: run = hsw::run_program; break;
2914 }
2915 #endif
2916 #if defined(TEST_FOR_SKX)
2917 switch (cpu_type()) {
2918 case CpuType::None: break;
2919 case CpuType::HSW: break;
2920 case CpuType::SKX: run = skx::run_program; break;
2921 }
2922 #endif
2923 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2924 return true;
2925 }
2926
assert_usable_as_destination(const skcms_ICCProfile * profile)2927 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2928 #if defined(NDEBUG)
2929 (void)profile;
2930 #else
2931 skcms_Matrix3x3 fromXYZD50;
2932 skcms_TransferFunction invR, invG, invB;
2933 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2934 #endif
2935 }
2936
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2937 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2938 if (!profile->has_B2A) {
2939 skcms_Matrix3x3 fromXYZD50;
2940 if (!profile->has_trc || !profile->has_toXYZD50
2941 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2942 return false;
2943 }
2944
2945 skcms_TransferFunction tf[3];
2946 for (int i = 0; i < 3; i++) {
2947 skcms_TransferFunction inv;
2948 if (profile->trc[i].table_entries == 0
2949 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2950 tf[i] = profile->trc[i].parametric;
2951 continue;
2952 }
2953
2954 float max_error;
2955 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2956 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2957 return false;
2958 }
2959 }
2960
2961 for (int i = 0; i < 3; ++i) {
2962 profile->trc[i].table_entries = 0;
2963 profile->trc[i].parametric = tf[i];
2964 }
2965 }
2966 assert_usable_as_destination(profile);
2967 return true;
2968 }
2969
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2970 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2971 // Call skcms_MakeUsableAsDestination() with B2A disabled;
2972 // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
2973 skcms_ICCProfile result = *profile;
2974 result.has_B2A = false;
2975 if (!skcms_MakeUsableAsDestination(&result)) {
2976 return false;
2977 }
2978
2979 // Of the three, pick the transfer function that best fits the other two.
2980 int best_tf = 0;
2981 float min_max_error = INFINITY_;
2982 for (int i = 0; i < 3; i++) {
2983 skcms_TransferFunction inv;
2984 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2985 return false;
2986 }
2987
2988 float err = 0;
2989 for (int j = 0; j < 3; ++j) {
2990 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2991 }
2992 if (min_max_error > err) {
2993 min_max_error = err;
2994 best_tf = i;
2995 }
2996 }
2997
2998 for (int i = 0; i < 3; i++) {
2999 result.trc[i].parametric = result.trc[best_tf].parametric;
3000 }
3001
3002 *profile = result;
3003 assert_usable_as_destination(profile);
3004 return true;
3005 }
3006