1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33 #endif
34
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37 runtime_cpu_detection = false;
38 }
39
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49
50 static const union {
51 uint32_t bits;
52 float f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55
56 #if defined(__clang__) || defined(__GNUC__)
57 #define small_memcpy __builtin_memcpy
58 #else
59 #define small_memcpy memcpy
60 #endif
61
log2f_(float x)62 static float log2f_(float x) {
63 // The first approximation of log2(x) is its exponent 'e', minus 127.
64 int32_t bits;
65 small_memcpy(&bits, &x, sizeof(bits));
66
67 float e = (float)bits * (1.0f / (1<<23));
68
69 // If we use the mantissa too we can refine the error signficantly.
70 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71 float m;
72 small_memcpy(&m, &m_bits, sizeof(m));
73
74 return (e - 124.225514990f
75 - 1.498030302f*m
76 - 1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79 const float ln2 = 0.69314718f;
80 return ln2*log2f_(x);
81 }
82
exp2f_(float x)83 static float exp2f_(float x) {
84 float fract = x - floorf_(x);
85
86 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87 - 1.490129070f*fract
88 + 27.728023300f/(4.84252568f - fract));
89
90 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92 // Negative values are effectively underflow - we'll end up returning a (different) negative
93 // value, which makes no sense. So clamp to zero.
94 if (fbits >= (float)INT_MAX) {
95 return INFINITY_;
96 } else if (fbits < 0) {
97 return 0;
98 }
99
100 int32_t bits = (int32_t)fbits;
101 small_memcpy(&x, &bits, sizeof(x));
102 return x;
103 }
104
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107 assert (x >= 0);
108 return (x == 0) || (x == 1) ? x
109 : exp2f_(log2f_(x) * y);
110 }
111
expf_(float x)112 static float expf_(float x) {
113 const float log2_e = 1.4426950408889634074f;
114 return exp2f_(log2_e * x);
115 }
116
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123 int32_t bits;
124 memcpy(&bits, &x, sizeof(bits));
125 bits = bits - 1;
126 memcpy(&x, &bits, sizeof(bits));
127 return x;
128 }
129
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 struct TF_PQish { float A,B,C,D,E,F; };
134 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
135 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
136 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
137 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
138
TFKind_marker(skcms_TFType kind)139 static float TFKind_marker(skcms_TFType kind) {
140 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
141 return -(float)kind;
142 }
143
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)144 static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
145 , TF_HLGish* hlg = nullptr) {
146 if (tf.g < 0 && static_cast<float>(static_cast<int>(tf.g)) == tf.g) {
147 // TODO: soundness checks for PQ/HLG like we do for sRGBish?
148 switch ((int)tf.g) {
149 case -skcms_TFType_PQish:
150 if (pq) {
151 memcpy(pq , &tf.a, sizeof(*pq ));
152 }
153 return skcms_TFType_PQish;
154 case -skcms_TFType_HLGish:
155 if (hlg) {
156 memcpy(hlg, &tf.a, sizeof(*hlg));
157 }
158 return skcms_TFType_HLGish;
159 case -skcms_TFType_HLGinvish:
160 if (hlg) {
161 memcpy(hlg, &tf.a, sizeof(*hlg));
162 }
163 return skcms_TFType_HLGinvish;
164 }
165 return skcms_TFType_Invalid;
166 }
167
168 // Basic soundness checks for sRGBish transfer functions.
169 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
170 // a,c,d,g should be non-negative to make any sense.
171 && tf.a >= 0
172 && tf.c >= 0
173 && tf.d >= 0
174 && tf.g >= 0
175 // Raising a negative value to a fractional tf->g produces complex numbers.
176 && tf.a * tf.d + tf.b >= 0) {
177 return skcms_TFType_sRGBish;
178 }
179
180 return skcms_TFType_Invalid;
181 }
182
skcms_TransferFunction_getType(const skcms_TransferFunction * tf)183 skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
184 return classify(*tf);
185 }
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)186 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
187 return classify(*tf) == skcms_TFType_sRGBish;
188 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)189 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
190 return classify(*tf) == skcms_TFType_PQish;
191 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)192 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
193 return classify(*tf) == skcms_TFType_HLGish;
194 }
195
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)196 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
197 float A, float B, float C,
198 float D, float E, float F) {
199 *tf = { TFKind_marker(skcms_TFType_PQish), A,B,C,D,E,F };
200 assert(skcms_TransferFunction_isPQish(tf));
201 return true;
202 }
203
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)204 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
205 float K, float R, float G,
206 float a, float b, float c) {
207 *tf = { TFKind_marker(skcms_TFType_HLGish), R,G, a,b,c, K-1.0f };
208 assert(skcms_TransferFunction_isHLGish(tf));
209 return true;
210 }
211
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)212 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
213 float sign = x < 0 ? -1.0f : 1.0f;
214 x *= sign;
215
216 TF_PQish pq;
217 TF_HLGish hlg;
218 switch (classify(*tf, &pq, &hlg)) {
219 case skcms_TFType_Invalid: break;
220
221 case skcms_TFType_HLGish: {
222 const float K = hlg.K_minus_1 + 1.0f;
223 return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
224 : expf_((x-hlg.c)*hlg.a) + hlg.b);
225 }
226
227 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
228 case skcms_TFType_HLGinvish: {
229 const float K = hlg.K_minus_1 + 1.0f;
230 x /= K;
231 return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
232 : hlg.a * logf_(x - hlg.b) + hlg.c);
233 }
234
235 case skcms_TFType_sRGBish:
236 return sign * (x < tf->d ? tf->c * x + tf->f
237 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
238
239 case skcms_TFType_PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
240 / (pq.D + pq.E * powf_(x, pq.C)),
241 pq.F);
242 }
243 return 0;
244 }
245
246
eval_curve(const skcms_Curve * curve,float x)247 static float eval_curve(const skcms_Curve* curve, float x) {
248 if (curve->table_entries == 0) {
249 return skcms_TransferFunction_eval(&curve->parametric, x);
250 }
251
252 float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
253 int lo = (int) ix ,
254 hi = (int)(float)minus_1_ulp(ix + 1.0f);
255 float t = ix - (float)lo;
256
257 float l, h;
258 if (curve->table_8) {
259 l = curve->table_8[lo] * (1/255.0f);
260 h = curve->table_8[hi] * (1/255.0f);
261 } else {
262 uint16_t be_l, be_h;
263 memcpy(&be_l, curve->table_16 + 2*lo, 2);
264 memcpy(&be_h, curve->table_16 + 2*hi, 2);
265 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
266 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
267 l = le_l * (1/65535.0f);
268 h = le_h * (1/65535.0f);
269 }
270 return l + (h-l)*t;
271 }
272
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)273 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
274 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
275 const float dx = 1.0f / static_cast<float>(N - 1);
276 float err = 0;
277 for (uint32_t i = 0; i < N; i++) {
278 float x = static_cast<float>(i) * dx,
279 y = eval_curve(curve, x);
280 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
281 }
282 return err;
283 }
284
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)285 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
286 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
287 }
288
289 // Additional ICC signature values that are only used internally
290 enum {
291 // File signature
292 skcms_Signature_acsp = 0x61637370,
293
294 // Tag signatures
295 skcms_Signature_rTRC = 0x72545243,
296 skcms_Signature_gTRC = 0x67545243,
297 skcms_Signature_bTRC = 0x62545243,
298 skcms_Signature_kTRC = 0x6B545243,
299
300 skcms_Signature_rXYZ = 0x7258595A,
301 skcms_Signature_gXYZ = 0x6758595A,
302 skcms_Signature_bXYZ = 0x6258595A,
303
304 skcms_Signature_A2B0 = 0x41324230,
305 skcms_Signature_B2A0 = 0x42324130,
306
307 skcms_Signature_CHAD = 0x63686164,
308 skcms_Signature_WTPT = 0x77747074,
309
310 skcms_Signature_CICP = 0x63696370,
311
312 // Type signatures
313 skcms_Signature_curv = 0x63757276,
314 skcms_Signature_mft1 = 0x6D667431,
315 skcms_Signature_mft2 = 0x6D667432,
316 skcms_Signature_mAB = 0x6D414220,
317 skcms_Signature_mBA = 0x6D424120,
318 skcms_Signature_para = 0x70617261,
319 skcms_Signature_sf32 = 0x73663332,
320 // XYZ is also a PCS signature, so it's defined in skcms.h
321 // skcms_Signature_XYZ = 0x58595A20,
322 };
323
read_big_u16(const uint8_t * ptr)324 static uint16_t read_big_u16(const uint8_t* ptr) {
325 uint16_t be;
326 memcpy(&be, ptr, sizeof(be));
327 #if defined(_MSC_VER)
328 return _byteswap_ushort(be);
329 #else
330 return __builtin_bswap16(be);
331 #endif
332 }
333
read_big_u32(const uint8_t * ptr)334 static uint32_t read_big_u32(const uint8_t* ptr) {
335 uint32_t be;
336 memcpy(&be, ptr, sizeof(be));
337 #if defined(_MSC_VER)
338 return _byteswap_ulong(be);
339 #else
340 return __builtin_bswap32(be);
341 #endif
342 }
343
read_big_i32(const uint8_t * ptr)344 static int32_t read_big_i32(const uint8_t* ptr) {
345 return (int32_t)read_big_u32(ptr);
346 }
347
read_big_fixed(const uint8_t * ptr)348 static float read_big_fixed(const uint8_t* ptr) {
349 return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
350 }
351
352 // Maps to an in-memory profile so that fields line up to the locations specified
353 // in ICC.1:2010, section 7.2
354 typedef struct {
355 uint8_t size [ 4];
356 uint8_t cmm_type [ 4];
357 uint8_t version [ 4];
358 uint8_t profile_class [ 4];
359 uint8_t data_color_space [ 4];
360 uint8_t pcs [ 4];
361 uint8_t creation_date_time [12];
362 uint8_t signature [ 4];
363 uint8_t platform [ 4];
364 uint8_t flags [ 4];
365 uint8_t device_manufacturer [ 4];
366 uint8_t device_model [ 4];
367 uint8_t device_attributes [ 8];
368 uint8_t rendering_intent [ 4];
369 uint8_t illuminant_X [ 4];
370 uint8_t illuminant_Y [ 4];
371 uint8_t illuminant_Z [ 4];
372 uint8_t creator [ 4];
373 uint8_t profile_id [16];
374 uint8_t reserved [28];
375 uint8_t tag_count [ 4]; // Technically not part of header, but required
376 } header_Layout;
377
378 typedef struct {
379 uint8_t signature [4];
380 uint8_t offset [4];
381 uint8_t size [4];
382 } tag_Layout;
383
get_tag_table(const skcms_ICCProfile * profile)384 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
385 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
386 }
387
388 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
389 // use of the type is for the CHAD tag that stores exactly nine values.
390 typedef struct {
391 uint8_t type [ 4];
392 uint8_t reserved [ 4];
393 uint8_t values [36];
394 } sf32_Layout;
395
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)396 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
397 skcms_ICCTag tag;
398 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
399 return false;
400 }
401
402 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
403 return false;
404 }
405
406 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
407 const uint8_t* values = sf32Tag->values;
408 for (int r = 0; r < 3; ++r)
409 for (int c = 0; c < 3; ++c, values += 4) {
410 m->vals[r][c] = read_big_fixed(values);
411 }
412 return true;
413 }
414
415 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
416 // the type are for tags/data that store exactly one triple.
417 typedef struct {
418 uint8_t type [4];
419 uint8_t reserved [4];
420 uint8_t X [4];
421 uint8_t Y [4];
422 uint8_t Z [4];
423 } XYZ_Layout;
424
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)425 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
426 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
427 return false;
428 }
429
430 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
431
432 *x = read_big_fixed(xyzTag->X);
433 *y = read_big_fixed(xyzTag->Y);
434 *z = read_big_fixed(xyzTag->Z);
435 return true;
436 }
437
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])438 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
439 skcms_ICCTag tag;
440 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
441 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
442 }
443
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)444 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
445 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
446 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
447 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
448 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
449 }
450
451 typedef struct {
452 uint8_t type [4];
453 uint8_t reserved_a [4];
454 uint8_t function_type [2];
455 uint8_t reserved_b [2];
456 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
457 } para_Layout;
458
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)459 static bool read_curve_para(const uint8_t* buf, uint32_t size,
460 skcms_Curve* curve, uint32_t* curve_size) {
461 if (size < SAFE_FIXED_SIZE(para_Layout)) {
462 return false;
463 }
464
465 const para_Layout* paraTag = (const para_Layout*)buf;
466
467 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
468 uint16_t function_type = read_big_u16(paraTag->function_type);
469 if (function_type > kGABCDEF) {
470 return false;
471 }
472
473 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
474 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
475 return false;
476 }
477
478 if (curve_size) {
479 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
480 }
481
482 curve->table_entries = 0;
483 curve->parametric.a = 1.0f;
484 curve->parametric.b = 0.0f;
485 curve->parametric.c = 0.0f;
486 curve->parametric.d = 0.0f;
487 curve->parametric.e = 0.0f;
488 curve->parametric.f = 0.0f;
489 curve->parametric.g = read_big_fixed(paraTag->variable);
490
491 switch (function_type) {
492 case kGAB:
493 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
494 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
495 if (curve->parametric.a == 0) {
496 return false;
497 }
498 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
499 break;
500 case kGABC:
501 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
502 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
503 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
504 if (curve->parametric.a == 0) {
505 return false;
506 }
507 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
508 curve->parametric.f = curve->parametric.e;
509 break;
510 case kGABCD:
511 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
512 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
513 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
514 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
515 break;
516 case kGABCDEF:
517 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
518 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
519 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
520 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
521 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
522 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
523 break;
524 }
525 return skcms_TransferFunction_isSRGBish(&curve->parametric);
526 }
527
528 typedef struct {
529 uint8_t type [4];
530 uint8_t reserved [4];
531 uint8_t value_count [4];
532 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
533 } curv_Layout;
534
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)535 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
536 skcms_Curve* curve, uint32_t* curve_size) {
537 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
538 return false;
539 }
540
541 const curv_Layout* curvTag = (const curv_Layout*)buf;
542
543 uint32_t value_count = read_big_u32(curvTag->value_count);
544 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
545 return false;
546 }
547
548 if (curve_size) {
549 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
550 }
551
552 if (value_count < 2) {
553 curve->table_entries = 0;
554 curve->parametric.a = 1.0f;
555 curve->parametric.b = 0.0f;
556 curve->parametric.c = 0.0f;
557 curve->parametric.d = 0.0f;
558 curve->parametric.e = 0.0f;
559 curve->parametric.f = 0.0f;
560 if (value_count == 0) {
561 // Empty tables are a shorthand for an identity curve
562 curve->parametric.g = 1.0f;
563 } else {
564 // Single entry tables are a shorthand for simple gamma
565 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
566 }
567 } else {
568 curve->table_8 = nullptr;
569 curve->table_16 = curvTag->variable;
570 curve->table_entries = value_count;
571 }
572
573 return true;
574 }
575
576 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
577 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)578 static bool read_curve(const uint8_t* buf, uint32_t size,
579 skcms_Curve* curve, uint32_t* curve_size) {
580 if (!buf || size < 4 || !curve) {
581 return false;
582 }
583
584 uint32_t type = read_big_u32(buf);
585 if (type == skcms_Signature_para) {
586 return read_curve_para(buf, size, curve, curve_size);
587 } else if (type == skcms_Signature_curv) {
588 return read_curve_curv(buf, size, curve, curve_size);
589 }
590
591 return false;
592 }
593
594 // mft1 and mft2 share a large chunk of data
595 typedef struct {
596 uint8_t type [ 4];
597 uint8_t reserved_a [ 4];
598 uint8_t input_channels [ 1];
599 uint8_t output_channels [ 1];
600 uint8_t grid_points [ 1];
601 uint8_t reserved_b [ 1];
602 uint8_t matrix [36];
603 } mft_CommonLayout;
604
605 typedef struct {
606 mft_CommonLayout common [1];
607
608 uint8_t variable [1/*variable*/];
609 } mft1_Layout;
610
611 typedef struct {
612 mft_CommonLayout common [1];
613
614 uint8_t input_table_entries [2];
615 uint8_t output_table_entries [2];
616 uint8_t variable [1/*variable*/];
617 } mft2_Layout;
618
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)619 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
620 // MFT matrices are applied before the first set of curves, but must be identity unless the
621 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
622 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
623 // field/flag.
624 a2b->matrix_channels = 0;
625 a2b-> input_channels = mftTag-> input_channels[0];
626 a2b->output_channels = mftTag->output_channels[0];
627
628 // We require exactly three (ie XYZ/Lab/RGB) output channels
629 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
630 return false;
631 }
632 // We require at least one, and no more than four (ie CMYK) input channels
633 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
634 return false;
635 }
636
637 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
638 a2b->grid_points[i] = mftTag->grid_points[0];
639 }
640 // The grid only makes sense with at least two points along each axis
641 if (a2b->grid_points[0] < 2) {
642 return false;
643 }
644 return true;
645 }
646
647 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)648 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
649 // Same as A2B.
650 b2a->matrix_channels = 0;
651 b2a-> input_channels = mftTag-> input_channels[0];
652 b2a->output_channels = mftTag->output_channels[0];
653
654
655 // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
656 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
657 return false;
658 }
659 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
660 return false;
661 }
662
663 // Same as A2B.
664 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
665 b2a->grid_points[i] = mftTag->grid_points[0];
666 }
667 if (b2a->grid_points[0] < 2) {
668 return false;
669 }
670 return true;
671 }
672
673 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)674 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
675 uint32_t input_table_entries, uint32_t output_table_entries,
676 A2B_or_B2A* out) {
677 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
678 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
679 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
680
681 // [input|output]_channels are <= 4, so still no overflow
682 uint32_t byte_len_all_input_tables = out->input_channels * byte_len_per_input_table;
683 uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
684
685 uint64_t grid_size = out->output_channels * byte_width;
686 for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
687 grid_size *= out->grid_points[axis];
688 }
689
690 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
691 return false;
692 }
693
694 for (uint32_t i = 0; i < out->input_channels; ++i) {
695 out->input_curves[i].table_entries = input_table_entries;
696 if (byte_width == 1) {
697 out->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
698 out->input_curves[i].table_16 = nullptr;
699 } else {
700 out->input_curves[i].table_8 = nullptr;
701 out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
702 }
703 }
704
705 if (byte_width == 1) {
706 out->grid_8 = table_base + byte_len_all_input_tables;
707 out->grid_16 = nullptr;
708 } else {
709 out->grid_8 = nullptr;
710 out->grid_16 = table_base + byte_len_all_input_tables;
711 }
712
713 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
714 for (uint32_t i = 0; i < out->output_channels; ++i) {
715 out->output_curves[i].table_entries = output_table_entries;
716 if (byte_width == 1) {
717 out->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
718 out->output_curves[i].table_16 = nullptr;
719 } else {
720 out->output_curves[i].table_8 = nullptr;
721 out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
722 }
723 }
724
725 return true;
726 }
727
728 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)729 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
730 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
731 return false;
732 }
733
734 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
735 if (!read_mft_common(mftTag->common, out)) {
736 return false;
737 }
738
739 uint32_t input_table_entries = 256;
740 uint32_t output_table_entries = 256;
741
742 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
743 input_table_entries, output_table_entries, out);
744 }
745
746 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)747 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
748 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
749 return false;
750 }
751
752 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
753 if (!read_mft_common(mftTag->common, out)) {
754 return false;
755 }
756
757 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
758 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
759
760 // ICC spec mandates that 2 <= table_entries <= 4096
761 if (input_table_entries < 2 || input_table_entries > 4096 ||
762 output_table_entries < 2 || output_table_entries > 4096) {
763 return false;
764 }
765
766 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
767 input_table_entries, output_table_entries, out);
768 }
769
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)770 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
771 uint32_t num_curves, skcms_Curve* curves) {
772 for (uint32_t i = 0; i < num_curves; ++i) {
773 if (curve_offset > size) {
774 return false;
775 }
776
777 uint32_t curve_bytes;
778 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
779 return false;
780 }
781
782 if (curve_bytes > UINT32_MAX - 3) {
783 return false;
784 }
785 curve_bytes = (curve_bytes + 3) & ~3U;
786
787 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
788 curve_offset = (uint32_t)new_offset_64;
789 if (new_offset_64 != curve_offset) {
790 return false;
791 }
792 }
793
794 return true;
795 }
796
797 // mAB and mBA tags use the same encoding, including color lookup tables.
798 typedef struct {
799 uint8_t type [ 4];
800 uint8_t reserved_a [ 4];
801 uint8_t input_channels [ 1];
802 uint8_t output_channels [ 1];
803 uint8_t reserved_b [ 2];
804 uint8_t b_curve_offset [ 4];
805 uint8_t matrix_offset [ 4];
806 uint8_t m_curve_offset [ 4];
807 uint8_t clut_offset [ 4];
808 uint8_t a_curve_offset [ 4];
809 } mAB_or_mBA_Layout;
810
811 typedef struct {
812 uint8_t grid_points [16];
813 uint8_t grid_byte_width [ 1];
814 uint8_t reserved [ 3];
815 uint8_t variable [1/*variable*/];
816 } CLUT_Layout;
817
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)818 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
819 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
820 return false;
821 }
822
823 const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
824
825 a2b->input_channels = mABTag->input_channels[0];
826 a2b->output_channels = mABTag->output_channels[0];
827
828 // We require exactly three (ie XYZ/Lab/RGB) output channels
829 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
830 return false;
831 }
832 // We require no more than four (ie CMYK) input channels
833 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
834 return false;
835 }
836
837 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
838 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
839 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
840 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
841 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
842
843 // "B" curves must be present
844 if (0 == b_curve_offset) {
845 return false;
846 }
847
848 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
849 a2b->output_curves)) {
850 return false;
851 }
852
853 // "M" curves and Matrix must be used together
854 if (0 != m_curve_offset) {
855 if (0 == matrix_offset) {
856 return false;
857 }
858 a2b->matrix_channels = a2b->output_channels;
859 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
860 a2b->matrix_curves)) {
861 return false;
862 }
863
864 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
865 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
866 return false;
867 }
868 float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
869 const uint8_t* mtx_buf = tag->buf + matrix_offset;
870 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
871 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
872 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
873 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
874 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
875 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
876 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
877 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
878 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
879 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
880 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
881 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
882 } else {
883 if (0 != matrix_offset) {
884 return false;
885 }
886 a2b->matrix_channels = 0;
887 }
888
889 // "A" curves and CLUT must be used together
890 if (0 != a_curve_offset) {
891 if (0 == clut_offset) {
892 return false;
893 }
894 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
895 a2b->input_curves)) {
896 return false;
897 }
898
899 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
900 return false;
901 }
902 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
903
904 if (clut->grid_byte_width[0] == 1) {
905 a2b->grid_8 = clut->variable;
906 a2b->grid_16 = nullptr;
907 } else if (clut->grid_byte_width[0] == 2) {
908 a2b->grid_8 = nullptr;
909 a2b->grid_16 = clut->variable;
910 } else {
911 return false;
912 }
913
914 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; // the payload
915 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
916 a2b->grid_points[i] = clut->grid_points[i];
917 // The grid only makes sense with at least two points along each axis
918 if (a2b->grid_points[i] < 2) {
919 return false;
920 }
921 grid_size *= a2b->grid_points[i];
922 }
923 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
924 return false;
925 }
926 } else {
927 if (0 != clut_offset) {
928 return false;
929 }
930
931 // If there is no CLUT, the number of input and output channels must match
932 if (a2b->input_channels != a2b->output_channels) {
933 return false;
934 }
935
936 // Zero out the number of input channels to signal that we're skipping this stage
937 a2b->input_channels = 0;
938 }
939
940 return true;
941 }
942
943 // Exactly the same as read_tag_mab(), except where there are comments.
944 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)945 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
946 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
947 return false;
948 }
949
950 const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
951
952 b2a->input_channels = mBATag->input_channels[0];
953 b2a->output_channels = mBATag->output_channels[0];
954
955 // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
956 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
957 return false;
958 }
959 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
960 return false;
961 }
962
963 uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
964 uint32_t matrix_offset = read_big_u32(mBATag->matrix_offset);
965 uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
966 uint32_t clut_offset = read_big_u32(mBATag->clut_offset);
967 uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
968
969 if (0 == b_curve_offset) {
970 return false;
971 }
972
973 // "B" curves are our inputs, not outputs.
974 if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
975 b2a->input_curves)) {
976 return false;
977 }
978
979 if (0 != m_curve_offset) {
980 if (0 == matrix_offset) {
981 return false;
982 }
983 // Matrix channels is tied to input_channels (3), not output_channels.
984 b2a->matrix_channels = b2a->input_channels;
985
986 if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
987 b2a->matrix_curves)) {
988 return false;
989 }
990
991 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
992 return false;
993 }
994 float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f; // TODO: understand
995 const uint8_t* mtx_buf = tag->buf + matrix_offset;
996 b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
997 b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
998 b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
999 b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
1000 b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
1001 b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
1002 b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
1003 b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
1004 b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
1005 b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
1006 b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
1007 b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
1008 } else {
1009 if (0 != matrix_offset) {
1010 return false;
1011 }
1012 b2a->matrix_channels = 0;
1013 }
1014
1015 if (0 != a_curve_offset) {
1016 if (0 == clut_offset) {
1017 return false;
1018 }
1019
1020 // "A" curves are our output, not input.
1021 if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1022 b2a->output_curves)) {
1023 return false;
1024 }
1025
1026 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1027 return false;
1028 }
1029 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1030
1031 if (clut->grid_byte_width[0] == 1) {
1032 b2a->grid_8 = clut->variable;
1033 b2a->grid_16 = nullptr;
1034 } else if (clut->grid_byte_width[0] == 2) {
1035 b2a->grid_8 = nullptr;
1036 b2a->grid_16 = clut->variable;
1037 } else {
1038 return false;
1039 }
1040
1041 uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1042 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1043 b2a->grid_points[i] = clut->grid_points[i];
1044 if (b2a->grid_points[i] < 2) {
1045 return false;
1046 }
1047 grid_size *= b2a->grid_points[i];
1048 }
1049 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1050 return false;
1051 }
1052 } else {
1053 if (0 != clut_offset) {
1054 return false;
1055 }
1056
1057 if (b2a->input_channels != b2a->output_channels) {
1058 return false;
1059 }
1060
1061 // Zero out *output* channels to skip this stage.
1062 b2a->output_channels = 0;
1063 }
1064 return true;
1065 }
1066
1067 // If you pass f, we'll fit a possibly-non-zero value for *f.
1068 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1069 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1070 float* c, float* d, float* f = nullptr) {
1071 assert(N > 1);
1072 // We iteratively fit the first points to the TF's linear piece.
1073 // We want the cx + f line to pass through the first and last points we fit exactly.
1074 //
1075 // As we walk along the points we find the minimum and maximum slope of the line before the
1076 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
1077 // emtpy, when we definitely can't add any more points.
1078 //
1079 // Some points' error intervals may intersect the running interval but not lie fully
1080 // within it. So we keep track of the last point we saw that is a valid end point candidate,
1081 // and once the search is done, back up to build the line through *that* point.
1082 const float dx = 1.0f / static_cast<float>(N - 1);
1083
1084 int lin_points = 1;
1085
1086 float f_zero = 0.0f;
1087 if (f) {
1088 *f = eval_curve(curve, 0);
1089 } else {
1090 f = &f_zero;
1091 }
1092
1093
1094 float slope_min = -INFINITY_;
1095 float slope_max = +INFINITY_;
1096 for (int i = 1; i < N; ++i) {
1097 float x = static_cast<float>(i) * dx;
1098 float y = eval_curve(curve, x);
1099
1100 float slope_max_i = (y + tol - *f) / x,
1101 slope_min_i = (y - tol - *f) / x;
1102 if (slope_max_i < slope_min || slope_max < slope_min_i) {
1103 // Slope intervals would no longer overlap.
1104 break;
1105 }
1106 slope_max = fminf_(slope_max, slope_max_i);
1107 slope_min = fmaxf_(slope_min, slope_min_i);
1108
1109 float cur_slope = (y - *f) / x;
1110 if (slope_min <= cur_slope && cur_slope <= slope_max) {
1111 lin_points = i + 1;
1112 *c = cur_slope;
1113 }
1114 }
1115
1116 // Set D to the last point that met our tolerance.
1117 *d = static_cast<float>(lin_points - 1) * dx;
1118 return lin_points;
1119 }
1120
1121 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1122 static void canonicalize_identity(skcms_Curve* curve) {
1123 if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1124 int N = (int)curve->table_entries;
1125
1126 float c = 0.0f, d = 0.0f, f = 0.0f;
1127 if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
1128 && c == 1.0f
1129 && f == 0.0f) {
1130 curve->table_entries = 0;
1131 curve->table_8 = nullptr;
1132 curve->table_16 = nullptr;
1133 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
1134 }
1135 }
1136 }
1137
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1138 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1139 bool ok = false;
1140 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1141 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1142 if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1143 if (!ok) {
1144 return false;
1145 }
1146
1147 if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1148 if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1149 if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1150 if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1151
1152 if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1153 if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1154 if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1155
1156 if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1157 if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1158 if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1159
1160 return true;
1161 }
1162
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1163 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1164 bool ok = false;
1165 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1166 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1167 if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1168 if (!ok) {
1169 return false;
1170 }
1171
1172 if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1173 if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1174 if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1175
1176 if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1177 if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1178 if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1179
1180 if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1181 if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1182 if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1183 if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1184
1185 return true;
1186 }
1187
1188 typedef struct {
1189 uint8_t type [4];
1190 uint8_t reserved [4];
1191 uint8_t color_primaries [1];
1192 uint8_t transfer_characteristics [1];
1193 uint8_t matrix_coefficients [1];
1194 uint8_t video_full_range_flag [1];
1195 } CICP_Layout;
1196
read_cicp(const skcms_ICCTag * tag,skcms_CICP * cicp)1197 static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1198 if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1199 return false;
1200 }
1201
1202 const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1203
1204 cicp->color_primaries = cicpTag->color_primaries[0];
1205 cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1206 cicp->matrix_coefficients = cicpTag->matrix_coefficients[0];
1207 cicp->video_full_range_flag = cicpTag->video_full_range_flag[0];
1208 return true;
1209 }
1210
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1211 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1212 if (!profile || !profile->buffer || !tag) { return; }
1213 if (idx > profile->tag_count) { return; }
1214 const tag_Layout* tags = get_tag_table(profile);
1215 tag->signature = read_big_u32(tags[idx].signature);
1216 tag->size = read_big_u32(tags[idx].size);
1217 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
1218 tag->type = read_big_u32(tag->buf);
1219 }
1220
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1221 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1222 if (!profile || !profile->buffer || !tag) { return false; }
1223 const tag_Layout* tags = get_tag_table(profile);
1224 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1225 if (read_big_u32(tags[i].signature) == sig) {
1226 tag->signature = sig;
1227 tag->size = read_big_u32(tags[i].size);
1228 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
1229 tag->type = read_big_u32(tag->buf);
1230 return true;
1231 }
1232 }
1233 return false;
1234 }
1235
usable_as_src(const skcms_ICCProfile * profile)1236 static bool usable_as_src(const skcms_ICCProfile* profile) {
1237 return profile->has_A2B
1238 || (profile->has_trc && profile->has_toXYZD50);
1239 }
1240
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1241 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1242 const int priority[], const int priorities,
1243 skcms_ICCProfile* profile) {
1244 assert(SAFE_SIZEOF(header_Layout) == 132);
1245
1246 if (!profile) {
1247 return false;
1248 }
1249 memset(profile, 0, SAFE_SIZEOF(*profile));
1250
1251 if (len < SAFE_SIZEOF(header_Layout)) {
1252 return false;
1253 }
1254
1255 // Byte-swap all header fields
1256 const header_Layout* header = (const header_Layout*)buf;
1257 profile->buffer = (const uint8_t*)buf;
1258 profile->size = read_big_u32(header->size);
1259 uint32_t version = read_big_u32(header->version);
1260 profile->data_color_space = read_big_u32(header->data_color_space);
1261 profile->pcs = read_big_u32(header->pcs);
1262 uint32_t signature = read_big_u32(header->signature);
1263 float illuminant_X = read_big_fixed(header->illuminant_X);
1264 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1265 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1266 profile->tag_count = read_big_u32(header->tag_count);
1267
1268 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1269 // and major version
1270 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1271 if (signature != skcms_Signature_acsp ||
1272 profile->size > len ||
1273 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1274 (version >> 24) > 4) {
1275 return false;
1276 }
1277
1278 // Validate that illuminant is D50 white
1279 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1280 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1281 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1282 return false;
1283 }
1284
1285 // Validate that all tag entries have sane offset + size
1286 const tag_Layout* tags = get_tag_table(profile);
1287 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1288 uint32_t tag_offset = read_big_u32(tags[i].offset);
1289 uint32_t tag_size = read_big_u32(tags[i].size);
1290 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1291 if (tag_size < 4 || tag_end > profile->size) {
1292 return false;
1293 }
1294 }
1295
1296 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1297 return false;
1298 }
1299
1300 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1301
1302 // Pre-parse commonly used tags.
1303 skcms_ICCTag kTRC;
1304 if (profile->data_color_space == skcms_Signature_Gray &&
1305 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1306 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1307 // Malformed tag
1308 return false;
1309 }
1310 profile->trc[1] = profile->trc[0];
1311 profile->trc[2] = profile->trc[0];
1312 profile->has_trc = true;
1313
1314 if (pcs_is_xyz) {
1315 profile->toXYZD50.vals[0][0] = illuminant_X;
1316 profile->toXYZD50.vals[1][1] = illuminant_Y;
1317 profile->toXYZD50.vals[2][2] = illuminant_Z;
1318 profile->has_toXYZD50 = true;
1319 }
1320 } else {
1321 skcms_ICCTag rTRC, gTRC, bTRC;
1322 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1323 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1324 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1325 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1326 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1327 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1328 // Malformed TRC tags
1329 return false;
1330 }
1331 profile->has_trc = true;
1332 }
1333
1334 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1335 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1336 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1337 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1338 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1339 // Malformed XYZ tags
1340 return false;
1341 }
1342 profile->has_toXYZD50 = true;
1343 }
1344 }
1345
1346 for (int i = 0; i < priorities; i++) {
1347 // enum { perceptual, relative_colormetric, saturation }
1348 if (priority[i] < 0 || priority[i] > 2) {
1349 return false;
1350 }
1351 uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1352 skcms_ICCTag tag;
1353 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1354 if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1355 // Malformed A2B tag
1356 return false;
1357 }
1358 profile->has_A2B = true;
1359 break;
1360 }
1361 }
1362
1363 for (int i = 0; i < priorities; i++) {
1364 // enum { perceptual, relative_colormetric, saturation }
1365 if (priority[i] < 0 || priority[i] > 2) {
1366 return false;
1367 }
1368 uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1369 skcms_ICCTag tag;
1370 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1371 if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1372 // Malformed B2A tag
1373 return false;
1374 }
1375 profile->has_B2A = true;
1376 break;
1377 }
1378 }
1379
1380 skcms_ICCTag cicp_tag;
1381 if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
1382 if (!read_cicp(&cicp_tag, &profile->CICP)) {
1383 // Malformed CICP tag
1384 return false;
1385 }
1386 profile->has_CICP = true;
1387 }
1388
1389 return usable_as_src(profile);
1390 }
1391
1392
skcms_sRGB_profile()1393 const skcms_ICCProfile* skcms_sRGB_profile() {
1394 static const skcms_ICCProfile sRGB_profile = {
1395 nullptr, // buffer, moot here
1396
1397 0, // size, moot here
1398 skcms_Signature_RGB, // data_color_space
1399 skcms_Signature_XYZ, // pcs
1400 0, // tag count, moot here
1401
1402 // We choose to represent sRGB with its canonical transfer function,
1403 // and with its canonical XYZD50 gamut matrix.
1404 true, // has_trc, followed by the 3 trc curves
1405 {
1406 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1407 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1408 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1409 },
1410
1411 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1412 {{
1413 { 0.436065674f, 0.385147095f, 0.143066406f },
1414 { 0.222488403f, 0.716873169f, 0.060607910f },
1415 { 0.013916016f, 0.097076416f, 0.714096069f },
1416 }},
1417
1418 false, // has_A2B, followed by A2B itself, which we don't care about.
1419 {
1420 0,
1421 {
1422 {{0, {0,0, 0,0,0,0,0}}},
1423 {{0, {0,0, 0,0,0,0,0}}},
1424 {{0, {0,0, 0,0,0,0,0}}},
1425 {{0, {0,0, 0,0,0,0,0}}},
1426 },
1427 {0,0,0,0},
1428 nullptr,
1429 nullptr,
1430
1431 0,
1432 {
1433 {{0, {0,0, 0,0,0,0,0}}},
1434 {{0, {0,0, 0,0,0,0,0}}},
1435 {{0, {0,0, 0,0,0,0,0}}},
1436 },
1437 {{
1438 { 0,0,0,0 },
1439 { 0,0,0,0 },
1440 { 0,0,0,0 },
1441 }},
1442
1443 0,
1444 {
1445 {{0, {0,0, 0,0,0,0,0}}},
1446 {{0, {0,0, 0,0,0,0,0}}},
1447 {{0, {0,0, 0,0,0,0,0}}},
1448 },
1449 },
1450
1451 false, // has_B2A, followed by B2A itself, which we also don't care about.
1452 {
1453 0,
1454 {
1455 {{0, {0,0, 0,0,0,0,0}}},
1456 {{0, {0,0, 0,0,0,0,0}}},
1457 {{0, {0,0, 0,0,0,0,0}}},
1458 },
1459
1460 0,
1461 {{
1462 { 0,0,0,0 },
1463 { 0,0,0,0 },
1464 { 0,0,0,0 },
1465 }},
1466 {
1467 {{0, {0,0, 0,0,0,0,0}}},
1468 {{0, {0,0, 0,0,0,0,0}}},
1469 {{0, {0,0, 0,0,0,0,0}}},
1470 },
1471
1472 0,
1473 {0,0,0,0},
1474 nullptr,
1475 nullptr,
1476 {
1477 {{0, {0,0, 0,0,0,0,0}}},
1478 {{0, {0,0, 0,0,0,0,0}}},
1479 {{0, {0,0, 0,0,0,0,0}}},
1480 {{0, {0,0, 0,0,0,0,0}}},
1481 },
1482 },
1483
1484 false, // has_CICP, followed by cicp itself which we don't care about.
1485 { 0, 0, 0, 0 },
1486 };
1487 return &sRGB_profile;
1488 }
1489
skcms_XYZD50_profile()1490 const skcms_ICCProfile* skcms_XYZD50_profile() {
1491 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1492 static const skcms_ICCProfile XYZD50_profile = {
1493 nullptr, // buffer, moot here
1494
1495 0, // size, moot here
1496 skcms_Signature_RGB, // data_color_space
1497 skcms_Signature_XYZ, // pcs
1498 0, // tag count, moot here
1499
1500 true, // has_trc, followed by the 3 trc curves
1501 {
1502 {{0, {1,1, 0,0,0,0,0}}},
1503 {{0, {1,1, 0,0,0,0,0}}},
1504 {{0, {1,1, 0,0,0,0,0}}},
1505 },
1506
1507 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1508 {{
1509 { 1,0,0 },
1510 { 0,1,0 },
1511 { 0,0,1 },
1512 }},
1513
1514 false, // has_A2B, followed by A2B itself, which we don't care about.
1515 {
1516 0,
1517 {
1518 {{0, {0,0, 0,0,0,0,0}}},
1519 {{0, {0,0, 0,0,0,0,0}}},
1520 {{0, {0,0, 0,0,0,0,0}}},
1521 {{0, {0,0, 0,0,0,0,0}}},
1522 },
1523 {0,0,0,0},
1524 nullptr,
1525 nullptr,
1526
1527 0,
1528 {
1529 {{0, {0,0, 0,0,0,0,0}}},
1530 {{0, {0,0, 0,0,0,0,0}}},
1531 {{0, {0,0, 0,0,0,0,0}}},
1532 },
1533 {{
1534 { 0,0,0,0 },
1535 { 0,0,0,0 },
1536 { 0,0,0,0 },
1537 }},
1538
1539 0,
1540 {
1541 {{0, {0,0, 0,0,0,0,0}}},
1542 {{0, {0,0, 0,0,0,0,0}}},
1543 {{0, {0,0, 0,0,0,0,0}}},
1544 },
1545 },
1546
1547 false, // has_B2A, followed by B2A itself, which we also don't care about.
1548 {
1549 0,
1550 {
1551 {{0, {0,0, 0,0,0,0,0}}},
1552 {{0, {0,0, 0,0,0,0,0}}},
1553 {{0, {0,0, 0,0,0,0,0}}},
1554 },
1555
1556 0,
1557 {{
1558 { 0,0,0,0 },
1559 { 0,0,0,0 },
1560 { 0,0,0,0 },
1561 }},
1562 {
1563 {{0, {0,0, 0,0,0,0,0}}},
1564 {{0, {0,0, 0,0,0,0,0}}},
1565 {{0, {0,0, 0,0,0,0,0}}},
1566 },
1567
1568 0,
1569 {0,0,0,0},
1570 nullptr,
1571 nullptr,
1572 {
1573 {{0, {0,0, 0,0,0,0,0}}},
1574 {{0, {0,0, 0,0,0,0,0}}},
1575 {{0, {0,0, 0,0,0,0,0}}},
1576 {{0, {0,0, 0,0,0,0,0}}},
1577 },
1578 },
1579
1580 false, // has_CICP, followed by cicp itself which we don't care about.
1581 { 0, 0, 0, 0 },
1582 };
1583
1584 return &XYZD50_profile;
1585 }
1586
skcms_sRGB_TransferFunction()1587 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1588 return &skcms_sRGB_profile()->trc[0].parametric;
1589 }
1590
skcms_sRGB_Inverse_TransferFunction()1591 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1592 static const skcms_TransferFunction sRGB_inv =
1593 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1594 return &sRGB_inv;
1595 }
1596
skcms_Identity_TransferFunction()1597 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1598 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1599 return &identity;
1600 }
1601
1602 const uint8_t skcms_252_random_bytes[] = {
1603 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1604 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1605 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1606 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1607 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1608 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1609 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1610 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1611 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1612 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1613 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1614 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1615 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1616 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1617 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1618 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1619 };
1620
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1621 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1622 // Test for exactly equal profiles first.
1623 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1624 return true;
1625 }
1626
1627 // For now this is the essentially the same strategy we use in test_only.c
1628 // for our skcms_Transform() smoke tests:
1629 // 1) transform A to XYZD50
1630 // 2) transform B to XYZD50
1631 // 3) return true if they're similar enough
1632 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1633
1634 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1635 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1636
1637 // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1638 // to be treated as equal. But CMYK profiles are a totally different ballgame.
1639 const auto CMYK = skcms_Signature_CMYK;
1640 if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1641 return false;
1642 }
1643
1644 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1645 // TODO: working with RGBA_8888 either way is probably fastest.
1646 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1647 size_t npixels = 84;
1648 if (A->data_color_space == skcms_Signature_CMYK) {
1649 fmt = skcms_PixelFormat_RGBA_8888;
1650 npixels = 63;
1651 }
1652
1653 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1654 // use pre-canned results and skip that skcms_Transform() call?
1655 uint8_t dstA[252],
1656 dstB[252];
1657 if (!skcms_Transform(
1658 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1659 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1660 npixels)) {
1661 return false;
1662 }
1663 if (!skcms_Transform(
1664 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1665 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1666 npixels)) {
1667 return false;
1668 }
1669
1670 // TODO: make sure this final check has reasonable codegen.
1671 for (size_t i = 0; i < 252; i++) {
1672 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1673 return false;
1674 }
1675 }
1676 return true;
1677 }
1678
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1679 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1680 const skcms_TransferFunction* inv_tf) {
1681 if (!profile || !profile->has_trc) {
1682 return false;
1683 }
1684
1685 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1686 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1687 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1688 }
1689
is_zero_to_one(float x)1690 static bool is_zero_to_one(float x) {
1691 return 0 <= x && x <= 1;
1692 }
1693
1694 typedef struct { float vals[3]; } skcms_Vector3;
1695
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1696 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1697 skcms_Vector3 dst = {{0,0,0}};
1698 for (int row = 0; row < 3; ++row) {
1699 dst.vals[row] = m->vals[row][0] * v->vals[0]
1700 + m->vals[row][1] * v->vals[1]
1701 + m->vals[row][2] * v->vals[2];
1702 }
1703 return dst;
1704 }
1705
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1706 bool skcms_AdaptToXYZD50(float wx, float wy,
1707 skcms_Matrix3x3* toXYZD50) {
1708 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1709 !toXYZD50) {
1710 return false;
1711 }
1712
1713 // Assumes that Y is 1.0f.
1714 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1715
1716 // Now convert toXYZ matrix to toXYZD50.
1717 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1718
1719 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1720 // the matrices below. The Bradford method is used by Adobe and is widely considered
1721 // to be the best.
1722 skcms_Matrix3x3 xyz_to_lms = {{
1723 { 0.8951f, 0.2664f, -0.1614f },
1724 { -0.7502f, 1.7135f, 0.0367f },
1725 { 0.0389f, -0.0685f, 1.0296f },
1726 }};
1727 skcms_Matrix3x3 lms_to_xyz = {{
1728 { 0.9869929f, -0.1470543f, 0.1599627f },
1729 { 0.4323053f, 0.5183603f, 0.0492912f },
1730 { -0.0085287f, 0.0400428f, 0.9684867f },
1731 }};
1732
1733 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1734 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1735
1736 *toXYZD50 = {{
1737 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1738 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1739 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1740 }};
1741 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1742 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1743
1744 return true;
1745 }
1746
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1747 bool skcms_PrimariesToXYZD50(float rx, float ry,
1748 float gx, float gy,
1749 float bx, float by,
1750 float wx, float wy,
1751 skcms_Matrix3x3* toXYZD50) {
1752 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1753 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1754 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1755 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1756 !toXYZD50) {
1757 return false;
1758 }
1759
1760 // First, we need to convert xy values (primaries) to XYZ.
1761 skcms_Matrix3x3 primaries = {{
1762 { rx, gx, bx },
1763 { ry, gy, by },
1764 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1765 }};
1766 skcms_Matrix3x3 primaries_inv;
1767 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1768 return false;
1769 }
1770
1771 // Assumes that Y is 1.0f.
1772 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1773 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1774
1775 skcms_Matrix3x3 toXYZ = {{
1776 { XYZ.vals[0], 0, 0 },
1777 { 0, XYZ.vals[1], 0 },
1778 { 0, 0, XYZ.vals[2] },
1779 }};
1780 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1781
1782 skcms_Matrix3x3 DXtoD50;
1783 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1784 return false;
1785 }
1786
1787 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1788 return true;
1789 }
1790
1791
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1792 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1793 double a00 = src->vals[0][0],
1794 a01 = src->vals[1][0],
1795 a02 = src->vals[2][0],
1796 a10 = src->vals[0][1],
1797 a11 = src->vals[1][1],
1798 a12 = src->vals[2][1],
1799 a20 = src->vals[0][2],
1800 a21 = src->vals[1][2],
1801 a22 = src->vals[2][2];
1802
1803 double b0 = a00*a11 - a01*a10,
1804 b1 = a00*a12 - a02*a10,
1805 b2 = a01*a12 - a02*a11,
1806 b3 = a20,
1807 b4 = a21,
1808 b5 = a22;
1809
1810 double determinant = b0*b5
1811 - b1*b4
1812 + b2*b3;
1813
1814 if (determinant == 0) {
1815 return false;
1816 }
1817
1818 double invdet = 1.0 / determinant;
1819 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1820 return false;
1821 }
1822
1823 b0 *= invdet;
1824 b1 *= invdet;
1825 b2 *= invdet;
1826 b3 *= invdet;
1827 b4 *= invdet;
1828 b5 *= invdet;
1829
1830 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1831 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1832 dst->vals[2][0] = (float)( + b2 );
1833 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1834 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1835 dst->vals[2][1] = (float)( - b1 );
1836 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1837 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1838 dst->vals[2][2] = (float)( + b0 );
1839
1840 for (int r = 0; r < 3; ++r)
1841 for (int c = 0; c < 3; ++c) {
1842 if (!isfinitef_(dst->vals[r][c])) {
1843 return false;
1844 }
1845 }
1846 return true;
1847 }
1848
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1849 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1850 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1851 for (int r = 0; r < 3; r++)
1852 for (int c = 0; c < 3; c++) {
1853 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1854 + A->vals[r][1] * B->vals[1][c]
1855 + A->vals[r][2] * B->vals[2][c];
1856 }
1857 return m;
1858 }
1859
1860 #if defined(__clang__)
1861 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1862 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1863 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1864 TF_PQish pq;
1865 TF_HLGish hlg;
1866 switch (classify(*src, &pq, &hlg)) {
1867 case skcms_TFType_Invalid: return false;
1868 case skcms_TFType_sRGBish: break; // handled below
1869
1870 case skcms_TFType_PQish:
1871 *dst = { TFKind_marker(skcms_TFType_PQish), -pq.A, pq.D, 1.0f/pq.F
1872 , pq.B, -pq.E, 1.0f/pq.C};
1873 return true;
1874
1875 case skcms_TFType_HLGish:
1876 *dst = { TFKind_marker(skcms_TFType_HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1877 , 1.0f/hlg.a, hlg.b, hlg.c
1878 , hlg.K_minus_1 };
1879 return true;
1880
1881 case skcms_TFType_HLGinvish:
1882 *dst = { TFKind_marker(skcms_TFType_HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1883 , 1.0f/hlg.a, hlg.b, hlg.c
1884 , hlg.K_minus_1 };
1885 return true;
1886 }
1887
1888 assert (classify(*src) == skcms_TFType_sRGBish);
1889
1890 // We're inverting this function, solving for x in terms of y.
1891 // y = (cx + f) x < d
1892 // (ax + b)^g + e x ≥ d
1893 // The inverse of this function can be expressed in the same piecewise form.
1894 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1895
1896 // We'll start by finding the new threshold inv.d.
1897 // In principle we should be able to find that by solving for y at x=d from either side.
1898 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1899 float d_l = src->c * src->d + src->f,
1900 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1901 if (fabsf_(d_l - d_r) > 1/512.0f) {
1902 return false;
1903 }
1904 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1905
1906 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1907 if (inv.d > 0) {
1908 // Inverting the linear section is pretty straightfoward:
1909 // y = cx + f
1910 // y - f = cx
1911 // (1/c)y - f/c = x
1912 inv.c = 1.0f/src->c;
1913 inv.f = -src->f/src->c;
1914 }
1915
1916 // The interesting part is inverting the nonlinear section:
1917 // y = (ax + b)^g + e.
1918 // y - e = (ax + b)^g
1919 // (y - e)^1/g = ax + b
1920 // (y - e)^1/g - b = ax
1921 // (1/a)(y - e)^1/g - b/a = x
1922 //
1923 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1924 // let k = (1/a)^g
1925 // (1/a)( y - e)^1/g - b/a = x
1926 // (ky - ke)^1/g - b/a = x
1927
1928 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1929 inv.g = 1.0f / src->g;
1930 inv.a = k;
1931 inv.b = -k * src->e;
1932 inv.e = -src->b / src->a;
1933
1934 // We need to enforce the same constraints here that we do when fitting a curve,
1935 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1936 // of the source function if we're here.
1937
1938 // Just like when fitting the curve, there's really no way to rescue a < 0.
1939 if (inv.a < 0) {
1940 return false;
1941 }
1942 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1943 if (inv.a * inv.d + inv.b < 0) {
1944 inv.b = -inv.a * inv.d;
1945 }
1946
1947 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1948 // where we might still fail here, like non-finite parameter values.
1949 if (classify(inv) != skcms_TFType_sRGBish) {
1950 return false;
1951 }
1952
1953 assert (inv.a >= 0);
1954 assert (inv.a * inv.d + inv.b >= 0);
1955
1956 // Now in principle we're done.
1957 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1958 // e or f of the inverse, depending on which segment contains src(1.0f).
1959 float s = skcms_TransferFunction_eval(src, 1.0f);
1960 if (!isfinitef_(s)) {
1961 return false;
1962 }
1963
1964 float sign = s < 0 ? -1.0f : 1.0f;
1965 s *= sign;
1966 if (s < inv.d) {
1967 inv.f = 1.0f - sign * inv.c * s;
1968 } else {
1969 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1970 }
1971
1972 *dst = inv;
1973 return classify(*dst) == skcms_TFType_sRGBish;
1974 }
1975
1976 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1977
1978 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1979 //
1980 // tf(x) = cx + f x < d
1981 // tf(x) = (ax + b)^g + e x ≥ d
1982 //
1983 // When fitting, we add the additional constraint that both pieces meet at d:
1984 //
1985 // cd + f = (ad + b)^g + e
1986 //
1987 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1988 //
1989 // tf(x) = cx + f x < d
1990 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1991 //
1992 // Our overall strategy is then:
1993 // For a couple tolerances,
1994 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1995 // - invert c,d,f
1996 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1997 // (and by constraint, inverted e) to the inverse of the table.
1998 // Return the parameters with least maximum error.
1999 //
2000 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2001 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2002 //
2003 // let y = Table(x)
2004 // r(x) = x - f_inv(y)
2005 //
2006 // ∂r/∂g = ln(ay + b)*(ay + b)^g
2007 // - ln(ad + b)*(ad + b)^g
2008 // ∂r/∂a = yg(ay + b)^(g-1)
2009 // - dg(ad + b)^(g-1)
2010 // ∂r/∂b = g(ay + b)^(g-1)
2011 // - g(ad + b)^(g-1)
2012
2013 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2014 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])2015 static float rg_nonlinear(float x,
2016 const skcms_Curve* curve,
2017 const skcms_TransferFunction* tf,
2018 float dfdP[3]) {
2019 const float y = eval_curve(curve, x);
2020
2021 const float g = tf->g, a = tf->a, b = tf->b,
2022 c = tf->c, d = tf->d, f = tf->f;
2023
2024 const float Y = fmaxf_(a*y + b, 0.0f),
2025 D = a*d + b;
2026 assert (D >= 0);
2027
2028 // The gradient.
2029 dfdP[0] = logf_(Y)*powf_(Y, g)
2030 - logf_(D)*powf_(D, g);
2031 dfdP[1] = y*g*powf_(Y, g-1)
2032 - d*g*powf_(D, g-1);
2033 dfdP[2] = g*powf_(Y, g-1)
2034 - g*powf_(D, g-1);
2035
2036 // The residual.
2037 const float f_inv = powf_(Y, g)
2038 - powf_(D, g)
2039 + c*d + f;
2040 return x - f_inv;
2041 }
2042
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)2043 static bool gauss_newton_step(const skcms_Curve* curve,
2044 skcms_TransferFunction* tf,
2045 float x0, float dx, int N) {
2046 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2047 //
2048 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2049 //
2050 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2051 // where r(P) is the residual vector
2052 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
2053 //
2054 // Let's review the shape of each of these expressions:
2055 // r(P) is [N x 1], a column vector with one entry per value of x tested
2056 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
2057 // Jf^T is [3 x N], the transpose of Jf
2058 //
2059 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2060 // and so is its inverse (Jf^T Jf)^-1
2061 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2062 //
2063 // Our implementation strategy to get to the final ∆P is
2064 // 1) evaluate Jf^T Jf, call that lhs
2065 // 2) evaluate Jf^T r(P), call that rhs
2066 // 3) invert lhs
2067 // 4) multiply inverse lhs by rhs
2068 //
2069 // This is a friendly implementation strategy because we don't have to have any
2070 // buffers that scale with N, and equally nice don't have to perform any matrix
2071 // operations that are variable size.
2072 //
2073 // Other implementation strategies could trade this off, e.g. evaluating the
2074 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2075 // the residuals. That would probably require implementing singular value
2076 // decomposition, and would create a [3 x N] matrix to be multiplied by the
2077 // [N x 1] residual vector, but on the upside I think that'd eliminate the
2078 // possibility of this gauss_newton_step() function ever failing.
2079
2080 // 0) start off with lhs and rhs safely zeroed.
2081 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2082 skcms_Vector3 rhs = { {0,0,0} };
2083
2084 // 1,2) evaluate lhs and evaluate rhs
2085 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2086 // so we'll have to update lhs and rhs at the same time.
2087 for (int i = 0; i < N; i++) {
2088 float x = x0 + static_cast<float>(i)*dx;
2089
2090 float dfdP[3] = {0,0,0};
2091 float resid = rg_nonlinear(x,curve,tf, dfdP);
2092
2093 for (int r = 0; r < 3; r++) {
2094 for (int c = 0; c < 3; c++) {
2095 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2096 }
2097 rhs.vals[r] += dfdP[r] * resid;
2098 }
2099 }
2100
2101 // If any of the 3 P parameters are unused, this matrix will be singular.
2102 // Detect those cases and fix them up to indentity instead, so we can invert.
2103 for (int k = 0; k < 3; k++) {
2104 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2105 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2106 lhs.vals[k][k] = 1;
2107 }
2108 }
2109
2110 // 3) invert lhs
2111 skcms_Matrix3x3 lhs_inv;
2112 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2113 return false;
2114 }
2115
2116 // 4) multiply inverse lhs by rhs
2117 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2118 tf->g += dP.vals[0];
2119 tf->a += dP.vals[1];
2120 tf->b += dP.vals[2];
2121 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2122 }
2123
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2124 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2125 const skcms_TransferFunction* tf_inv) {
2126 skcms_TransferFunction tf;
2127 if (!skcms_TransferFunction_invert(tf_inv, &tf) || skcms_TFType_sRGBish != classify(tf)) {
2128 return INFINITY_;
2129 }
2130
2131 skcms_TransferFunction tf_inv_again;
2132 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2133 return INFINITY_;
2134 }
2135
2136 return skcms_MaxRoundtripError(curve, &tf_inv_again);
2137 }
2138
2139 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2140 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2141 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2142 auto fixup_tf = [tf]() {
2143 // a must be non-negative. That ensures the function is monotonically increasing.
2144 // We don't really know how to fix up a if it goes negative.
2145 if (tf->a < 0) {
2146 return false;
2147 }
2148 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2149 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2150 if (tf->a * tf->d + tf->b < 0) {
2151 tf->b = -tf->a * tf->d;
2152 }
2153 assert (tf->a >= 0 &&
2154 tf->a * tf->d + tf->b >= 0);
2155
2156 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2157 // parameter so we can guarantee this.
2158 tf->e = tf->c*tf->d + tf->f
2159 - powf_(tf->a*tf->d + tf->b, tf->g);
2160
2161 return true;
2162 };
2163
2164 if (!fixup_tf()) {
2165 return false;
2166 }
2167
2168 // No matter where we start, dx should always represent N even steps from 0 to 1.
2169 const float dx = 1.0f / static_cast<float>(N-1);
2170
2171 skcms_TransferFunction best_tf = *tf;
2172 float best_max_error = INFINITY_;
2173
2174 // Need this or several curves get worse... *sigh*
2175 float init_error = max_roundtrip_error_checked(curve, tf);
2176 if (init_error < best_max_error) {
2177 best_max_error = init_error;
2178 best_tf = *tf;
2179 }
2180
2181 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2182 for (int j = 0; j < 8; j++) {
2183 if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
2184 *tf = best_tf;
2185 return isfinitef_(best_max_error);
2186 }
2187
2188 float max_error = max_roundtrip_error_checked(curve, tf);
2189 if (max_error < best_max_error) {
2190 best_max_error = max_error;
2191 best_tf = *tf;
2192 }
2193 }
2194
2195 *tf = best_tf;
2196 return isfinitef_(best_max_error);
2197 }
2198
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2199 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2200 skcms_TransferFunction* approx,
2201 float* max_error) {
2202 if (!curve || !approx || !max_error) {
2203 return false;
2204 }
2205
2206 if (curve->table_entries == 0) {
2207 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2208 return false;
2209 }
2210
2211 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2212 // We need at least two points, and must put some reasonable cap on the maximum number.
2213 return false;
2214 }
2215
2216 int N = (int)curve->table_entries;
2217 const float dx = 1.0f / static_cast<float>(N - 1);
2218
2219 *max_error = INFINITY_;
2220 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2221 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2222 skcms_TransferFunction tf,
2223 tf_inv;
2224
2225 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2226 tf.f = 0.0f;
2227 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2228
2229 if (L == N) {
2230 // If the entire data set was linear, move the coefficients to the nonlinear portion
2231 // with G == 1. This lets use a canonical representation with d == 0.
2232 tf.g = 1;
2233 tf.a = tf.c;
2234 tf.b = tf.f;
2235 tf.c = tf.d = tf.e = tf.f = 0;
2236 } else if (L == N - 1) {
2237 // Degenerate case with only two points in the nonlinear segment. Solve directly.
2238 tf.g = 1;
2239 tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
2240 eval_curve(curve, static_cast<float>(N-2)*dx))
2241 / dx;
2242 tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
2243 - tf.a * static_cast<float>(N-2)*dx;
2244 tf.e = 0;
2245 } else {
2246 // Start by guessing a gamma-only curve through the midpoint.
2247 int mid = (L + N) / 2;
2248 float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2249 float mid_y = eval_curve(curve, mid_x);
2250 tf.g = log2f_(mid_y) / log2f_(mid_x);
2251 tf.a = 1;
2252 tf.b = 0;
2253 tf.e = tf.c*tf.d + tf.f
2254 - powf_(tf.a*tf.d + tf.b, tf.g);
2255
2256
2257 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2258 !fit_nonlinear(curve, L,N, &tf_inv)) {
2259 continue;
2260 }
2261
2262 // We fit tf_inv, so calculate tf to keep in sync.
2263 // fit_nonlinear() should guarantee invertibility.
2264 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2265 assert(false);
2266 continue;
2267 }
2268 }
2269
2270 // We'd better have a sane, sRGB-ish TF by now.
2271 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2272 // anything else is just some accident of math and the way we pun tf.g as a type flag.
2273 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2274 if (skcms_TFType_sRGBish != classify(tf)) {
2275 continue;
2276 }
2277
2278 // We find our error by roundtripping the table through tf_inv.
2279 //
2280 // (The most likely use case for this approximation is to be inverted and
2281 // used as the transfer function for a destination color space.)
2282 //
2283 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2284 // invertible, so re-verify that here (and use the new inverse for testing).
2285 // fit_nonlinear() should guarantee this, but the special cases that don't use
2286 // it may fail this test.
2287 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2288 continue;
2289 }
2290
2291 float err = skcms_MaxRoundtripError(curve, &tf_inv);
2292 if (*max_error > err) {
2293 *max_error = err;
2294 *approx = tf;
2295 }
2296 }
2297 return isfinitef_(*max_error);
2298 }
2299
2300 // ~~~~ Impl. of skcms_Transform() ~~~~
2301
2302 typedef enum {
2303 Op_load_a8,
2304 Op_load_g8,
2305 Op_load_8888_palette8,
2306 Op_load_4444,
2307 Op_load_565,
2308 Op_load_888,
2309 Op_load_8888,
2310 Op_load_1010102,
2311 Op_load_101010x_XR,
2312 Op_load_161616LE,
2313 Op_load_16161616LE,
2314 Op_load_161616BE,
2315 Op_load_16161616BE,
2316 Op_load_hhh,
2317 Op_load_hhhh,
2318 Op_load_fff,
2319 Op_load_ffff,
2320
2321 Op_swap_rb,
2322 Op_clamp,
2323 Op_invert,
2324 Op_force_opaque,
2325 Op_premul,
2326 Op_unpremul,
2327 Op_matrix_3x3,
2328 Op_matrix_3x4,
2329
2330 Op_lab_to_xyz,
2331 Op_xyz_to_lab,
2332
2333 Op_tf_r,
2334 Op_tf_g,
2335 Op_tf_b,
2336 Op_tf_a,
2337
2338 Op_pq_r,
2339 Op_pq_g,
2340 Op_pq_b,
2341 Op_pq_a,
2342
2343 Op_hlg_r,
2344 Op_hlg_g,
2345 Op_hlg_b,
2346 Op_hlg_a,
2347
2348 Op_hlginv_r,
2349 Op_hlginv_g,
2350 Op_hlginv_b,
2351 Op_hlginv_a,
2352
2353 Op_table_r,
2354 Op_table_g,
2355 Op_table_b,
2356 Op_table_a,
2357
2358 Op_clut_A2B,
2359 Op_clut_B2A,
2360
2361 Op_store_a8,
2362 Op_store_g8,
2363 Op_store_4444,
2364 Op_store_565,
2365 Op_store_888,
2366 Op_store_8888,
2367 Op_store_1010102,
2368 Op_store_161616LE,
2369 Op_store_16161616LE,
2370 Op_store_161616BE,
2371 Op_store_16161616BE,
2372 Op_store_101010x_XR,
2373 Op_store_hhh,
2374 Op_store_hhhh,
2375 Op_store_fff,
2376 Op_store_ffff,
2377 } Op;
2378
2379 #if defined(__clang__)
2380 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2381 #elif defined(__GNUC__)
2382 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2383 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2384 template <int N, typename T>
2385 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2386
2387 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2388 #endif
2389
2390 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2391
2392 namespace baseline {
2393 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2394 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2395 #define N 1
2396 template <typename T> using V = T;
2397 using Color = float;
2398 #elif defined(__AVX512F__) && defined(__AVX512DQ__)
2399 #define N 16
2400 template <typename T> using V = Vec<N,T>;
2401 using Color = float;
2402 #elif defined(__AVX__)
2403 #define N 8
2404 template <typename T> using V = Vec<N,T>;
2405 using Color = float;
2406 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2407 #define N 8
2408 template <typename T> using V = Vec<N,T>;
2409 using Color = _Float16;
2410 #else
2411 #define N 4
2412 template <typename T> using V = Vec<N,T>;
2413 using Color = float;
2414 #endif
2415
2416 #include "src/Transform_inl.h"
2417 #undef N
2418 }
2419
2420 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2421 #if !defined(SKCMS_PORTABLE) && \
2422 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2423 (( defined(__clang__) && __clang_major__ >= 5) || \
2424 (!defined(__clang__) && defined(__GNUC__))) \
2425 && defined(__x86_64__)
2426
2427 #if !defined(__AVX2__)
2428 #if defined(__clang__)
2429 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2430 #elif defined(__GNUC__)
2431 #pragma GCC push_options
2432 #pragma GCC target("avx2,f16c")
2433 #endif
2434
2435 namespace hsw {
2436 #define USING_AVX
2437 #define USING_AVX_F16C
2438 #define USING_AVX2
2439 #define N 8
2440 template <typename T> using V = Vec<N,T>;
2441 using Color = float;
2442
2443 #include "src/Transform_inl.h"
2444
2445 // src/Transform_inl.h will undefine USING_* for us.
2446 #undef N
2447 }
2448
2449 #if defined(__clang__)
2450 #pragma clang attribute pop
2451 #elif defined(__GNUC__)
2452 #pragma GCC pop_options
2453 #endif
2454
2455 #define TEST_FOR_HSW
2456 #endif
2457
2458 #if !defined(__AVX512F__) || !defined(__AVX512DQ__)
2459 #if defined(__clang__)
2460 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2461 #elif defined(__GNUC__)
2462 #pragma GCC push_options
2463 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2464 #endif
2465
2466 namespace skx {
2467 #define USING_AVX512F
2468 #define N 16
2469 template <typename T> using V = Vec<N,T>;
2470 using Color = float;
2471
2472 #include "src/Transform_inl.h"
2473
2474 // src/Transform_inl.h will undefine USING_* for us.
2475 #undef N
2476 }
2477
2478 #if defined(__clang__)
2479 #pragma clang attribute pop
2480 #elif defined(__GNUC__)
2481 #pragma GCC pop_options
2482 #endif
2483
2484 #define TEST_FOR_SKX
2485 #endif
2486
2487 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2488 enum class CpuType { None, HSW, SKX };
cpu_type()2489 static CpuType cpu_type() {
2490 static const CpuType type = []{
2491 if (!runtime_cpu_detection) {
2492 return CpuType::None;
2493 }
2494 // See http://www.sandpile.org/x86/cpuid.htm
2495
2496 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2497 uint32_t eax, ebx, ecx, edx;
2498 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2499 : "0"(1), "2"(0));
2500 if ((edx & (1u<<25)) && // SSE
2501 (edx & (1u<<26)) && // SSE2
2502 (ecx & (1u<< 0)) && // SSE3
2503 (ecx & (1u<< 9)) && // SSSE3
2504 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2505 (ecx & (1u<<19)) && // SSE4.1
2506 (ecx & (1u<<20)) && // SSE4.2
2507 (ecx & (1u<<26)) && // XSAVE
2508 (ecx & (1u<<27)) && // OSXSAVE
2509 (ecx & (1u<<28)) && // AVX
2510 (ecx & (1u<<29))) { // F16C
2511
2512 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2513 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2514 : "0"(7), "2"(0));
2515 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2516 uint32_t xcr0, dont_need_edx;
2517 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2518
2519 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2520 (xcr0 & (1u<<2)) && // YMM register state saved?
2521 (ebx & (1u<<5))) { // AVX2
2522 // At this point we're at least HSW. Continue checking for SKX.
2523 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2524 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2525 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2526 (ebx & (1u<<16)) && // AVX512F
2527 (ebx & (1u<<17)) && // AVX512DQ
2528 (ebx & (1u<<28)) && // AVX512CD
2529 (ebx & (1u<<30)) && // AVX512BW
2530 (ebx & (1u<<31))) { // AVX512VL
2531 return CpuType::SKX;
2532 }
2533 return CpuType::HSW;
2534 }
2535 }
2536 return CpuType::None;
2537 }();
2538 return type;
2539 }
2540 #endif
2541
2542 #endif
2543
2544 typedef struct {
2545 Op op;
2546 const void* arg;
2547 } OpAndArg;
2548
select_curve_op(const skcms_Curve * curve,int channel)2549 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2550 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2551 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2552 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2553 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2554 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2555 };
2556 const auto& op = ops[channel];
2557
2558 if (curve->table_entries == 0) {
2559 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2560
2561 const skcms_TransferFunction& tf = curve->parametric;
2562
2563 if (tf.g == 1 && tf.a == 1 &&
2564 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2565 return noop;
2566 }
2567
2568 switch (classify(tf)) {
2569 case skcms_TFType_Invalid: return noop;
2570 case skcms_TFType_sRGBish: return OpAndArg{op.sRGBish, &tf};
2571 case skcms_TFType_PQish: return OpAndArg{op.PQish, &tf};
2572 case skcms_TFType_HLGish: return OpAndArg{op.HLGish, &tf};
2573 case skcms_TFType_HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2574 }
2575 }
2576 return OpAndArg{op.table, curve};
2577 }
2578
bytes_per_pixel(skcms_PixelFormat fmt)2579 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2580 switch (fmt >> 1) { // ignore rgb/bgr
2581 case skcms_PixelFormat_A_8 >> 1: return 1;
2582 case skcms_PixelFormat_G_8 >> 1: return 1;
2583 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2584 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2585 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2586 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2587 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2588 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2589 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2590 case skcms_PixelFormat_RGB_101010x_XR >> 1: return 4;
2591 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2592 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2593 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2594 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2595 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2596 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2597 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2598 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2599 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2600 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2601 }
2602 assert(false);
2603 return 0;
2604 }
2605
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2606 static bool prep_for_destination(const skcms_ICCProfile* profile,
2607 skcms_Matrix3x3* fromXYZD50,
2608 skcms_TransferFunction* invR,
2609 skcms_TransferFunction* invG,
2610 skcms_TransferFunction* invB) {
2611 // skcms_Transform() supports B2A destinations...
2612 if (profile->has_B2A) { return true; }
2613 // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2614 return profile->has_trc
2615 && profile->has_toXYZD50
2616 && profile->trc[0].table_entries == 0
2617 && profile->trc[1].table_entries == 0
2618 && profile->trc[2].table_entries == 0
2619 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2620 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2621 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2622 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2623 }
2624
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2625 bool skcms_Transform(const void* src,
2626 skcms_PixelFormat srcFmt,
2627 skcms_AlphaFormat srcAlpha,
2628 const skcms_ICCProfile* srcProfile,
2629 void* dst,
2630 skcms_PixelFormat dstFmt,
2631 skcms_AlphaFormat dstAlpha,
2632 const skcms_ICCProfile* dstProfile,
2633 size_t npixels) {
2634 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2635 dst, dstFmt, dstAlpha, dstProfile,
2636 npixels, nullptr);
2637 }
2638
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2639 bool skcms_TransformWithPalette(const void* src,
2640 skcms_PixelFormat srcFmt,
2641 skcms_AlphaFormat srcAlpha,
2642 const skcms_ICCProfile* srcProfile,
2643 void* dst,
2644 skcms_PixelFormat dstFmt,
2645 skcms_AlphaFormat dstAlpha,
2646 const skcms_ICCProfile* dstProfile,
2647 size_t nz,
2648 const void* palette) {
2649 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2650 src_bpp = bytes_per_pixel(srcFmt);
2651 // Let's just refuse if the request is absurdly big.
2652 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2653 return false;
2654 }
2655 int n = (int)nz;
2656
2657 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2658 if (!srcProfile) {
2659 srcProfile = skcms_sRGB_profile();
2660 }
2661 if (!dstProfile) {
2662 dstProfile = skcms_sRGB_profile();
2663 }
2664
2665 // We can't transform in place unless the PixelFormats are the same size.
2666 if (dst == src && dst_bpp != src_bpp) {
2667 return false;
2668 }
2669 // TODO: more careful alias rejection (like, dst == src + 1)?
2670
2671 if (needs_palette(srcFmt) && !palette) {
2672 return false;
2673 }
2674
2675 Op program [32];
2676 const void* arguments[32];
2677
2678 Op* ops = program;
2679 const void** args = arguments;
2680
2681 // These are always parametric curves of some sort.
2682 skcms_Curve dst_curves[3];
2683 dst_curves[0].table_entries =
2684 dst_curves[1].table_entries =
2685 dst_curves[2].table_entries = 0;
2686
2687 skcms_Matrix3x3 from_xyz;
2688
2689 switch (srcFmt >> 1) {
2690 default: return false;
2691 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2692 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2693 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2694 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2695 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2696 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2697 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2698 case skcms_PixelFormat_RGB_101010x_XR >> 1: *ops++ = Op_load_101010x_XR; break;
2699 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2700 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2701 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2702 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2703 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2704 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2705 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2706 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2707 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2708 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2709
2710 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2711 *args++ = palette;
2712 break;
2713 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2714 *ops++ = Op_load_8888;
2715 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2716 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2717 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2718 break;
2719 }
2720 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2721 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2722 *ops++ = Op_clamp;
2723 }
2724 if (srcFmt & 1) {
2725 *ops++ = Op_swap_rb;
2726 }
2727 skcms_ICCProfile gray_dst_profile;
2728 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2729 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2730 // luminance (Y) by the destination transfer function.
2731 gray_dst_profile = *dstProfile;
2732 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2733 dstProfile = &gray_dst_profile;
2734 }
2735
2736 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2737 // Photoshop creates CMYK images as inverse CMYK.
2738 // These happen to be the only ones we've _ever_ seen.
2739 *ops++ = Op_invert;
2740 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2741 srcAlpha = skcms_AlphaFormat_Unpremul;
2742 }
2743
2744 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2745 *ops++ = Op_force_opaque;
2746 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2747 *ops++ = Op_unpremul;
2748 }
2749
2750 if (dstProfile != srcProfile) {
2751
2752 if (!prep_for_destination(dstProfile,
2753 &from_xyz,
2754 &dst_curves[0].parametric,
2755 &dst_curves[1].parametric,
2756 &dst_curves[2].parametric)) {
2757 return false;
2758 }
2759
2760 if (srcProfile->has_A2B) {
2761 if (srcProfile->A2B.input_channels) {
2762 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2763 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2764 if (oa.arg) {
2765 *ops++ = oa.op;
2766 *args++ = oa.arg;
2767 }
2768 }
2769 *ops++ = Op_clamp;
2770 *ops++ = Op_clut_A2B;
2771 *args++ = &srcProfile->A2B;
2772 }
2773
2774 if (srcProfile->A2B.matrix_channels == 3) {
2775 for (int i = 0; i < 3; i++) {
2776 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2777 if (oa.arg) {
2778 *ops++ = oa.op;
2779 *args++ = oa.arg;
2780 }
2781 }
2782
2783 static const skcms_Matrix3x4 I = {{
2784 {1,0,0,0},
2785 {0,1,0,0},
2786 {0,0,1,0},
2787 }};
2788 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2789 *ops++ = Op_matrix_3x4;
2790 *args++ = &srcProfile->A2B.matrix;
2791 }
2792 }
2793
2794 if (srcProfile->A2B.output_channels == 3) {
2795 for (int i = 0; i < 3; i++) {
2796 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2797 if (oa.arg) {
2798 *ops++ = oa.op;
2799 *args++ = oa.arg;
2800 }
2801 }
2802 }
2803
2804 if (srcProfile->pcs == skcms_Signature_Lab) {
2805 *ops++ = Op_lab_to_xyz;
2806 }
2807
2808 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2809 for (int i = 0; i < 3; i++) {
2810 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2811 if (oa.arg) {
2812 *ops++ = oa.op;
2813 *args++ = oa.arg;
2814 }
2815 }
2816 } else {
2817 return false;
2818 }
2819
2820 // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2821 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2822
2823 if (dstProfile->has_B2A) {
2824 // B2A needs its input in XYZD50, so transform TRC sources now.
2825 if (!srcProfile->has_A2B) {
2826 *ops++ = Op_matrix_3x3;
2827 *args++ = &srcProfile->toXYZD50;
2828 }
2829
2830 if (dstProfile->pcs == skcms_Signature_Lab) {
2831 *ops++ = Op_xyz_to_lab;
2832 }
2833
2834 if (dstProfile->B2A.input_channels == 3) {
2835 for (int i = 0; i < 3; i++) {
2836 OpAndArg oa = select_curve_op(&dstProfile->B2A.input_curves[i], i);
2837 if (oa.arg) {
2838 *ops++ = oa.op;
2839 *args++ = oa.arg;
2840 }
2841 }
2842 }
2843
2844 if (dstProfile->B2A.matrix_channels == 3) {
2845 static const skcms_Matrix3x4 I = {{
2846 {1,0,0,0},
2847 {0,1,0,0},
2848 {0,0,1,0},
2849 }};
2850 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2851 *ops++ = Op_matrix_3x4;
2852 *args++ = &dstProfile->B2A.matrix;
2853 }
2854
2855 for (int i = 0; i < 3; i++) {
2856 OpAndArg oa = select_curve_op(&dstProfile->B2A.matrix_curves[i], i);
2857 if (oa.arg) {
2858 *ops++ = oa.op;
2859 *args++ = oa.arg;
2860 }
2861 }
2862 }
2863
2864 if (dstProfile->B2A.output_channels) {
2865 *ops++ = Op_clamp;
2866 *ops++ = Op_clut_B2A;
2867 *args++ = &dstProfile->B2A;
2868 for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
2869 OpAndArg oa = select_curve_op(&dstProfile->B2A.output_curves[i], i);
2870 if (oa.arg) {
2871 *ops++ = oa.op;
2872 *args++ = oa.arg;
2873 }
2874 }
2875 }
2876 } else {
2877 // This is a TRC destination.
2878 // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2879 // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2880 static const skcms_Matrix3x3 I = {{
2881 { 1.0f, 0.0f, 0.0f },
2882 { 0.0f, 1.0f, 0.0f },
2883 { 0.0f, 0.0f, 1.0f },
2884 }};
2885 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2886
2887 // There's a chance the source and destination gamuts are identical,
2888 // in which case we can skip the gamut transform.
2889 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2890 // Concat the entire gamut transform into from_xyz,
2891 // now slightly misnamed but it's a handy spot to stash the result.
2892 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2893 *ops++ = Op_matrix_3x3;
2894 *args++ = &from_xyz;
2895 }
2896
2897 // Encode back to dst RGB using its parametric transfer functions.
2898 for (int i = 0; i < 3; i++) {
2899 OpAndArg oa = select_curve_op(dst_curves+i, i);
2900 if (oa.arg) {
2901 assert (oa.op != Op_table_r &&
2902 oa.op != Op_table_g &&
2903 oa.op != Op_table_b &&
2904 oa.op != Op_table_a);
2905 *ops++ = oa.op;
2906 *args++ = oa.arg;
2907 }
2908 }
2909 }
2910 }
2911
2912 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2913 // not just to values that fit in [0,1].
2914 //
2915 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2916 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2917 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2918 *ops++ = Op_clamp;
2919 }
2920
2921 if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2922 // Photoshop creates CMYK images as inverse CMYK.
2923 // These happen to be the only ones we've _ever_ seen.
2924 *ops++ = Op_invert;
2925
2926 // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2927 dstAlpha = skcms_AlphaFormat_Unpremul;
2928 }
2929
2930 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2931 *ops++ = Op_force_opaque;
2932 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2933 *ops++ = Op_premul;
2934 }
2935 if (dstFmt & 1) {
2936 *ops++ = Op_swap_rb;
2937 }
2938 switch (dstFmt >> 1) {
2939 default: return false;
2940 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2941 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2942 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2943 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2944 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2945 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2946 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2947 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2948 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2949 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2950 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2951 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2952 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2953 case skcms_PixelFormat_RGB_101010x_XR >> 1: *ops++ = Op_store_101010x_XR; break;
2954 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2955 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2956 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2957 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2958
2959 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2960 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2961 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2962 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2963 *ops++ = Op_store_8888;
2964 break;
2965 }
2966
2967 auto run = baseline::run_program;
2968 #if defined(TEST_FOR_HSW)
2969 switch (cpu_type()) {
2970 case CpuType::None: break;
2971 case CpuType::HSW: run = hsw::run_program; break;
2972 case CpuType::SKX: run = hsw::run_program; break;
2973 }
2974 #endif
2975 #if defined(TEST_FOR_SKX)
2976 switch (cpu_type()) {
2977 case CpuType::None: break;
2978 case CpuType::HSW: break;
2979 case CpuType::SKX: run = skx::run_program; break;
2980 }
2981 #endif
2982 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2983 return true;
2984 }
2985
assert_usable_as_destination(const skcms_ICCProfile * profile)2986 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2987 #if defined(NDEBUG)
2988 (void)profile;
2989 #else
2990 skcms_Matrix3x3 fromXYZD50;
2991 skcms_TransferFunction invR, invG, invB;
2992 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2993 #endif
2994 }
2995
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2996 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2997 if (!profile->has_B2A) {
2998 skcms_Matrix3x3 fromXYZD50;
2999 if (!profile->has_trc || !profile->has_toXYZD50
3000 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
3001 return false;
3002 }
3003
3004 skcms_TransferFunction tf[3];
3005 for (int i = 0; i < 3; i++) {
3006 skcms_TransferFunction inv;
3007 if (profile->trc[i].table_entries == 0
3008 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
3009 tf[i] = profile->trc[i].parametric;
3010 continue;
3011 }
3012
3013 float max_error;
3014 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
3015 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
3016 return false;
3017 }
3018 }
3019
3020 for (int i = 0; i < 3; ++i) {
3021 profile->trc[i].table_entries = 0;
3022 profile->trc[i].parametric = tf[i];
3023 }
3024 }
3025 assert_usable_as_destination(profile);
3026 return true;
3027 }
3028
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)3029 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
3030 // Call skcms_MakeUsableAsDestination() with B2A disabled;
3031 // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
3032 skcms_ICCProfile result = *profile;
3033 result.has_B2A = false;
3034 if (!skcms_MakeUsableAsDestination(&result)) {
3035 return false;
3036 }
3037
3038 // Of the three, pick the transfer function that best fits the other two.
3039 int best_tf = 0;
3040 float min_max_error = INFINITY_;
3041 for (int i = 0; i < 3; i++) {
3042 skcms_TransferFunction inv;
3043 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
3044 return false;
3045 }
3046
3047 float err = 0;
3048 for (int j = 0; j < 3; ++j) {
3049 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
3050 }
3051 if (min_max_error > err) {
3052 min_max_error = err;
3053 best_tf = i;
3054 }
3055 }
3056
3057 for (int i = 0; i < 3; i++) {
3058 result.trc[i].parametric = result.trc[best_tf].parametric;
3059 }
3060
3061 *profile = result;
3062 assert_usable_as_destination(profile);
3063 return true;
3064 }
3065