1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33 #endif
34
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37 runtime_cpu_detection = false;
38 }
39
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49
50 static const union {
51 uint32_t bits;
52 float f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55
56 #if defined(__clang__) || defined(__GNUC__)
57 #define small_memcpy __builtin_memcpy
58 #else
59 #define small_memcpy memcpy
60 #endif
61
log2f_(float x)62 static float log2f_(float x) {
63 // The first approximation of log2(x) is its exponent 'e', minus 127.
64 int32_t bits;
65 small_memcpy(&bits, &x, sizeof(bits));
66
67 float e = (float)bits * (1.0f / (1<<23));
68
69 // If we use the mantissa too we can refine the error signficantly.
70 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71 float m;
72 small_memcpy(&m, &m_bits, sizeof(m));
73
74 return (e - 124.225514990f
75 - 1.498030302f*m
76 - 1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79 const float ln2 = 0.69314718f;
80 return ln2*log2f_(x);
81 }
82
exp2f_(float x)83 static float exp2f_(float x) {
84 float fract = x - floorf_(x);
85
86 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87 - 1.490129070f*fract
88 + 27.728023300f/(4.84252568f - fract));
89
90 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92 // Negative values are effectively underflow - we'll end up returning a (different) negative
93 // value, which makes no sense. So clamp to zero.
94 if (fbits >= (float)INT_MAX) {
95 return INFINITY_;
96 } else if (fbits < 0) {
97 return 0;
98 }
99
100 int32_t bits = (int32_t)fbits;
101 small_memcpy(&x, &bits, sizeof(x));
102 return x;
103 }
104
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107 assert (x >= 0);
108 return (x == 0) || (x == 1) ? x
109 : exp2f_(log2f_(x) * y);
110 }
111
expf_(float x)112 static float expf_(float x) {
113 const float log2_e = 1.4426950408889634074f;
114 return exp2f_(log2_e * x);
115 }
116
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123 int32_t bits;
124 memcpy(&bits, &x, sizeof(bits));
125 bits = bits - 1;
126 memcpy(&x, &bits, sizeof(bits));
127 return x;
128 }
129
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
134 struct TF_PQish { float A,B,C,D,E,F; };
135 struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
136 // We didn't originally support a scale factor K for HLG, and instead just stored 0 in
137 // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
138 // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
139
TFKind_marker(TFKind kind)140 static float TFKind_marker(TFKind kind) {
141 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
142 return -(float)kind;
143 }
144
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)145 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
146 , TF_HLGish* hlg = nullptr) {
147 if (tf.g < 0 && (int)tf.g == tf.g) {
148 // TODO: soundness checks for PQ/HLG like we do for sRGBish?
149 switch ((int)tf.g) {
150 case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
151 case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
152 case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
153 }
154 return Bad;
155 }
156
157 // Basic soundness checks for sRGBish transfer functions.
158 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
159 // a,c,d,g should be non-negative to make any sense.
160 && tf.a >= 0
161 && tf.c >= 0
162 && tf.d >= 0
163 && tf.g >= 0
164 // Raising a negative value to a fractional tf->g produces complex numbers.
165 && tf.a * tf.d + tf.b >= 0) {
166 return sRGBish;
167 }
168
169 return Bad;
170 }
171
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)172 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
173 return classify(*tf) == sRGBish;
174 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)175 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
176 return classify(*tf) == PQish;
177 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)178 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
179 return classify(*tf) == HLGish;
180 }
181
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)182 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
183 float A, float B, float C,
184 float D, float E, float F) {
185 *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
186 assert(skcms_TransferFunction_isPQish(tf));
187 return true;
188 }
189
skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction * tf,float K,float R,float G,float a,float b,float c)190 bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
191 float K, float R, float G,
192 float a, float b, float c) {
193 *tf = { TFKind_marker(HLGish), R,G, a,b,c, K-1.0f };
194 assert(skcms_TransferFunction_isHLGish(tf));
195 return true;
196 }
197
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)198 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
199 float sign = x < 0 ? -1.0f : 1.0f;
200 x *= sign;
201
202 TF_PQish pq;
203 TF_HLGish hlg;
204 switch (classify(*tf, &pq, &hlg)) {
205 case Bad: break;
206
207 case HLGish: {
208 const float K = hlg.K_minus_1 + 1.0f;
209 return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
210 : expf_((x-hlg.c)*hlg.a) + hlg.b);
211 }
212
213 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
214 case HLGinvish: {
215 const float K = hlg.K_minus_1 + 1.0f;
216 x /= K;
217 return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
218 : hlg.a * logf_(x - hlg.b) + hlg.c);
219 }
220
221
222 case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
223 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
224
225 case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
226 / (pq.D + pq.E * powf_(x, pq.C)),
227 pq.F);
228 }
229 return 0;
230 }
231
232
eval_curve(const skcms_Curve * curve,float x)233 static float eval_curve(const skcms_Curve* curve, float x) {
234 if (curve->table_entries == 0) {
235 return skcms_TransferFunction_eval(&curve->parametric, x);
236 }
237
238 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
239 int lo = (int) ix ,
240 hi = (int)(float)minus_1_ulp(ix + 1.0f);
241 float t = ix - (float)lo;
242
243 float l, h;
244 if (curve->table_8) {
245 l = curve->table_8[lo] * (1/255.0f);
246 h = curve->table_8[hi] * (1/255.0f);
247 } else {
248 uint16_t be_l, be_h;
249 memcpy(&be_l, curve->table_16 + 2*lo, 2);
250 memcpy(&be_h, curve->table_16 + 2*hi, 2);
251 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
252 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
253 l = le_l * (1/65535.0f);
254 h = le_h * (1/65535.0f);
255 }
256 return l + (h-l)*t;
257 }
258
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)259 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
260 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
261 const float dx = 1.0f / (N - 1);
262 float err = 0;
263 for (uint32_t i = 0; i < N; i++) {
264 float x = i * dx,
265 y = eval_curve(curve, x);
266 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
267 }
268 return err;
269 }
270
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)271 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
272 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
273 }
274
275 // Additional ICC signature values that are only used internally
276 enum {
277 // File signature
278 skcms_Signature_acsp = 0x61637370,
279
280 // Tag signatures
281 skcms_Signature_rTRC = 0x72545243,
282 skcms_Signature_gTRC = 0x67545243,
283 skcms_Signature_bTRC = 0x62545243,
284 skcms_Signature_kTRC = 0x6B545243,
285
286 skcms_Signature_rXYZ = 0x7258595A,
287 skcms_Signature_gXYZ = 0x6758595A,
288 skcms_Signature_bXYZ = 0x6258595A,
289
290 skcms_Signature_A2B0 = 0x41324230,
291 skcms_Signature_B2A0 = 0x42324130,
292
293 skcms_Signature_CHAD = 0x63686164,
294 skcms_Signature_WTPT = 0x77747074,
295
296 // Type signatures
297 skcms_Signature_curv = 0x63757276,
298 skcms_Signature_mft1 = 0x6D667431,
299 skcms_Signature_mft2 = 0x6D667432,
300 skcms_Signature_mAB = 0x6D414220,
301 skcms_Signature_mBA = 0x6D424120,
302 skcms_Signature_para = 0x70617261,
303 skcms_Signature_sf32 = 0x73663332,
304 // XYZ is also a PCS signature, so it's defined in skcms.h
305 // skcms_Signature_XYZ = 0x58595A20,
306
307 // cicp type signatures. Reference: ICC Chapter 9.2.17
308 skcms_Signature_cicp = 0x63696370,
309 };
310
read_big_u16(const uint8_t * ptr)311 static uint16_t read_big_u16(const uint8_t* ptr) {
312 uint16_t be;
313 memcpy(&be, ptr, sizeof(be));
314 #if defined(_MSC_VER)
315 return _byteswap_ushort(be);
316 #else
317 return __builtin_bswap16(be);
318 #endif
319 }
320
read_big_u32(const uint8_t * ptr)321 static uint32_t read_big_u32(const uint8_t* ptr) {
322 uint32_t be;
323 memcpy(&be, ptr, sizeof(be));
324 #if defined(_MSC_VER)
325 return _byteswap_ulong(be);
326 #else
327 return __builtin_bswap32(be);
328 #endif
329 }
330
read_big_i32(const uint8_t * ptr)331 static int32_t read_big_i32(const uint8_t* ptr) {
332 return (int32_t)read_big_u32(ptr);
333 }
334
read_big_fixed(const uint8_t * ptr)335 static float read_big_fixed(const uint8_t* ptr) {
336 return read_big_i32(ptr) * (1.0f / 65536.0f);
337 }
338
339 // Maps to an in-memory profile so that fields line up to the locations specified
340 // in ICC.1:2010, section 7.2
341 typedef struct {
342 uint8_t size [ 4];
343 uint8_t cmm_type [ 4];
344 uint8_t version [ 4];
345 uint8_t profile_class [ 4];
346 uint8_t data_color_space [ 4];
347 uint8_t pcs [ 4];
348 uint8_t creation_date_time [12];
349 uint8_t signature [ 4];
350 uint8_t platform [ 4];
351 uint8_t flags [ 4];
352 uint8_t device_manufacturer [ 4];
353 uint8_t device_model [ 4];
354 uint8_t device_attributes [ 8];
355 uint8_t rendering_intent [ 4];
356 uint8_t illuminant_X [ 4];
357 uint8_t illuminant_Y [ 4];
358 uint8_t illuminant_Z [ 4];
359 uint8_t creator [ 4];
360 uint8_t profile_id [16];
361 uint8_t reserved [28];
362 uint8_t tag_count [ 4]; // Technically not part of header, but required
363 } header_Layout;
364
365 typedef struct {
366 uint8_t signature [4];
367 uint8_t offset [4];
368 uint8_t size [4];
369 } tag_Layout;
370
get_tag_table(const skcms_ICCProfile * profile)371 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
372 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
373 }
374
375 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
376 // use of the type is for the CHAD tag that stores exactly nine values.
377 typedef struct {
378 uint8_t type [ 4];
379 uint8_t reserved [ 4];
380 uint8_t values [36];
381 } sf32_Layout;
382
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)383 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
384 skcms_ICCTag tag;
385 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
386 return false;
387 }
388
389 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
390 return false;
391 }
392
393 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
394 const uint8_t* values = sf32Tag->values;
395 for (int r = 0; r < 3; ++r)
396 for (int c = 0; c < 3; ++c, values += 4) {
397 m->vals[r][c] = read_big_fixed(values);
398 }
399 return true;
400 }
401
402 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
403 // the type are for tags/data that store exactly one triple.
404 typedef struct {
405 uint8_t type [4];
406 uint8_t reserved [4];
407 uint8_t X [4];
408 uint8_t Y [4];
409 uint8_t Z [4];
410 } XYZ_Layout;
411
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)412 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
413 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
414 return false;
415 }
416
417 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
418
419 *x = read_big_fixed(xyzTag->X);
420 *y = read_big_fixed(xyzTag->Y);
421 *z = read_big_fixed(xyzTag->Z);
422 return true;
423 }
424
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])425 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
426 skcms_ICCTag tag;
427 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
428 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
429 }
430
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)431 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
432 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
433 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
434 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
435 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
436 }
437
438 typedef struct {
439 uint8_t type [4];
440 uint8_t reserved_a [4];
441 uint8_t function_type [2];
442 uint8_t reserved_b [2];
443 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
444 } para_Layout;
445
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)446 static bool read_curve_para(const uint8_t* buf, uint32_t size,
447 skcms_Curve* curve, uint32_t* curve_size) {
448 if (size < SAFE_FIXED_SIZE(para_Layout)) {
449 return false;
450 }
451
452 const para_Layout* paraTag = (const para_Layout*)buf;
453
454 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
455 uint16_t function_type = read_big_u16(paraTag->function_type);
456 if (function_type > kGABCDEF) {
457 return false;
458 }
459
460 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
461 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
462 return false;
463 }
464
465 if (curve_size) {
466 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
467 }
468
469 curve->table_entries = 0;
470 curve->parametric.a = 1.0f;
471 curve->parametric.b = 0.0f;
472 curve->parametric.c = 0.0f;
473 curve->parametric.d = 0.0f;
474 curve->parametric.e = 0.0f;
475 curve->parametric.f = 0.0f;
476 curve->parametric.g = read_big_fixed(paraTag->variable);
477
478 switch (function_type) {
479 case kGAB:
480 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
481 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
482 if (curve->parametric.a == 0) {
483 return false;
484 }
485 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
486 break;
487 case kGABC:
488 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
489 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
490 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
491 if (curve->parametric.a == 0) {
492 return false;
493 }
494 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
495 curve->parametric.f = curve->parametric.e;
496 break;
497 case kGABCD:
498 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
499 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
500 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
501 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
502 break;
503 case kGABCDEF:
504 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
505 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
506 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
507 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
508 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
509 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
510 break;
511 }
512 return skcms_TransferFunction_isSRGBish(&curve->parametric);
513 }
514
515 typedef struct {
516 uint8_t type [4];
517 uint8_t reserved [4];
518 uint8_t value_count [4];
519 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
520 } curv_Layout;
521
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)522 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
523 skcms_Curve* curve, uint32_t* curve_size) {
524 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
525 return false;
526 }
527
528 const curv_Layout* curvTag = (const curv_Layout*)buf;
529
530 uint32_t value_count = read_big_u32(curvTag->value_count);
531 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
532 return false;
533 }
534
535 if (curve_size) {
536 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
537 }
538
539 if (value_count < 2) {
540 curve->table_entries = 0;
541 curve->parametric.a = 1.0f;
542 curve->parametric.b = 0.0f;
543 curve->parametric.c = 0.0f;
544 curve->parametric.d = 0.0f;
545 curve->parametric.e = 0.0f;
546 curve->parametric.f = 0.0f;
547 if (value_count == 0) {
548 // Empty tables are a shorthand for an identity curve
549 curve->parametric.g = 1.0f;
550 } else {
551 // Single entry tables are a shorthand for simple gamma
552 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
553 }
554 } else {
555 curve->table_8 = nullptr;
556 curve->table_16 = curvTag->variable;
557 curve->table_entries = value_count;
558 }
559
560 return true;
561 }
562
563 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
564 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)565 static bool read_curve(const uint8_t* buf, uint32_t size,
566 skcms_Curve* curve, uint32_t* curve_size) {
567 if (!buf || size < 4 || !curve) {
568 return false;
569 }
570
571 uint32_t type = read_big_u32(buf);
572 if (type == skcms_Signature_para) {
573 return read_curve_para(buf, size, curve, curve_size);
574 } else if (type == skcms_Signature_curv) {
575 return read_curve_curv(buf, size, curve, curve_size);
576 }
577
578 return false;
579 }
580
581 // mft1 and mft2 share a large chunk of data
582 typedef struct {
583 uint8_t type [ 4];
584 uint8_t reserved_a [ 4];
585 uint8_t input_channels [ 1];
586 uint8_t output_channels [ 1];
587 uint8_t grid_points [ 1];
588 uint8_t reserved_b [ 1];
589 uint8_t matrix [36];
590 } mft_CommonLayout;
591
592 typedef struct {
593 mft_CommonLayout common [1];
594
595 uint8_t variable [1/*variable*/];
596 } mft1_Layout;
597
598 typedef struct {
599 mft_CommonLayout common [1];
600
601 uint8_t input_table_entries [2];
602 uint8_t output_table_entries [2];
603 uint8_t variable [1/*variable*/];
604 } mft2_Layout;
605
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)606 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
607 // MFT matrices are applied before the first set of curves, but must be identity unless the
608 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
609 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
610 // field/flag.
611 a2b->matrix_channels = 0;
612 a2b-> input_channels = mftTag-> input_channels[0];
613 a2b->output_channels = mftTag->output_channels[0];
614
615 // We require exactly three (ie XYZ/Lab/RGB) output channels
616 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
617 return false;
618 }
619 // We require at least one, and no more than four (ie CMYK) input channels
620 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
621 return false;
622 }
623
624 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
625 a2b->grid_points[i] = mftTag->grid_points[0];
626 }
627 // The grid only makes sense with at least two points along each axis
628 if (a2b->grid_points[0] < 2) {
629 return false;
630 }
631 return true;
632 }
633
634 // All as the A2B version above, except where noted.
read_mft_common(const mft_CommonLayout * mftTag,skcms_B2A * b2a)635 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
636 // Same as A2B.
637 b2a->matrix_channels = 0;
638 b2a-> input_channels = mftTag-> input_channels[0];
639 b2a->output_channels = mftTag->output_channels[0];
640
641
642 // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
643 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
644 return false;
645 }
646 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
647 return false;
648 }
649
650 // Same as A2B.
651 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
652 b2a->grid_points[i] = mftTag->grid_points[0];
653 }
654 if (b2a->grid_points[0] < 2) {
655 return false;
656 }
657 return true;
658 }
659
660 template <typename A2B_or_B2A>
init_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,A2B_or_B2A * out)661 static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
662 uint32_t input_table_entries, uint32_t output_table_entries,
663 A2B_or_B2A* out) {
664 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
665 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
666 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
667
668 // [input|output]_channels are <= 4, so still no overflow
669 uint32_t byte_len_all_input_tables = out->input_channels * byte_len_per_input_table;
670 uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
671
672 uint64_t grid_size = out->output_channels * byte_width;
673 for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
674 grid_size *= out->grid_points[axis];
675 }
676
677 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
678 return false;
679 }
680
681 for (uint32_t i = 0; i < out->input_channels; ++i) {
682 out->input_curves[i].table_entries = input_table_entries;
683 if (byte_width == 1) {
684 out->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
685 out->input_curves[i].table_16 = nullptr;
686 } else {
687 out->input_curves[i].table_8 = nullptr;
688 out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
689 }
690 }
691
692 if (byte_width == 1) {
693 out->grid_8 = table_base + byte_len_all_input_tables;
694 out->grid_16 = nullptr;
695 } else {
696 out->grid_8 = nullptr;
697 out->grid_16 = table_base + byte_len_all_input_tables;
698 }
699
700 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
701 for (uint32_t i = 0; i < out->output_channels; ++i) {
702 out->output_curves[i].table_entries = output_table_entries;
703 if (byte_width == 1) {
704 out->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
705 out->output_curves[i].table_16 = nullptr;
706 } else {
707 out->output_curves[i].table_8 = nullptr;
708 out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
709 }
710 }
711
712 return true;
713 }
714
715 template <typename A2B_or_B2A>
read_tag_mft1(const skcms_ICCTag * tag,A2B_or_B2A * out)716 static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
717 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
718 return false;
719 }
720
721 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
722 if (!read_mft_common(mftTag->common, out)) {
723 return false;
724 }
725
726 uint32_t input_table_entries = 256;
727 uint32_t output_table_entries = 256;
728
729 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
730 input_table_entries, output_table_entries, out);
731 }
732
733 template <typename A2B_or_B2A>
read_tag_mft2(const skcms_ICCTag * tag,A2B_or_B2A * out)734 static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
735 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
736 return false;
737 }
738
739 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
740 if (!read_mft_common(mftTag->common, out)) {
741 return false;
742 }
743
744 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
745 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
746
747 // ICC spec mandates that 2 <= table_entries <= 4096
748 if (input_table_entries < 2 || input_table_entries > 4096 ||
749 output_table_entries < 2 || output_table_entries > 4096) {
750 return false;
751 }
752
753 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
754 input_table_entries, output_table_entries, out);
755 }
756
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)757 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
758 uint32_t num_curves, skcms_Curve* curves) {
759 for (uint32_t i = 0; i < num_curves; ++i) {
760 if (curve_offset > size) {
761 return false;
762 }
763
764 uint32_t curve_bytes;
765 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
766 return false;
767 }
768
769 if (curve_bytes > UINT32_MAX - 3) {
770 return false;
771 }
772 curve_bytes = (curve_bytes + 3) & ~3U;
773
774 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
775 curve_offset = (uint32_t)new_offset_64;
776 if (new_offset_64 != curve_offset) {
777 return false;
778 }
779 }
780
781 return true;
782 }
783
784 // mAB and mBA tags use the same encoding, including color lookup tables.
785 typedef struct {
786 uint8_t type [ 4];
787 uint8_t reserved_a [ 4];
788 uint8_t input_channels [ 1];
789 uint8_t output_channels [ 1];
790 uint8_t reserved_b [ 2];
791 uint8_t b_curve_offset [ 4];
792 uint8_t matrix_offset [ 4];
793 uint8_t m_curve_offset [ 4];
794 uint8_t clut_offset [ 4];
795 uint8_t a_curve_offset [ 4];
796 } mAB_or_mBA_Layout;
797
798 typedef struct {
799 uint8_t grid_points [16];
800 uint8_t grid_byte_width [ 1];
801 uint8_t reserved [ 3];
802 uint8_t variable [1/*variable*/];
803 } CLUT_Layout;
804
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)805 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
806 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
807 return false;
808 }
809
810 const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
811
812 a2b->input_channels = mABTag->input_channels[0];
813 a2b->output_channels = mABTag->output_channels[0];
814
815 // We require exactly three (ie XYZ/Lab/RGB) output channels
816 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
817 return false;
818 }
819 // We require no more than four (ie CMYK) input channels
820 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
821 return false;
822 }
823
824 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
825 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
826 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
827 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
828 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
829
830 // "B" curves must be present
831 if (0 == b_curve_offset) {
832 return false;
833 }
834
835 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
836 a2b->output_curves)) {
837 return false;
838 }
839
840 // "M" curves and Matrix must be used together
841 if (0 != m_curve_offset) {
842 if (0 == matrix_offset) {
843 return false;
844 }
845 a2b->matrix_channels = a2b->output_channels;
846 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
847 a2b->matrix_curves)) {
848 return false;
849 }
850
851 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
852 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
853 return false;
854 }
855 float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
856 const uint8_t* mtx_buf = tag->buf + matrix_offset;
857 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
858 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
859 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
860 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
861 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
862 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
863 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
864 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
865 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
866 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
867 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
868 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
869 } else {
870 if (0 != matrix_offset) {
871 return false;
872 }
873 a2b->matrix_channels = 0;
874 }
875
876 // "A" curves and CLUT must be used together
877 if (0 != a_curve_offset) {
878 if (0 == clut_offset) {
879 return false;
880 }
881 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
882 a2b->input_curves)) {
883 return false;
884 }
885
886 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
887 return false;
888 }
889 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
890
891 if (clut->grid_byte_width[0] == 1) {
892 a2b->grid_8 = clut->variable;
893 a2b->grid_16 = nullptr;
894 } else if (clut->grid_byte_width[0] == 2) {
895 a2b->grid_8 = nullptr;
896 a2b->grid_16 = clut->variable;
897 } else {
898 return false;
899 }
900
901 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; // the payload
902 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
903 a2b->grid_points[i] = clut->grid_points[i];
904 // The grid only makes sense with at least two points along each axis
905 if (a2b->grid_points[i] < 2) {
906 return false;
907 }
908 grid_size *= a2b->grid_points[i];
909 }
910 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
911 return false;
912 }
913 } else {
914 if (0 != clut_offset) {
915 return false;
916 }
917
918 // If there is no CLUT, the number of input and output channels must match
919 if (a2b->input_channels != a2b->output_channels) {
920 return false;
921 }
922
923 // Zero out the number of input channels to signal that we're skipping this stage
924 a2b->input_channels = 0;
925 }
926
927 return true;
928 }
929
930 // Exactly the same as read_tag_mab(), except where there are comments.
931 // TODO: refactor the two to eliminate common code?
read_tag_mba(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)932 static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
933 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
934 return false;
935 }
936
937 const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
938
939 b2a->input_channels = mBATag->input_channels[0];
940 b2a->output_channels = mBATag->output_channels[0];
941
942 // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
943 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
944 return false;
945 }
946 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
947 return false;
948 }
949
950 uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
951 uint32_t matrix_offset = read_big_u32(mBATag->matrix_offset);
952 uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
953 uint32_t clut_offset = read_big_u32(mBATag->clut_offset);
954 uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
955
956 if (0 == b_curve_offset) {
957 return false;
958 }
959
960 // "B" curves are our inputs, not outputs.
961 if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
962 b2a->input_curves)) {
963 return false;
964 }
965
966 if (0 != m_curve_offset) {
967 if (0 == matrix_offset) {
968 return false;
969 }
970 // Matrix channels is tied to input_channels (3), not output_channels.
971 b2a->matrix_channels = b2a->input_channels;
972
973 if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
974 b2a->matrix_curves)) {
975 return false;
976 }
977
978 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
979 return false;
980 }
981 float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f; // TODO: understand
982 const uint8_t* mtx_buf = tag->buf + matrix_offset;
983 b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
984 b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
985 b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
986 b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
987 b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
988 b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
989 b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
990 b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
991 b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
992 b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
993 b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
994 b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
995 } else {
996 if (0 != matrix_offset) {
997 return false;
998 }
999 b2a->matrix_channels = 0;
1000 }
1001
1002 if (0 != a_curve_offset) {
1003 if (0 == clut_offset) {
1004 return false;
1005 }
1006
1007 // "A" curves are our output, not input.
1008 if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1009 b2a->output_curves)) {
1010 return false;
1011 }
1012
1013 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1014 return false;
1015 }
1016 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1017
1018 if (clut->grid_byte_width[0] == 1) {
1019 b2a->grid_8 = clut->variable;
1020 b2a->grid_16 = nullptr;
1021 } else if (clut->grid_byte_width[0] == 2) {
1022 b2a->grid_8 = nullptr;
1023 b2a->grid_16 = clut->variable;
1024 } else {
1025 return false;
1026 }
1027
1028 uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1029 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1030 b2a->grid_points[i] = clut->grid_points[i];
1031 if (b2a->grid_points[i] < 2) {
1032 return false;
1033 }
1034 grid_size *= b2a->grid_points[i];
1035 }
1036 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1037 return false;
1038 }
1039 } else {
1040 if (0 != clut_offset) {
1041 return false;
1042 }
1043
1044 if (b2a->input_channels != b2a->output_channels) {
1045 return false;
1046 }
1047
1048 // Zero out *output* channels to skip this stage.
1049 b2a->output_channels = 0;
1050 }
1051 return true;
1052 }
1053
1054 // If you pass f, we'll fit a possibly-non-zero value for *f.
1055 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)1056 static int fit_linear(const skcms_Curve* curve, int N, float tol,
1057 float* c, float* d, float* f = nullptr) {
1058 assert(N > 1);
1059 // We iteratively fit the first points to the TF's linear piece.
1060 // We want the cx + f line to pass through the first and last points we fit exactly.
1061 //
1062 // As we walk along the points we find the minimum and maximum slope of the line before the
1063 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
1064 // emtpy, when we definitely can't add any more points.
1065 //
1066 // Some points' error intervals may intersect the running interval but not lie fully
1067 // within it. So we keep track of the last point we saw that is a valid end point candidate,
1068 // and once the search is done, back up to build the line through *that* point.
1069 const float dx = 1.0f / (N - 1);
1070
1071 int lin_points = 1;
1072
1073 float f_zero = 0.0f;
1074 if (f) {
1075 *f = eval_curve(curve, 0);
1076 } else {
1077 f = &f_zero;
1078 }
1079
1080
1081 float slope_min = -INFINITY_;
1082 float slope_max = +INFINITY_;
1083 for (int i = 1; i < N; ++i) {
1084 float x = i * dx;
1085 float y = eval_curve(curve, x);
1086
1087 float slope_max_i = (y + tol - *f) / x,
1088 slope_min_i = (y - tol - *f) / x;
1089 if (slope_max_i < slope_min || slope_max < slope_min_i) {
1090 // Slope intervals would no longer overlap.
1091 break;
1092 }
1093 slope_max = fminf_(slope_max, slope_max_i);
1094 slope_min = fmaxf_(slope_min, slope_min_i);
1095
1096 float cur_slope = (y - *f) / x;
1097 if (slope_min <= cur_slope && cur_slope <= slope_max) {
1098 lin_points = i + 1;
1099 *c = cur_slope;
1100 }
1101 }
1102
1103 // Set D to the last point that met our tolerance.
1104 *d = (lin_points - 1) * dx;
1105 return lin_points;
1106 }
1107
1108 // If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
canonicalize_identity(skcms_Curve * curve)1109 static void canonicalize_identity(skcms_Curve* curve) {
1110 if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1111 int N = (int)curve->table_entries;
1112
1113 float c = 0.0f, d = 0.0f, f = 0.0f;
1114 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
1115 && c == 1.0f
1116 && f == 0.0f) {
1117 curve->table_entries = 0;
1118 curve->table_8 = nullptr;
1119 curve->table_16 = nullptr;
1120 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
1121 }
1122 }
1123 }
1124
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)1125 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1126 bool ok = false;
1127 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1128 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1129 if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1130 if (!ok) {
1131 return false;
1132 }
1133
1134 if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1135 if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1136 if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1137 if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1138
1139 if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1140 if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1141 if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1142
1143 if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1144 if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1145 if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1146
1147 return true;
1148 }
1149
read_b2a(const skcms_ICCTag * tag,skcms_B2A * b2a,bool pcs_is_xyz)1150 static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1151 bool ok = false;
1152 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1153 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1154 if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1155 if (!ok) {
1156 return false;
1157 }
1158
1159 if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1160 if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1161 if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1162
1163 if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1164 if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1165 if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1166
1167 if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1168 if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1169 if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1170 if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1171
1172 return true;
1173 }
1174
1175 typedef struct {
1176 uint8_t type [ 4];
1177 uint8_t reserved [ 4];
1178 uint8_t colour_primaries [ 1];
1179 uint8_t transfer_characteristics [ 1];
1180 uint8_t matrix_coefficients [ 1];
1181 uint8_t full_range_flag [ 1];
1182 } cicp_Layout;
1183
read_tag_cicp(const skcms_ICCTag * tag,skcms_CICP * cicp)1184 static bool read_tag_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1185 if (tag->type != skcms_Signature_cicp || tag->size < 12) {
1186 return false;
1187 }
1188 cicp_Layout* cicpTag = (cicp_Layout*)tag->buf;
1189 cicp->colour_primaries = cicpTag->colour_primaries[0];
1190 cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1191 cicp->matrix_coefficients = cicpTag->matrix_coefficients[0];
1192 cicp->full_range_flag = cicpTag->full_range_flag[0];
1193 return true;
1194 }
1195
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)1196 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1197 if (!profile || !profile->buffer || !tag) { return; }
1198 if (idx > profile->tag_count) { return; }
1199 const tag_Layout* tags = get_tag_table(profile);
1200 tag->signature = read_big_u32(tags[idx].signature);
1201 tag->size = read_big_u32(tags[idx].size);
1202 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
1203 tag->type = read_big_u32(tag->buf);
1204 }
1205
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1206 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1207 if (!profile || !profile->buffer || !tag) { return false; }
1208 const tag_Layout* tags = get_tag_table(profile);
1209 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1210 if (read_big_u32(tags[i].signature) == sig) {
1211 tag->signature = sig;
1212 tag->size = read_big_u32(tags[i].size);
1213 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
1214 tag->type = read_big_u32(tag->buf);
1215 return true;
1216 }
1217 }
1218 return false;
1219 }
1220
usable_as_src(const skcms_ICCProfile * profile)1221 static bool usable_as_src(const skcms_ICCProfile* profile) {
1222 return profile->has_A2B
1223 || (profile->has_trc && profile->has_toXYZD50);
1224 }
1225
skcms_ParseWithA2BPriority(const void * buf,size_t len,const int priority[],const int priorities,skcms_ICCProfile * profile)1226 bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1227 const int priority[], const int priorities,
1228 skcms_ICCProfile* profile) {
1229 assert(SAFE_SIZEOF(header_Layout) == 132);
1230
1231 if (!profile) {
1232 return false;
1233 }
1234 memset(profile, 0, SAFE_SIZEOF(*profile));
1235
1236 if (len < SAFE_SIZEOF(header_Layout)) {
1237 return false;
1238 }
1239
1240 // Byte-swap all header fields
1241 const header_Layout* header = (const header_Layout*)buf;
1242 profile->buffer = (const uint8_t*)buf;
1243 profile->size = read_big_u32(header->size);
1244 uint32_t version = read_big_u32(header->version);
1245 profile->data_color_space = read_big_u32(header->data_color_space);
1246 profile->pcs = read_big_u32(header->pcs);
1247 uint32_t signature = read_big_u32(header->signature);
1248 float illuminant_X = read_big_fixed(header->illuminant_X);
1249 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1250 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1251 profile->tag_count = read_big_u32(header->tag_count);
1252
1253 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1254 // and major version
1255 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1256 if (signature != skcms_Signature_acsp ||
1257 profile->size > len ||
1258 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1259 (version >> 24) > 4) {
1260 return false;
1261 }
1262
1263 // Validate that illuminant is D50 white
1264 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1265 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1266 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1267 return false;
1268 }
1269
1270 // Validate that all tag entries have sane offset + size
1271 const tag_Layout* tags = get_tag_table(profile);
1272 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1273 uint32_t tag_offset = read_big_u32(tags[i].offset);
1274 uint32_t tag_size = read_big_u32(tags[i].size);
1275 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1276 if (tag_size < 4 || tag_end > profile->size) {
1277 return false;
1278 }
1279 }
1280
1281 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1282 return false;
1283 }
1284
1285 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1286
1287 // Pre-parse commonly used tags.
1288 skcms_ICCTag kTRC;
1289 if (profile->data_color_space == skcms_Signature_Gray &&
1290 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1291 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1292 // Malformed tag
1293 return false;
1294 }
1295 profile->trc[1] = profile->trc[0];
1296 profile->trc[2] = profile->trc[0];
1297 profile->has_trc = true;
1298
1299 if (pcs_is_xyz) {
1300 profile->toXYZD50.vals[0][0] = illuminant_X;
1301 profile->toXYZD50.vals[1][1] = illuminant_Y;
1302 profile->toXYZD50.vals[2][2] = illuminant_Z;
1303 profile->has_toXYZD50 = true;
1304 }
1305 } else {
1306 skcms_ICCTag rTRC, gTRC, bTRC;
1307 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1308 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1309 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1310 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1311 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1312 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1313 // Malformed TRC tags
1314 return false;
1315 }
1316 profile->has_trc = true;
1317 }
1318
1319 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1320 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1321 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1322 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1323 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1324 // Malformed XYZ tags
1325 return false;
1326 }
1327 profile->has_toXYZD50 = true;
1328 }
1329 }
1330
1331 for (int i = 0; i < priorities; i++) {
1332 // enum { perceptual, relative_colormetric, saturation }
1333 if (priority[i] < 0 || priority[i] > 2) {
1334 return false;
1335 }
1336 uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1337 skcms_ICCTag tag;
1338 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1339 if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1340 // Malformed A2B tag
1341 return false;
1342 }
1343 profile->has_A2B = true;
1344 break;
1345 }
1346 }
1347
1348 for (int i = 0; i < priorities; i++) {
1349 // enum { perceptual, relative_colormetric, saturation }
1350 if (priority[i] < 0 || priority[i] > 2) {
1351 return false;
1352 }
1353 uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1354 skcms_ICCTag tag;
1355 if (skcms_GetTagBySignature(profile, sig, &tag)) {
1356 if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1357 // Malformed B2A tag
1358 return false;
1359 }
1360 profile->has_B2A = true;
1361 break;
1362 }
1363 }
1364
1365 skcms_ICCTag cicpTag;
1366 if (skcms_GetTagBySignature(profile, skcms_Signature_cicp, &cicpTag)) {
1367 if (read_tag_cicp(&cicpTag, &profile->cicp)) {
1368 profile->has_CICP = true;
1369 }
1370 }
1371 return usable_as_src(profile);
1372 }
1373
1374
skcms_sRGB_profile()1375 const skcms_ICCProfile* skcms_sRGB_profile() {
1376 static const skcms_ICCProfile sRGB_profile = {
1377 nullptr, // buffer, moot here
1378
1379 0, // size, moot here
1380 skcms_Signature_RGB, // data_color_space
1381 skcms_Signature_XYZ, // pcs
1382 0, // tag count, moot here
1383
1384 // We choose to represent sRGB with its canonical transfer function,
1385 // and with its canonical XYZD50 gamut matrix.
1386 true, // has_trc, followed by the 3 trc curves
1387 {
1388 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1389 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1390 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1391 },
1392
1393 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1394 {{
1395 { 0.436065674f, 0.385147095f, 0.143066406f },
1396 { 0.222488403f, 0.716873169f, 0.060607910f },
1397 { 0.013916016f, 0.097076416f, 0.714096069f },
1398 }},
1399
1400 false, // has_A2B, followed by A2B itself, which we don't care about.
1401 {
1402 0,
1403 {
1404 {{0, {0,0, 0,0,0,0,0}}},
1405 {{0, {0,0, 0,0,0,0,0}}},
1406 {{0, {0,0, 0,0,0,0,0}}},
1407 {{0, {0,0, 0,0,0,0,0}}},
1408 },
1409 {0,0,0,0},
1410 nullptr,
1411 nullptr,
1412
1413 0,
1414 {
1415 {{0, {0,0, 0,0,0,0,0}}},
1416 {{0, {0,0, 0,0,0,0,0}}},
1417 {{0, {0,0, 0,0,0,0,0}}},
1418 },
1419 {{
1420 { 0,0,0,0 },
1421 { 0,0,0,0 },
1422 { 0,0,0,0 },
1423 }},
1424
1425 0,
1426 {
1427 {{0, {0,0, 0,0,0,0,0}}},
1428 {{0, {0,0, 0,0,0,0,0}}},
1429 {{0, {0,0, 0,0,0,0,0}}},
1430 },
1431 },
1432
1433 false, // has_B2A, followed by B2A itself, which we also don't care about.
1434 {
1435 0,
1436 {
1437 {{0, {0,0, 0,0,0,0,0}}},
1438 {{0, {0,0, 0,0,0,0,0}}},
1439 {{0, {0,0, 0,0,0,0,0}}},
1440 },
1441
1442 0,
1443 {{
1444 { 0,0,0,0 },
1445 { 0,0,0,0 },
1446 { 0,0,0,0 },
1447 }},
1448 {
1449 {{0, {0,0, 0,0,0,0,0}}},
1450 {{0, {0,0, 0,0,0,0,0}}},
1451 {{0, {0,0, 0,0,0,0,0}}},
1452 },
1453
1454 0,
1455 {0,0,0,0},
1456 nullptr,
1457 nullptr,
1458 {
1459 {{0, {0,0, 0,0,0,0,0}}},
1460 {{0, {0,0, 0,0,0,0,0}}},
1461 {{0, {0,0, 0,0,0,0,0}}},
1462 {{0, {0,0, 0,0,0,0,0}}},
1463 },
1464 },
1465 };
1466 return &sRGB_profile;
1467 }
1468
skcms_XYZD50_profile()1469 const skcms_ICCProfile* skcms_XYZD50_profile() {
1470 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1471 static const skcms_ICCProfile XYZD50_profile = {
1472 nullptr, // buffer, moot here
1473
1474 0, // size, moot here
1475 skcms_Signature_RGB, // data_color_space
1476 skcms_Signature_XYZ, // pcs
1477 0, // tag count, moot here
1478
1479 true, // has_trc, followed by the 3 trc curves
1480 {
1481 {{0, {1,1, 0,0,0,0,0}}},
1482 {{0, {1,1, 0,0,0,0,0}}},
1483 {{0, {1,1, 0,0,0,0,0}}},
1484 },
1485
1486 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1487 {{
1488 { 1,0,0 },
1489 { 0,1,0 },
1490 { 0,0,1 },
1491 }},
1492
1493 false, // has_A2B, followed by A2B itself, which we don't care about.
1494 {
1495 0,
1496 {
1497 {{0, {0,0, 0,0,0,0,0}}},
1498 {{0, {0,0, 0,0,0,0,0}}},
1499 {{0, {0,0, 0,0,0,0,0}}},
1500 {{0, {0,0, 0,0,0,0,0}}},
1501 },
1502 {0,0,0,0},
1503 nullptr,
1504 nullptr,
1505
1506 0,
1507 {
1508 {{0, {0,0, 0,0,0,0,0}}},
1509 {{0, {0,0, 0,0,0,0,0}}},
1510 {{0, {0,0, 0,0,0,0,0}}},
1511 },
1512 {{
1513 { 0,0,0,0 },
1514 { 0,0,0,0 },
1515 { 0,0,0,0 },
1516 }},
1517
1518 0,
1519 {
1520 {{0, {0,0, 0,0,0,0,0}}},
1521 {{0, {0,0, 0,0,0,0,0}}},
1522 {{0, {0,0, 0,0,0,0,0}}},
1523 },
1524 },
1525
1526 false, // has_B2A, followed by B2A itself, which we also don't care about.
1527 {
1528 0,
1529 {
1530 {{0, {0,0, 0,0,0,0,0}}},
1531 {{0, {0,0, 0,0,0,0,0}}},
1532 {{0, {0,0, 0,0,0,0,0}}},
1533 },
1534
1535 0,
1536 {{
1537 { 0,0,0,0 },
1538 { 0,0,0,0 },
1539 { 0,0,0,0 },
1540 }},
1541 {
1542 {{0, {0,0, 0,0,0,0,0}}},
1543 {{0, {0,0, 0,0,0,0,0}}},
1544 {{0, {0,0, 0,0,0,0,0}}},
1545 },
1546
1547 0,
1548 {0,0,0,0},
1549 nullptr,
1550 nullptr,
1551 {
1552 {{0, {0,0, 0,0,0,0,0}}},
1553 {{0, {0,0, 0,0,0,0,0}}},
1554 {{0, {0,0, 0,0,0,0,0}}},
1555 {{0, {0,0, 0,0,0,0,0}}},
1556 },
1557 },
1558 };
1559
1560 return &XYZD50_profile;
1561 }
1562
skcms_sRGB_TransferFunction()1563 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1564 return &skcms_sRGB_profile()->trc[0].parametric;
1565 }
1566
skcms_sRGB_Inverse_TransferFunction()1567 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1568 static const skcms_TransferFunction sRGB_inv =
1569 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1570 return &sRGB_inv;
1571 }
1572
skcms_Identity_TransferFunction()1573 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1574 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1575 return &identity;
1576 }
1577
1578 const uint8_t skcms_252_random_bytes[] = {
1579 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1580 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1581 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1582 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1583 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1584 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1585 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1586 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1587 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1588 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1589 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1590 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1591 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1592 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1593 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1594 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1595 };
1596
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1597 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1598 // Test for exactly equal profiles first.
1599 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1600 return true;
1601 }
1602
1603 // For now this is the essentially the same strategy we use in test_only.c
1604 // for our skcms_Transform() smoke tests:
1605 // 1) transform A to XYZD50
1606 // 2) transform B to XYZD50
1607 // 3) return true if they're similar enough
1608 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1609
1610 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1611 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1612
1613 // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1614 // to be treated as equal. But CMYK profiles are a totally different ballgame.
1615 const auto CMYK = skcms_Signature_CMYK;
1616 if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1617 return false;
1618 }
1619
1620 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1621 // TODO: working with RGBA_8888 either way is probably fastest.
1622 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1623 size_t npixels = 84;
1624 if (A->data_color_space == skcms_Signature_CMYK) {
1625 fmt = skcms_PixelFormat_RGBA_8888;
1626 npixels = 63;
1627 }
1628
1629 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1630 // use pre-canned results and skip that skcms_Transform() call?
1631 uint8_t dstA[252],
1632 dstB[252];
1633 if (!skcms_Transform(
1634 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1635 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1636 npixels)) {
1637 return false;
1638 }
1639 if (!skcms_Transform(
1640 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1641 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1642 npixels)) {
1643 return false;
1644 }
1645
1646 // TODO: make sure this final check has reasonable codegen.
1647 for (size_t i = 0; i < 252; i++) {
1648 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1649 return false;
1650 }
1651 }
1652 return true;
1653 }
1654
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1655 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1656 const skcms_TransferFunction* inv_tf) {
1657 if (!profile || !profile->has_trc) {
1658 return false;
1659 }
1660
1661 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1662 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1663 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1664 }
1665
is_zero_to_one(float x)1666 static bool is_zero_to_one(float x) {
1667 return 0 <= x && x <= 1;
1668 }
1669
1670 typedef struct { float vals[3]; } skcms_Vector3;
1671
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1672 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1673 skcms_Vector3 dst = {{0,0,0}};
1674 for (int row = 0; row < 3; ++row) {
1675 dst.vals[row] = m->vals[row][0] * v->vals[0]
1676 + m->vals[row][1] * v->vals[1]
1677 + m->vals[row][2] * v->vals[2];
1678 }
1679 return dst;
1680 }
1681
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1682 bool skcms_AdaptToXYZD50(float wx, float wy,
1683 skcms_Matrix3x3* toXYZD50) {
1684 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1685 !toXYZD50) {
1686 return false;
1687 }
1688
1689 // Assumes that Y is 1.0f.
1690 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1691
1692 // Now convert toXYZ matrix to toXYZD50.
1693 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1694
1695 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1696 // the matrices below. The Bradford method is used by Adobe and is widely considered
1697 // to be the best.
1698 skcms_Matrix3x3 xyz_to_lms = {{
1699 { 0.8951f, 0.2664f, -0.1614f },
1700 { -0.7502f, 1.7135f, 0.0367f },
1701 { 0.0389f, -0.0685f, 1.0296f },
1702 }};
1703 skcms_Matrix3x3 lms_to_xyz = {{
1704 { 0.9869929f, -0.1470543f, 0.1599627f },
1705 { 0.4323053f, 0.5183603f, 0.0492912f },
1706 { -0.0085287f, 0.0400428f, 0.9684867f },
1707 }};
1708
1709 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1710 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1711
1712 *toXYZD50 = {{
1713 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1714 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1715 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1716 }};
1717 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1718 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1719
1720 return true;
1721 }
1722
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1723 bool skcms_PrimariesToXYZD50(float rx, float ry,
1724 float gx, float gy,
1725 float bx, float by,
1726 float wx, float wy,
1727 skcms_Matrix3x3* toXYZD50) {
1728 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1729 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1730 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1731 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1732 !toXYZD50) {
1733 return false;
1734 }
1735
1736 // First, we need to convert xy values (primaries) to XYZ.
1737 skcms_Matrix3x3 primaries = {{
1738 { rx, gx, bx },
1739 { ry, gy, by },
1740 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1741 }};
1742 skcms_Matrix3x3 primaries_inv;
1743 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1744 return false;
1745 }
1746
1747 // Assumes that Y is 1.0f.
1748 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1749 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1750
1751 skcms_Matrix3x3 toXYZ = {{
1752 { XYZ.vals[0], 0, 0 },
1753 { 0, XYZ.vals[1], 0 },
1754 { 0, 0, XYZ.vals[2] },
1755 }};
1756 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1757
1758 skcms_Matrix3x3 DXtoD50;
1759 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1760 return false;
1761 }
1762
1763 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1764 return true;
1765 }
1766
1767
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1768 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1769 double a00 = src->vals[0][0],
1770 a01 = src->vals[1][0],
1771 a02 = src->vals[2][0],
1772 a10 = src->vals[0][1],
1773 a11 = src->vals[1][1],
1774 a12 = src->vals[2][1],
1775 a20 = src->vals[0][2],
1776 a21 = src->vals[1][2],
1777 a22 = src->vals[2][2];
1778
1779 double b0 = a00*a11 - a01*a10,
1780 b1 = a00*a12 - a02*a10,
1781 b2 = a01*a12 - a02*a11,
1782 b3 = a20,
1783 b4 = a21,
1784 b5 = a22;
1785
1786 double determinant = b0*b5
1787 - b1*b4
1788 + b2*b3;
1789
1790 if (determinant == 0) {
1791 return false;
1792 }
1793
1794 double invdet = 1.0 / determinant;
1795 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1796 return false;
1797 }
1798
1799 b0 *= invdet;
1800 b1 *= invdet;
1801 b2 *= invdet;
1802 b3 *= invdet;
1803 b4 *= invdet;
1804 b5 *= invdet;
1805
1806 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1807 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1808 dst->vals[2][0] = (float)( + b2 );
1809 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1810 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1811 dst->vals[2][1] = (float)( - b1 );
1812 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1813 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1814 dst->vals[2][2] = (float)( + b0 );
1815
1816 for (int r = 0; r < 3; ++r)
1817 for (int c = 0; c < 3; ++c) {
1818 if (!isfinitef_(dst->vals[r][c])) {
1819 return false;
1820 }
1821 }
1822 return true;
1823 }
1824
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1825 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1826 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1827 for (int r = 0; r < 3; r++)
1828 for (int c = 0; c < 3; c++) {
1829 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1830 + A->vals[r][1] * B->vals[1][c]
1831 + A->vals[r][2] * B->vals[2][c];
1832 }
1833 return m;
1834 }
1835
1836 #if defined(__clang__)
1837 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1838 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1839 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1840 TF_PQish pq;
1841 TF_HLGish hlg;
1842 switch (classify(*src, &pq, &hlg)) {
1843 case Bad: return false;
1844 case sRGBish: break; // handled below
1845
1846 case PQish:
1847 *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F
1848 , pq.B, -pq.E, 1.0f/pq.C};
1849 return true;
1850
1851 case HLGish:
1852 *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1853 , 1.0f/hlg.a, hlg.b, hlg.c
1854 , hlg.K_minus_1 };
1855 return true;
1856
1857 case HLGinvish:
1858 *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1859 , 1.0f/hlg.a, hlg.b, hlg.c
1860 , hlg.K_minus_1 };
1861 return true;
1862 }
1863
1864 assert (classify(*src) == sRGBish);
1865
1866 // We're inverting this function, solving for x in terms of y.
1867 // y = (cx + f) x < d
1868 // (ax + b)^g + e x ≥ d
1869 // The inverse of this function can be expressed in the same piecewise form.
1870 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1871
1872 // We'll start by finding the new threshold inv.d.
1873 // In principle we should be able to find that by solving for y at x=d from either side.
1874 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1875 float d_l = src->c * src->d + src->f,
1876 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1877 if (fabsf_(d_l - d_r) > 1/512.0f) {
1878 return false;
1879 }
1880 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1881
1882 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1883 if (inv.d > 0) {
1884 // Inverting the linear section is pretty straightfoward:
1885 // y = cx + f
1886 // y - f = cx
1887 // (1/c)y - f/c = x
1888 inv.c = 1.0f/src->c;
1889 inv.f = -src->f/src->c;
1890 }
1891
1892 // The interesting part is inverting the nonlinear section:
1893 // y = (ax + b)^g + e.
1894 // y - e = (ax + b)^g
1895 // (y - e)^1/g = ax + b
1896 // (y - e)^1/g - b = ax
1897 // (1/a)(y - e)^1/g - b/a = x
1898 //
1899 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1900 // let k = (1/a)^g
1901 // (1/a)( y - e)^1/g - b/a = x
1902 // (ky - ke)^1/g - b/a = x
1903
1904 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1905 inv.g = 1.0f / src->g;
1906 inv.a = k;
1907 inv.b = -k * src->e;
1908 inv.e = -src->b / src->a;
1909
1910 // We need to enforce the same constraints here that we do when fitting a curve,
1911 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1912 // of the source function if we're here.
1913
1914 // Just like when fitting the curve, there's really no way to rescue a < 0.
1915 if (inv.a < 0) {
1916 return false;
1917 }
1918 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1919 if (inv.a * inv.d + inv.b < 0) {
1920 inv.b = -inv.a * inv.d;
1921 }
1922
1923 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1924 // where we might still fail here, like non-finite parameter values.
1925 if (classify(inv) != sRGBish) {
1926 return false;
1927 }
1928
1929 assert (inv.a >= 0);
1930 assert (inv.a * inv.d + inv.b >= 0);
1931
1932 // Now in principle we're done.
1933 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1934 // e or f of the inverse, depending on which segment contains src(1.0f).
1935 float s = skcms_TransferFunction_eval(src, 1.0f);
1936 if (!isfinitef_(s)) {
1937 return false;
1938 }
1939
1940 float sign = s < 0 ? -1.0f : 1.0f;
1941 s *= sign;
1942 if (s < inv.d) {
1943 inv.f = 1.0f - sign * inv.c * s;
1944 } else {
1945 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1946 }
1947
1948 *dst = inv;
1949 return classify(*dst) == sRGBish;
1950 }
1951
1952 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1953
1954 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1955 //
1956 // tf(x) = cx + f x < d
1957 // tf(x) = (ax + b)^g + e x ≥ d
1958 //
1959 // When fitting, we add the additional constraint that both pieces meet at d:
1960 //
1961 // cd + f = (ad + b)^g + e
1962 //
1963 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1964 //
1965 // tf(x) = cx + f x < d
1966 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1967 //
1968 // Our overall strategy is then:
1969 // For a couple tolerances,
1970 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1971 // - invert c,d,f
1972 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1973 // (and by constraint, inverted e) to the inverse of the table.
1974 // Return the parameters with least maximum error.
1975 //
1976 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1977 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1978 //
1979 // let y = Table(x)
1980 // r(x) = x - f_inv(y)
1981 //
1982 // ∂r/∂g = ln(ay + b)*(ay + b)^g
1983 // - ln(ad + b)*(ad + b)^g
1984 // ∂r/∂a = yg(ay + b)^(g-1)
1985 // - dg(ad + b)^(g-1)
1986 // ∂r/∂b = g(ay + b)^(g-1)
1987 // - g(ad + b)^(g-1)
1988
1989 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1990 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1991 static float rg_nonlinear(float x,
1992 const skcms_Curve* curve,
1993 const skcms_TransferFunction* tf,
1994 float dfdP[3]) {
1995 const float y = eval_curve(curve, x);
1996
1997 const float g = tf->g, a = tf->a, b = tf->b,
1998 c = tf->c, d = tf->d, f = tf->f;
1999
2000 const float Y = fmaxf_(a*y + b, 0.0f),
2001 D = a*d + b;
2002 assert (D >= 0);
2003
2004 // The gradient.
2005 dfdP[0] = logf_(Y)*powf_(Y, g)
2006 - logf_(D)*powf_(D, g);
2007 dfdP[1] = y*g*powf_(Y, g-1)
2008 - d*g*powf_(D, g-1);
2009 dfdP[2] = g*powf_(Y, g-1)
2010 - g*powf_(D, g-1);
2011
2012 // The residual.
2013 const float f_inv = powf_(Y, g)
2014 - powf_(D, g)
2015 + c*d + f;
2016 return x - f_inv;
2017 }
2018
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)2019 static bool gauss_newton_step(const skcms_Curve* curve,
2020 skcms_TransferFunction* tf,
2021 float x0, float dx, int N) {
2022 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2023 //
2024 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2025 //
2026 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2027 // where r(P) is the residual vector
2028 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
2029 //
2030 // Let's review the shape of each of these expressions:
2031 // r(P) is [N x 1], a column vector with one entry per value of x tested
2032 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
2033 // Jf^T is [3 x N], the transpose of Jf
2034 //
2035 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2036 // and so is its inverse (Jf^T Jf)^-1
2037 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2038 //
2039 // Our implementation strategy to get to the final ∆P is
2040 // 1) evaluate Jf^T Jf, call that lhs
2041 // 2) evaluate Jf^T r(P), call that rhs
2042 // 3) invert lhs
2043 // 4) multiply inverse lhs by rhs
2044 //
2045 // This is a friendly implementation strategy because we don't have to have any
2046 // buffers that scale with N, and equally nice don't have to perform any matrix
2047 // operations that are variable size.
2048 //
2049 // Other implementation strategies could trade this off, e.g. evaluating the
2050 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2051 // the residuals. That would probably require implementing singular value
2052 // decomposition, and would create a [3 x N] matrix to be multiplied by the
2053 // [N x 1] residual vector, but on the upside I think that'd eliminate the
2054 // possibility of this gauss_newton_step() function ever failing.
2055
2056 // 0) start off with lhs and rhs safely zeroed.
2057 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2058 skcms_Vector3 rhs = { {0,0,0} };
2059
2060 // 1,2) evaluate lhs and evaluate rhs
2061 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2062 // so we'll have to update lhs and rhs at the same time.
2063 for (int i = 0; i < N; i++) {
2064 float x = x0 + i*dx;
2065
2066 float dfdP[3] = {0,0,0};
2067 float resid = rg_nonlinear(x,curve,tf, dfdP);
2068
2069 for (int r = 0; r < 3; r++) {
2070 for (int c = 0; c < 3; c++) {
2071 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2072 }
2073 rhs.vals[r] += dfdP[r] * resid;
2074 }
2075 }
2076
2077 // If any of the 3 P parameters are unused, this matrix will be singular.
2078 // Detect those cases and fix them up to indentity instead, so we can invert.
2079 for (int k = 0; k < 3; k++) {
2080 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2081 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2082 lhs.vals[k][k] = 1;
2083 }
2084 }
2085
2086 // 3) invert lhs
2087 skcms_Matrix3x3 lhs_inv;
2088 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2089 return false;
2090 }
2091
2092 // 4) multiply inverse lhs by rhs
2093 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2094 tf->g += dP.vals[0];
2095 tf->a += dP.vals[1];
2096 tf->b += dP.vals[2];
2097 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2098 }
2099
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)2100 static float max_roundtrip_error_checked(const skcms_Curve* curve,
2101 const skcms_TransferFunction* tf_inv) {
2102 skcms_TransferFunction tf;
2103 if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
2104 return INFINITY_;
2105 }
2106
2107 skcms_TransferFunction tf_inv_again;
2108 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2109 return INFINITY_;
2110 }
2111
2112 return skcms_MaxRoundtripError(curve, &tf_inv_again);
2113 }
2114
2115 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)2116 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2117 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2118 auto fixup_tf = [tf]() {
2119 // a must be non-negative. That ensures the function is monotonically increasing.
2120 // We don't really know how to fix up a if it goes negative.
2121 if (tf->a < 0) {
2122 return false;
2123 }
2124 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2125 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2126 if (tf->a * tf->d + tf->b < 0) {
2127 tf->b = -tf->a * tf->d;
2128 }
2129 assert (tf->a >= 0 &&
2130 tf->a * tf->d + tf->b >= 0);
2131
2132 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2133 // parameter so we can guarantee this.
2134 tf->e = tf->c*tf->d + tf->f
2135 - powf_(tf->a*tf->d + tf->b, tf->g);
2136
2137 return true;
2138 };
2139
2140 if (!fixup_tf()) {
2141 return false;
2142 }
2143
2144 // No matter where we start, dx should always represent N even steps from 0 to 1.
2145 const float dx = 1.0f / (N-1);
2146
2147 skcms_TransferFunction best_tf = *tf;
2148 float best_max_error = INFINITY_;
2149
2150 // Need this or several curves get worse... *sigh*
2151 float init_error = max_roundtrip_error_checked(curve, tf);
2152 if (init_error < best_max_error) {
2153 best_max_error = init_error;
2154 best_tf = *tf;
2155 }
2156
2157 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2158 for (int j = 0; j < 8; j++) {
2159 if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
2160 *tf = best_tf;
2161 return isfinitef_(best_max_error);
2162 }
2163
2164 float max_error = max_roundtrip_error_checked(curve, tf);
2165 if (max_error < best_max_error) {
2166 best_max_error = max_error;
2167 best_tf = *tf;
2168 }
2169 }
2170
2171 *tf = best_tf;
2172 return isfinitef_(best_max_error);
2173 }
2174
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)2175 bool skcms_ApproximateCurve(const skcms_Curve* curve,
2176 skcms_TransferFunction* approx,
2177 float* max_error) {
2178 if (!curve || !approx || !max_error) {
2179 return false;
2180 }
2181
2182 if (curve->table_entries == 0) {
2183 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2184 return false;
2185 }
2186
2187 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2188 // We need at least two points, and must put some reasonable cap on the maximum number.
2189 return false;
2190 }
2191
2192 int N = (int)curve->table_entries;
2193 const float dx = 1.0f / (N - 1);
2194
2195 *max_error = INFINITY_;
2196 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2197 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2198 skcms_TransferFunction tf,
2199 tf_inv;
2200
2201 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2202 tf.f = 0.0f;
2203 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2204
2205 if (L == N) {
2206 // If the entire data set was linear, move the coefficients to the nonlinear portion
2207 // with G == 1. This lets use a canonical representation with d == 0.
2208 tf.g = 1;
2209 tf.a = tf.c;
2210 tf.b = tf.f;
2211 tf.c = tf.d = tf.e = tf.f = 0;
2212 } else if (L == N - 1) {
2213 // Degenerate case with only two points in the nonlinear segment. Solve directly.
2214 tf.g = 1;
2215 tf.a = (eval_curve(curve, (N-1)*dx) -
2216 eval_curve(curve, (N-2)*dx))
2217 / dx;
2218 tf.b = eval_curve(curve, (N-2)*dx)
2219 - tf.a * (N-2)*dx;
2220 tf.e = 0;
2221 } else {
2222 // Start by guessing a gamma-only curve through the midpoint.
2223 int mid = (L + N) / 2;
2224 float mid_x = mid / (N - 1.0f);
2225 float mid_y = eval_curve(curve, mid_x);
2226 tf.g = log2f_(mid_y) / log2f_(mid_x);
2227 tf.a = 1;
2228 tf.b = 0;
2229 tf.e = tf.c*tf.d + tf.f
2230 - powf_(tf.a*tf.d + tf.b, tf.g);
2231
2232
2233 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2234 !fit_nonlinear(curve, L,N, &tf_inv)) {
2235 continue;
2236 }
2237
2238 // We fit tf_inv, so calculate tf to keep in sync.
2239 // fit_nonlinear() should guarantee invertibility.
2240 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2241 assert(false);
2242 continue;
2243 }
2244 }
2245
2246 // We'd better have a sane, sRGB-ish TF by now.
2247 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2248 // anything else is just some accident of math and the way we pun tf.g as a type flag.
2249 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2250 if (sRGBish != classify(tf)) {
2251 continue;
2252 }
2253
2254 // We find our error by roundtripping the table through tf_inv.
2255 //
2256 // (The most likely use case for this approximation is to be inverted and
2257 // used as the transfer function for a destination color space.)
2258 //
2259 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2260 // invertible, so re-verify that here (and use the new inverse for testing).
2261 // fit_nonlinear() should guarantee this, but the special cases that don't use
2262 // it may fail this test.
2263 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2264 continue;
2265 }
2266
2267 float err = skcms_MaxRoundtripError(curve, &tf_inv);
2268 if (*max_error > err) {
2269 *max_error = err;
2270 *approx = tf;
2271 }
2272 }
2273 return isfinitef_(*max_error);
2274 }
2275
2276 // ~~~~ Impl. of skcms_Transform() ~~~~
2277
2278 typedef enum {
2279 Op_load_a8,
2280 Op_load_g8,
2281 Op_load_8888_palette8,
2282 Op_load_4444,
2283 Op_load_565,
2284 Op_load_888,
2285 Op_load_8888,
2286 Op_load_1010102,
2287 Op_load_161616LE,
2288 Op_load_16161616LE,
2289 Op_load_161616BE,
2290 Op_load_16161616BE,
2291 Op_load_hhh,
2292 Op_load_hhhh,
2293 Op_load_fff,
2294 Op_load_ffff,
2295
2296 Op_swap_rb,
2297 Op_clamp,
2298 Op_invert,
2299 Op_force_opaque,
2300 Op_premul,
2301 Op_unpremul,
2302 Op_matrix_3x3,
2303 Op_matrix_3x4,
2304
2305 Op_lab_to_xyz,
2306 Op_xyz_to_lab,
2307
2308 Op_tf_r,
2309 Op_tf_g,
2310 Op_tf_b,
2311 Op_tf_a,
2312
2313 Op_pq_r,
2314 Op_pq_g,
2315 Op_pq_b,
2316 Op_pq_a,
2317
2318 Op_hlg_r,
2319 Op_hlg_g,
2320 Op_hlg_b,
2321 Op_hlg_a,
2322
2323 Op_hlginv_r,
2324 Op_hlginv_g,
2325 Op_hlginv_b,
2326 Op_hlginv_a,
2327
2328 Op_table_r,
2329 Op_table_g,
2330 Op_table_b,
2331 Op_table_a,
2332
2333 Op_clut_A2B,
2334 Op_clut_B2A,
2335
2336 Op_store_a8,
2337 Op_store_g8,
2338 Op_store_4444,
2339 Op_store_565,
2340 Op_store_888,
2341 Op_store_8888,
2342 Op_store_1010102,
2343 Op_store_161616LE,
2344 Op_store_16161616LE,
2345 Op_store_161616BE,
2346 Op_store_16161616BE,
2347 Op_store_hhh,
2348 Op_store_hhhh,
2349 Op_store_fff,
2350 Op_store_ffff,
2351 } Op;
2352
2353 #if defined(__clang__)
2354 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2355 #elif defined(__GNUC__)
2356 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2357 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2358 template <int N, typename T>
2359 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2360
2361 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2362 #endif
2363
2364 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2365
2366 namespace baseline {
2367 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2368 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2369 #define N 1
2370 template <typename T> using V = T;
2371 using Color = float;
2372 #elif defined(__AVX512F__)
2373 #define N 16
2374 template <typename T> using V = Vec<N,T>;
2375 using Color = float;
2376 #elif defined(__AVX__)
2377 #define N 8
2378 template <typename T> using V = Vec<N,T>;
2379 using Color = float;
2380 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2381 #define N 8
2382 template <typename T> using V = Vec<N,T>;
2383 using Color = _Float16;
2384 #else
2385 #define N 4
2386 template <typename T> using V = Vec<N,T>;
2387 using Color = float;
2388 #endif
2389
2390 #include "src/Transform_inl.h"
2391 #undef N
2392 }
2393
2394 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2395 #if !defined(SKCMS_PORTABLE) && \
2396 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2397 (( defined(__clang__) && __clang_major__ >= 5) || \
2398 (!defined(__clang__) && defined(__GNUC__))) \
2399 && defined(__x86_64__)
2400
2401 #if !defined(__AVX2__)
2402 #if defined(__clang__)
2403 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2404 #elif defined(__GNUC__)
2405 #pragma GCC push_options
2406 #pragma GCC target("avx2,f16c")
2407 #endif
2408
2409 namespace hsw {
2410 #define USING_AVX
2411 #define USING_AVX_F16C
2412 #define USING_AVX2
2413 #define N 8
2414 template <typename T> using V = Vec<N,T>;
2415 using Color = float;
2416
2417 #include "src/Transform_inl.h"
2418
2419 // src/Transform_inl.h will undefine USING_* for us.
2420 #undef N
2421 }
2422
2423 #if defined(__clang__)
2424 #pragma clang attribute pop
2425 #elif defined(__GNUC__)
2426 #pragma GCC pop_options
2427 #endif
2428
2429 #define TEST_FOR_HSW
2430 #endif
2431
2432 #if !defined(__AVX512F__)
2433 #if defined(__clang__)
2434 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2435 #elif defined(__GNUC__)
2436 #pragma GCC push_options
2437 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2438 #endif
2439
2440 namespace skx {
2441 #define USING_AVX512F
2442 #define N 16
2443 template <typename T> using V = Vec<N,T>;
2444 using Color = float;
2445
2446 #include "src/Transform_inl.h"
2447
2448 // src/Transform_inl.h will undefine USING_* for us.
2449 #undef N
2450 }
2451
2452 #if defined(__clang__)
2453 #pragma clang attribute pop
2454 #elif defined(__GNUC__)
2455 #pragma GCC pop_options
2456 #endif
2457
2458 #define TEST_FOR_SKX
2459 #endif
2460
2461 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2462 enum class CpuType { None, HSW, SKX };
cpu_type()2463 static CpuType cpu_type() {
2464 static const CpuType type = []{
2465 if (!runtime_cpu_detection) {
2466 return CpuType::None;
2467 }
2468 // See http://www.sandpile.org/x86/cpuid.htm
2469
2470 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2471 uint32_t eax, ebx, ecx, edx;
2472 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2473 : "0"(1), "2"(0));
2474 if ((edx & (1u<<25)) && // SSE
2475 (edx & (1u<<26)) && // SSE2
2476 (ecx & (1u<< 0)) && // SSE3
2477 (ecx & (1u<< 9)) && // SSSE3
2478 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2479 (ecx & (1u<<19)) && // SSE4.1
2480 (ecx & (1u<<20)) && // SSE4.2
2481 (ecx & (1u<<26)) && // XSAVE
2482 (ecx & (1u<<27)) && // OSXSAVE
2483 (ecx & (1u<<28)) && // AVX
2484 (ecx & (1u<<29))) { // F16C
2485
2486 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2487 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2488 : "0"(7), "2"(0));
2489 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2490 uint32_t xcr0, dont_need_edx;
2491 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2492
2493 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2494 (xcr0 & (1u<<2)) && // YMM register state saved?
2495 (ebx & (1u<<5))) { // AVX2
2496 // At this point we're at least HSW. Continue checking for SKX.
2497 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2498 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2499 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2500 (ebx & (1u<<16)) && // AVX512F
2501 (ebx & (1u<<17)) && // AVX512DQ
2502 (ebx & (1u<<28)) && // AVX512CD
2503 (ebx & (1u<<30)) && // AVX512BW
2504 (ebx & (1u<<31))) { // AVX512VL
2505 return CpuType::SKX;
2506 }
2507 return CpuType::HSW;
2508 }
2509 }
2510 return CpuType::None;
2511 }();
2512 return type;
2513 }
2514 #endif
2515
2516 #endif
2517
2518 typedef struct {
2519 Op op;
2520 const void* arg;
2521 } OpAndArg;
2522
select_curve_op(const skcms_Curve * curve,int channel)2523 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2524 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2525 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2526 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2527 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2528 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2529 };
2530 const auto& op = ops[channel];
2531
2532 if (curve->table_entries == 0) {
2533 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2534
2535 const skcms_TransferFunction& tf = curve->parametric;
2536
2537 if (tf.g == 1 && tf.a == 1 &&
2538 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2539 return noop;
2540 }
2541
2542 switch (classify(tf)) {
2543 case Bad: return noop;
2544 case sRGBish: return OpAndArg{op.sRGBish, &tf};
2545 case PQish: return OpAndArg{op.PQish, &tf};
2546 case HLGish: return OpAndArg{op.HLGish, &tf};
2547 case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2548 }
2549 }
2550 return OpAndArg{op.table, curve};
2551 }
2552
bytes_per_pixel(skcms_PixelFormat fmt)2553 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2554 switch (fmt >> 1) { // ignore rgb/bgr
2555 case skcms_PixelFormat_A_8 >> 1: return 1;
2556 case skcms_PixelFormat_G_8 >> 1: return 1;
2557 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2558 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2559 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2560 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2561 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2562 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2563 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2564 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2565 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2566 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2567 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2568 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2569 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2570 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2571 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2572 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2573 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2574 }
2575 assert(false);
2576 return 0;
2577 }
2578
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2579 static bool prep_for_destination(const skcms_ICCProfile* profile,
2580 skcms_Matrix3x3* fromXYZD50,
2581 skcms_TransferFunction* invR,
2582 skcms_TransferFunction* invG,
2583 skcms_TransferFunction* invB) {
2584 // skcms_Transform() supports B2A destinations...
2585 if (profile->has_B2A) { return true; }
2586 // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2587 return profile->has_trc
2588 && profile->has_toXYZD50
2589 && profile->trc[0].table_entries == 0
2590 && profile->trc[1].table_entries == 0
2591 && profile->trc[2].table_entries == 0
2592 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2593 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2594 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2595 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2596 }
2597
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2598 bool skcms_Transform(const void* src,
2599 skcms_PixelFormat srcFmt,
2600 skcms_AlphaFormat srcAlpha,
2601 const skcms_ICCProfile* srcProfile,
2602 void* dst,
2603 skcms_PixelFormat dstFmt,
2604 skcms_AlphaFormat dstAlpha,
2605 const skcms_ICCProfile* dstProfile,
2606 size_t npixels) {
2607 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2608 dst, dstFmt, dstAlpha, dstProfile,
2609 npixels, nullptr);
2610 }
2611
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2612 bool skcms_TransformWithPalette(const void* src,
2613 skcms_PixelFormat srcFmt,
2614 skcms_AlphaFormat srcAlpha,
2615 const skcms_ICCProfile* srcProfile,
2616 void* dst,
2617 skcms_PixelFormat dstFmt,
2618 skcms_AlphaFormat dstAlpha,
2619 const skcms_ICCProfile* dstProfile,
2620 size_t nz,
2621 const void* palette) {
2622 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2623 src_bpp = bytes_per_pixel(srcFmt);
2624 // Let's just refuse if the request is absurdly big.
2625 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2626 return false;
2627 }
2628 int n = (int)nz;
2629
2630 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2631 if (!srcProfile) {
2632 srcProfile = skcms_sRGB_profile();
2633 }
2634 if (!dstProfile) {
2635 dstProfile = skcms_sRGB_profile();
2636 }
2637
2638 // We can't transform in place unless the PixelFormats are the same size.
2639 if (dst == src && dst_bpp != src_bpp) {
2640 return false;
2641 }
2642 // TODO: more careful alias rejection (like, dst == src + 1)?
2643
2644 if (needs_palette(srcFmt) && !palette) {
2645 return false;
2646 }
2647
2648 Op program [32];
2649 const void* arguments[32];
2650
2651 Op* ops = program;
2652 const void** args = arguments;
2653
2654 // These are always parametric curves of some sort.
2655 skcms_Curve dst_curves[3];
2656 dst_curves[0].table_entries =
2657 dst_curves[1].table_entries =
2658 dst_curves[2].table_entries = 0;
2659
2660 skcms_Matrix3x3 from_xyz;
2661
2662 switch (srcFmt >> 1) {
2663 default: return false;
2664 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2665 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2666 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2667 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2668 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2669 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2670 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2671 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2672 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2673 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2674 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2675 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2676 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2677 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2678 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2679 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2680 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2681
2682 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2683 *args++ = palette;
2684 break;
2685 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2686 *ops++ = Op_load_8888;
2687 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2688 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2689 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2690 break;
2691 }
2692 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2693 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2694 *ops++ = Op_clamp;
2695 }
2696 if (srcFmt & 1) {
2697 *ops++ = Op_swap_rb;
2698 }
2699 skcms_ICCProfile gray_dst_profile;
2700 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2701 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2702 // luminance (Y) by the destination transfer function.
2703 gray_dst_profile = *dstProfile;
2704 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2705 dstProfile = &gray_dst_profile;
2706 }
2707
2708 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2709 // Photoshop creates CMYK images as inverse CMYK.
2710 // These happen to be the only ones we've _ever_ seen.
2711 *ops++ = Op_invert;
2712 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2713 srcAlpha = skcms_AlphaFormat_Unpremul;
2714 }
2715
2716 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2717 *ops++ = Op_force_opaque;
2718 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2719 *ops++ = Op_unpremul;
2720 }
2721
2722 if (dstProfile != srcProfile) {
2723
2724 if (!prep_for_destination(dstProfile,
2725 &from_xyz,
2726 &dst_curves[0].parametric,
2727 &dst_curves[1].parametric,
2728 &dst_curves[2].parametric)) {
2729 return false;
2730 }
2731
2732 if (srcProfile->has_A2B) {
2733 if (srcProfile->A2B.input_channels) {
2734 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2735 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2736 if (oa.arg) {
2737 *ops++ = oa.op;
2738 *args++ = oa.arg;
2739 }
2740 }
2741 *ops++ = Op_clamp;
2742 *ops++ = Op_clut_A2B;
2743 *args++ = &srcProfile->A2B;
2744 }
2745
2746 if (srcProfile->A2B.matrix_channels == 3) {
2747 for (int i = 0; i < 3; i++) {
2748 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2749 if (oa.arg) {
2750 *ops++ = oa.op;
2751 *args++ = oa.arg;
2752 }
2753 }
2754
2755 static const skcms_Matrix3x4 I = {{
2756 {1,0,0,0},
2757 {0,1,0,0},
2758 {0,0,1,0},
2759 }};
2760 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2761 *ops++ = Op_matrix_3x4;
2762 *args++ = &srcProfile->A2B.matrix;
2763 }
2764 }
2765
2766 if (srcProfile->A2B.output_channels == 3) {
2767 for (int i = 0; i < 3; i++) {
2768 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2769 if (oa.arg) {
2770 *ops++ = oa.op;
2771 *args++ = oa.arg;
2772 }
2773 }
2774 }
2775
2776 if (srcProfile->pcs == skcms_Signature_Lab) {
2777 *ops++ = Op_lab_to_xyz;
2778 }
2779
2780 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2781 for (int i = 0; i < 3; i++) {
2782 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2783 if (oa.arg) {
2784 *ops++ = oa.op;
2785 *args++ = oa.arg;
2786 }
2787 }
2788 } else {
2789 return false;
2790 }
2791
2792 // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2793 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2794
2795 if (dstProfile->has_B2A) {
2796 // B2A needs its input in XYZD50, so transform TRC sources now.
2797 if (!srcProfile->has_A2B) {
2798 *ops++ = Op_matrix_3x3;
2799 *args++ = &srcProfile->toXYZD50;
2800 }
2801
2802 if (dstProfile->pcs == skcms_Signature_Lab) {
2803 *ops++ = Op_xyz_to_lab;
2804 }
2805
2806 if (dstProfile->B2A.input_channels == 3) {
2807 for (int i = 0; i < 3; i++) {
2808 OpAndArg oa = select_curve_op(&dstProfile->B2A.input_curves[i], i);
2809 if (oa.arg) {
2810 *ops++ = oa.op;
2811 *args++ = oa.arg;
2812 }
2813 }
2814 }
2815
2816 if (dstProfile->B2A.matrix_channels == 3) {
2817 static const skcms_Matrix3x4 I = {{
2818 {1,0,0,0},
2819 {0,1,0,0},
2820 {0,0,1,0},
2821 }};
2822 if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2823 *ops++ = Op_matrix_3x4;
2824 *args++ = &dstProfile->B2A.matrix;
2825 }
2826
2827 for (int i = 0; i < 3; i++) {
2828 OpAndArg oa = select_curve_op(&dstProfile->B2A.matrix_curves[i], i);
2829 if (oa.arg) {
2830 *ops++ = oa.op;
2831 *args++ = oa.arg;
2832 }
2833 }
2834 }
2835
2836 if (dstProfile->B2A.output_channels) {
2837 *ops++ = Op_clamp;
2838 *ops++ = Op_clut_B2A;
2839 *args++ = &dstProfile->B2A;
2840 for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
2841 OpAndArg oa = select_curve_op(&dstProfile->B2A.output_curves[i], i);
2842 if (oa.arg) {
2843 *ops++ = oa.op;
2844 *args++ = oa.arg;
2845 }
2846 }
2847 }
2848 } else {
2849 // This is a TRC destination.
2850 // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2851 // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2852 static const skcms_Matrix3x3 I = {{
2853 { 1.0f, 0.0f, 0.0f },
2854 { 0.0f, 1.0f, 0.0f },
2855 { 0.0f, 0.0f, 1.0f },
2856 }};
2857 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2858
2859 // There's a chance the source and destination gamuts are identical,
2860 // in which case we can skip the gamut transform.
2861 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2862 // Concat the entire gamut transform into from_xyz,
2863 // now slightly misnamed but it's a handy spot to stash the result.
2864 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2865 *ops++ = Op_matrix_3x3;
2866 *args++ = &from_xyz;
2867 }
2868
2869 // Encode back to dst RGB using its parametric transfer functions.
2870 for (int i = 0; i < 3; i++) {
2871 OpAndArg oa = select_curve_op(dst_curves+i, i);
2872 if (oa.arg) {
2873 assert (oa.op != Op_table_r &&
2874 oa.op != Op_table_g &&
2875 oa.op != Op_table_b &&
2876 oa.op != Op_table_a);
2877 *ops++ = oa.op;
2878 *args++ = oa.arg;
2879 }
2880 }
2881 }
2882 }
2883
2884 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2885 // not just to values that fit in [0,1].
2886 //
2887 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2888 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2889 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2890 *ops++ = Op_clamp;
2891 }
2892
2893 if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2894 // Photoshop creates CMYK images as inverse CMYK.
2895 // These happen to be the only ones we've _ever_ seen.
2896 *ops++ = Op_invert;
2897
2898 // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2899 dstAlpha = skcms_AlphaFormat_Unpremul;
2900 }
2901
2902 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2903 *ops++ = Op_force_opaque;
2904 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2905 *ops++ = Op_premul;
2906 }
2907 if (dstFmt & 1) {
2908 *ops++ = Op_swap_rb;
2909 }
2910 switch (dstFmt >> 1) {
2911 default: return false;
2912 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2913 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2914 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2915 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2916 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2917 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2918 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2919 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2920 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2921 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2922 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2923 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2924 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2925 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2926 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2927 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2928 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2929
2930 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2931 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2932 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2933 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2934 *ops++ = Op_store_8888;
2935 break;
2936 }
2937
2938 auto run = baseline::run_program;
2939 #if defined(TEST_FOR_HSW)
2940 switch (cpu_type()) {
2941 case CpuType::None: break;
2942 case CpuType::HSW: run = hsw::run_program; break;
2943 case CpuType::SKX: run = hsw::run_program; break;
2944 }
2945 #endif
2946 #if defined(TEST_FOR_SKX)
2947 switch (cpu_type()) {
2948 case CpuType::None: break;
2949 case CpuType::HSW: break;
2950 case CpuType::SKX: run = skx::run_program; break;
2951 }
2952 #endif
2953 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2954 return true;
2955 }
2956
assert_usable_as_destination(const skcms_ICCProfile * profile)2957 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2958 #if defined(NDEBUG)
2959 (void)profile;
2960 #else
2961 skcms_Matrix3x3 fromXYZD50;
2962 skcms_TransferFunction invR, invG, invB;
2963 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2964 #endif
2965 }
2966
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2967 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2968 if (!profile->has_B2A) {
2969 skcms_Matrix3x3 fromXYZD50;
2970 if (!profile->has_trc || !profile->has_toXYZD50
2971 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2972 return false;
2973 }
2974
2975 skcms_TransferFunction tf[3];
2976 for (int i = 0; i < 3; i++) {
2977 skcms_TransferFunction inv;
2978 if (profile->trc[i].table_entries == 0
2979 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2980 tf[i] = profile->trc[i].parametric;
2981 continue;
2982 }
2983
2984 float max_error;
2985 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2986 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2987 return false;
2988 }
2989 }
2990
2991 for (int i = 0; i < 3; ++i) {
2992 profile->trc[i].table_entries = 0;
2993 profile->trc[i].parametric = tf[i];
2994 }
2995 }
2996 assert_usable_as_destination(profile);
2997 return true;
2998 }
2999
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)3000 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
3001 // Call skcms_MakeUsableAsDestination() with B2A disabled;
3002 // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
3003 skcms_ICCProfile result = *profile;
3004 result.has_B2A = false;
3005 if (!skcms_MakeUsableAsDestination(&result)) {
3006 return false;
3007 }
3008
3009 // Of the three, pick the transfer function that best fits the other two.
3010 int best_tf = 0;
3011 float min_max_error = INFINITY_;
3012 for (int i = 0; i < 3; i++) {
3013 skcms_TransferFunction inv;
3014 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
3015 return false;
3016 }
3017
3018 float err = 0;
3019 for (int j = 0; j < 3; ++j) {
3020 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
3021 }
3022 if (min_max_error > err) {
3023 min_max_error = err;
3024 best_tf = i;
3025 }
3026 }
3027
3028 for (int i = 0; i < 3; i++) {
3029 result.trc[i].parametric = result.trc[best_tf].parametric;
3030 }
3031
3032 *profile = result;
3033 assert_usable_as_destination(profile);
3034 return true;
3035 }
3036