1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20 #endif
21
22 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
23 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
24 //
25 // Please do not use sizeof() directly, and size_t only when required.
26 // (We have no way of enforcing these requests...)
27 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
28
29 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
30 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
31
32 static const union {
33 uint32_t bits;
34 float f;
35 } inf_ = { 0x7f800000 };
36 #define INFINITY_ inf_.f
37
fmaxf_(float x,float y)38 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)39 static float fminf_(float x, float y) { return x < y ? x : y; }
40
isfinitef_(float x)41 static bool isfinitef_(float x) { return 0 == x*0; }
42
minus_1_ulp(float x)43 static float minus_1_ulp(float x) {
44 int32_t bits;
45 memcpy(&bits, &x, sizeof(bits));
46 bits = bits - 1;
47 memcpy(&x, &bits, sizeof(bits));
48 return x;
49 }
50
eval_curve(const skcms_Curve * curve,float x)51 static float eval_curve(const skcms_Curve* curve, float x) {
52 if (curve->table_entries == 0) {
53 return skcms_TransferFunction_eval(&curve->parametric, x);
54 }
55
56 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
57 int lo = (int) ix ,
58 hi = (int)(float)minus_1_ulp(ix + 1.0f);
59 float t = ix - (float)lo;
60
61 float l, h;
62 if (curve->table_8) {
63 l = curve->table_8[lo] * (1/255.0f);
64 h = curve->table_8[hi] * (1/255.0f);
65 } else {
66 uint16_t be_l, be_h;
67 memcpy(&be_l, curve->table_16 + 2*lo, 2);
68 memcpy(&be_h, curve->table_16 + 2*hi, 2);
69 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
70 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
71 l = le_l * (1/65535.0f);
72 h = le_h * (1/65535.0f);
73 }
74 return l + (h-l)*t;
75 }
76
max_roundtrip_error(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)77 static float max_roundtrip_error(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
78 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
79 const float dx = 1.0f / (N - 1);
80 float err = 0;
81 for (uint32_t i = 0; i < N; i++) {
82 float x = i * dx,
83 y = eval_curve(curve, x);
84 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
85 }
86 return err;
87 }
88
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)89 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
90 return max_roundtrip_error(curve, inv_tf) < (1/512.0f);
91 }
92
93 // Additional ICC signature values that are only used internally
94 enum {
95 // File signature
96 skcms_Signature_acsp = 0x61637370,
97
98 // Tag signatures
99 skcms_Signature_rTRC = 0x72545243,
100 skcms_Signature_gTRC = 0x67545243,
101 skcms_Signature_bTRC = 0x62545243,
102 skcms_Signature_kTRC = 0x6B545243,
103
104 skcms_Signature_rXYZ = 0x7258595A,
105 skcms_Signature_gXYZ = 0x6758595A,
106 skcms_Signature_bXYZ = 0x6258595A,
107
108 skcms_Signature_A2B0 = 0x41324230,
109 skcms_Signature_A2B1 = 0x41324231,
110 skcms_Signature_mAB = 0x6D414220,
111
112 skcms_Signature_CHAD = 0x63686164,
113
114 // Type signatures
115 skcms_Signature_curv = 0x63757276,
116 skcms_Signature_mft1 = 0x6D667431,
117 skcms_Signature_mft2 = 0x6D667432,
118 skcms_Signature_para = 0x70617261,
119 skcms_Signature_sf32 = 0x73663332,
120 // XYZ is also a PCS signature, so it's defined in skcms.h
121 // skcms_Signature_XYZ = 0x58595A20,
122 };
123
read_big_u16(const uint8_t * ptr)124 static uint16_t read_big_u16(const uint8_t* ptr) {
125 uint16_t be;
126 memcpy(&be, ptr, sizeof(be));
127 #if defined(_MSC_VER)
128 return _byteswap_ushort(be);
129 #else
130 return __builtin_bswap16(be);
131 #endif
132 }
133
read_big_u32(const uint8_t * ptr)134 static uint32_t read_big_u32(const uint8_t* ptr) {
135 uint32_t be;
136 memcpy(&be, ptr, sizeof(be));
137 #if defined(_MSC_VER)
138 return _byteswap_ulong(be);
139 #else
140 return __builtin_bswap32(be);
141 #endif
142 }
143
read_big_i32(const uint8_t * ptr)144 static int32_t read_big_i32(const uint8_t* ptr) {
145 return (int32_t)read_big_u32(ptr);
146 }
147
read_big_fixed(const uint8_t * ptr)148 static float read_big_fixed(const uint8_t* ptr) {
149 return read_big_i32(ptr) * (1.0f / 65536.0f);
150 }
151
152 // Maps to an in-memory profile so that fields line up to the locations specified
153 // in ICC.1:2010, section 7.2
154 typedef struct {
155 uint8_t size [ 4];
156 uint8_t cmm_type [ 4];
157 uint8_t version [ 4];
158 uint8_t profile_class [ 4];
159 uint8_t data_color_space [ 4];
160 uint8_t pcs [ 4];
161 uint8_t creation_date_time [12];
162 uint8_t signature [ 4];
163 uint8_t platform [ 4];
164 uint8_t flags [ 4];
165 uint8_t device_manufacturer [ 4];
166 uint8_t device_model [ 4];
167 uint8_t device_attributes [ 8];
168 uint8_t rendering_intent [ 4];
169 uint8_t illuminant_X [ 4];
170 uint8_t illuminant_Y [ 4];
171 uint8_t illuminant_Z [ 4];
172 uint8_t creator [ 4];
173 uint8_t profile_id [16];
174 uint8_t reserved [28];
175 uint8_t tag_count [ 4]; // Technically not part of header, but required
176 } header_Layout;
177
178 typedef struct {
179 uint8_t signature [4];
180 uint8_t offset [4];
181 uint8_t size [4];
182 } tag_Layout;
183
get_tag_table(const skcms_ICCProfile * profile)184 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
185 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
186 }
187
188 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
189 // use of the type is for the CHAD tag that stores exactly nine values.
190 typedef struct {
191 uint8_t type [ 4];
192 uint8_t reserved [ 4];
193 uint8_t values [36];
194 } sf32_Layout;
195
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)196 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
197 skcms_ICCTag tag;
198 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
199 return false;
200 }
201
202 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
203 return false;
204 }
205
206 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
207 const uint8_t* values = sf32Tag->values;
208 for (int r = 0; r < 3; ++r)
209 for (int c = 0; c < 3; ++c, values += 4) {
210 m->vals[r][c] = read_big_fixed(values);
211 }
212 return true;
213 }
214
215 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
216 // the type are for tags/data that store exactly one triple.
217 typedef struct {
218 uint8_t type [4];
219 uint8_t reserved [4];
220 uint8_t X [4];
221 uint8_t Y [4];
222 uint8_t Z [4];
223 } XYZ_Layout;
224
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)225 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
226 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
227 return false;
228 }
229
230 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
231
232 *x = read_big_fixed(xyzTag->X);
233 *y = read_big_fixed(xyzTag->Y);
234 *z = read_big_fixed(xyzTag->Z);
235 return true;
236 }
237
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)238 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
239 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
240 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
241 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
242 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
243 }
244
tf_is_valid(const skcms_TransferFunction * tf)245 static bool tf_is_valid(const skcms_TransferFunction* tf) {
246 // Reject obviously malformed inputs
247 if (!isfinitef_(tf->a + tf->b + tf->c + tf->d + tf->e + tf->f + tf->g)) {
248 return false;
249 }
250
251 // All of these parameters should be non-negative
252 if (tf->a < 0 || tf->c < 0 || tf->d < 0 || tf->g < 0) {
253 return false;
254 }
255
256 return true;
257 }
258
259 typedef struct {
260 uint8_t type [4];
261 uint8_t reserved_a [4];
262 uint8_t function_type [2];
263 uint8_t reserved_b [2];
264 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
265 } para_Layout;
266
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)267 static bool read_curve_para(const uint8_t* buf, uint32_t size,
268 skcms_Curve* curve, uint32_t* curve_size) {
269 if (size < SAFE_FIXED_SIZE(para_Layout)) {
270 return false;
271 }
272
273 const para_Layout* paraTag = (const para_Layout*)buf;
274
275 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
276 uint16_t function_type = read_big_u16(paraTag->function_type);
277 if (function_type > kGABCDEF) {
278 return false;
279 }
280
281 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
282 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
283 return false;
284 }
285
286 if (curve_size) {
287 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
288 }
289
290 curve->table_entries = 0;
291 curve->parametric.a = 1.0f;
292 curve->parametric.b = 0.0f;
293 curve->parametric.c = 0.0f;
294 curve->parametric.d = 0.0f;
295 curve->parametric.e = 0.0f;
296 curve->parametric.f = 0.0f;
297 curve->parametric.g = read_big_fixed(paraTag->variable);
298
299 switch (function_type) {
300 case kGAB:
301 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
302 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
303 if (curve->parametric.a == 0) {
304 return false;
305 }
306 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
307 break;
308 case kGABC:
309 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
310 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
311 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
312 if (curve->parametric.a == 0) {
313 return false;
314 }
315 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
316 curve->parametric.f = curve->parametric.e;
317 break;
318 case kGABCD:
319 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
320 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
321 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
322 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
323 break;
324 case kGABCDEF:
325 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
326 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
327 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
328 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
329 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
330 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
331 break;
332 }
333 return tf_is_valid(&curve->parametric);
334 }
335
336 typedef struct {
337 uint8_t type [4];
338 uint8_t reserved [4];
339 uint8_t value_count [4];
340 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
341 } curv_Layout;
342
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)343 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
344 skcms_Curve* curve, uint32_t* curve_size) {
345 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
346 return false;
347 }
348
349 const curv_Layout* curvTag = (const curv_Layout*)buf;
350
351 uint32_t value_count = read_big_u32(curvTag->value_count);
352 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
353 return false;
354 }
355
356 if (curve_size) {
357 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
358 }
359
360 if (value_count < 2) {
361 curve->table_entries = 0;
362 curve->parametric.a = 1.0f;
363 curve->parametric.b = 0.0f;
364 curve->parametric.c = 0.0f;
365 curve->parametric.d = 0.0f;
366 curve->parametric.e = 0.0f;
367 curve->parametric.f = 0.0f;
368 if (value_count == 0) {
369 // Empty tables are a shorthand for an identity curve
370 curve->parametric.g = 1.0f;
371 } else {
372 // Single entry tables are a shorthand for simple gamma
373 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
374 }
375 } else {
376 curve->table_8 = nullptr;
377 curve->table_16 = curvTag->variable;
378 curve->table_entries = value_count;
379 }
380
381 return true;
382 }
383
384 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
385 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)386 static bool read_curve(const uint8_t* buf, uint32_t size,
387 skcms_Curve* curve, uint32_t* curve_size) {
388 if (!buf || size < 4 || !curve) {
389 return false;
390 }
391
392 uint32_t type = read_big_u32(buf);
393 if (type == skcms_Signature_para) {
394 return read_curve_para(buf, size, curve, curve_size);
395 } else if (type == skcms_Signature_curv) {
396 return read_curve_curv(buf, size, curve, curve_size);
397 }
398
399 return false;
400 }
401
402 // mft1 and mft2 share a large chunk of data
403 typedef struct {
404 uint8_t type [ 4];
405 uint8_t reserved_a [ 4];
406 uint8_t input_channels [ 1];
407 uint8_t output_channels [ 1];
408 uint8_t grid_points [ 1];
409 uint8_t reserved_b [ 1];
410 uint8_t matrix [36];
411 } mft_CommonLayout;
412
413 typedef struct {
414 mft_CommonLayout common [1];
415
416 uint8_t variable [1/*variable*/];
417 } mft1_Layout;
418
419 typedef struct {
420 mft_CommonLayout common [1];
421
422 uint8_t input_table_entries [2];
423 uint8_t output_table_entries [2];
424 uint8_t variable [1/*variable*/];
425 } mft2_Layout;
426
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)427 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
428 // MFT matrices are applied before the first set of curves, but must be identity unless the
429 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
430 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
431 // field/flag.
432 a2b->matrix_channels = 0;
433
434 a2b->input_channels = mftTag->input_channels[0];
435 a2b->output_channels = mftTag->output_channels[0];
436
437 // We require exactly three (ie XYZ/Lab/RGB) output channels
438 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
439 return false;
440 }
441 // We require at least one, and no more than four (ie CMYK) input channels
442 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
443 return false;
444 }
445
446 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
447 a2b->grid_points[i] = mftTag->grid_points[0];
448 }
449 // The grid only makes sense with at least two points along each axis
450 if (a2b->grid_points[0] < 2) {
451 return false;
452 }
453
454 return true;
455 }
456
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)457 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
458 uint32_t input_table_entries, uint32_t output_table_entries,
459 skcms_A2B* a2b) {
460 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
461 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
462 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
463
464 // [input|output]_channels are <= 4, so still no overflow
465 uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
466 uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
467
468 uint64_t grid_size = a2b->output_channels * byte_width;
469 for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
470 grid_size *= a2b->grid_points[axis];
471 }
472
473 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
474 return false;
475 }
476
477 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
478 a2b->input_curves[i].table_entries = input_table_entries;
479 if (byte_width == 1) {
480 a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
481 a2b->input_curves[i].table_16 = nullptr;
482 } else {
483 a2b->input_curves[i].table_8 = nullptr;
484 a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
485 }
486 }
487
488 if (byte_width == 1) {
489 a2b->grid_8 = table_base + byte_len_all_input_tables;
490 a2b->grid_16 = nullptr;
491 } else {
492 a2b->grid_8 = nullptr;
493 a2b->grid_16 = table_base + byte_len_all_input_tables;
494 }
495
496 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
497 for (uint32_t i = 0; i < a2b->output_channels; ++i) {
498 a2b->output_curves[i].table_entries = output_table_entries;
499 if (byte_width == 1) {
500 a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
501 a2b->output_curves[i].table_16 = nullptr;
502 } else {
503 a2b->output_curves[i].table_8 = nullptr;
504 a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
505 }
506 }
507
508 return true;
509 }
510
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)511 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
512 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
513 return false;
514 }
515
516 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
517 if (!read_mft_common(mftTag->common, a2b)) {
518 return false;
519 }
520
521 uint32_t input_table_entries = 256;
522 uint32_t output_table_entries = 256;
523
524 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
525 input_table_entries, output_table_entries, a2b);
526 }
527
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)528 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
529 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
530 return false;
531 }
532
533 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
534 if (!read_mft_common(mftTag->common, a2b)) {
535 return false;
536 }
537
538 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
539 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
540
541 // ICC spec mandates that 2 <= table_entries <= 4096
542 if (input_table_entries < 2 || input_table_entries > 4096 ||
543 output_table_entries < 2 || output_table_entries > 4096) {
544 return false;
545 }
546
547 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
548 input_table_entries, output_table_entries, a2b);
549 }
550
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)551 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
552 uint32_t num_curves, skcms_Curve* curves) {
553 for (uint32_t i = 0; i < num_curves; ++i) {
554 if (curve_offset > size) {
555 return false;
556 }
557
558 uint32_t curve_bytes;
559 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
560 return false;
561 }
562
563 if (curve_bytes > UINT32_MAX - 3) {
564 return false;
565 }
566 curve_bytes = (curve_bytes + 3) & ~3U;
567
568 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
569 curve_offset = (uint32_t)new_offset_64;
570 if (new_offset_64 != curve_offset) {
571 return false;
572 }
573 }
574
575 return true;
576 }
577
578 typedef struct {
579 uint8_t type [ 4];
580 uint8_t reserved_a [ 4];
581 uint8_t input_channels [ 1];
582 uint8_t output_channels [ 1];
583 uint8_t reserved_b [ 2];
584 uint8_t b_curve_offset [ 4];
585 uint8_t matrix_offset [ 4];
586 uint8_t m_curve_offset [ 4];
587 uint8_t clut_offset [ 4];
588 uint8_t a_curve_offset [ 4];
589 } mAB_Layout;
590
591 typedef struct {
592 uint8_t grid_points [16];
593 uint8_t grid_byte_width [ 1];
594 uint8_t reserved [ 3];
595 uint8_t variable [1/*variable*/];
596 } mABCLUT_Layout;
597
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)598 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
599 if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
600 return false;
601 }
602
603 const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
604
605 a2b->input_channels = mABTag->input_channels[0];
606 a2b->output_channels = mABTag->output_channels[0];
607
608 // We require exactly three (ie XYZ/Lab/RGB) output channels
609 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
610 return false;
611 }
612 // We require no more than four (ie CMYK) input channels
613 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
614 return false;
615 }
616
617 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
618 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
619 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
620 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
621 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
622
623 // "B" curves must be present
624 if (0 == b_curve_offset) {
625 return false;
626 }
627
628 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
629 a2b->output_curves)) {
630 return false;
631 }
632
633 // "M" curves and Matrix must be used together
634 if (0 != m_curve_offset) {
635 if (0 == matrix_offset) {
636 return false;
637 }
638 a2b->matrix_channels = a2b->output_channels;
639 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
640 a2b->matrix_curves)) {
641 return false;
642 }
643
644 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
645 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
646 return false;
647 }
648 float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
649 const uint8_t* mtx_buf = tag->buf + matrix_offset;
650 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
651 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
652 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
653 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
654 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
655 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
656 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
657 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
658 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
659 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
660 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
661 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
662 } else {
663 if (0 != matrix_offset) {
664 return false;
665 }
666 a2b->matrix_channels = 0;
667 }
668
669 // "A" curves and CLUT must be used together
670 if (0 != a_curve_offset) {
671 if (0 == clut_offset) {
672 return false;
673 }
674 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
675 a2b->input_curves)) {
676 return false;
677 }
678
679 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
680 return false;
681 }
682 const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
683
684 if (clut->grid_byte_width[0] == 1) {
685 a2b->grid_8 = clut->variable;
686 a2b->grid_16 = nullptr;
687 } else if (clut->grid_byte_width[0] == 2) {
688 a2b->grid_8 = nullptr;
689 a2b->grid_16 = clut->variable;
690 } else {
691 return false;
692 }
693
694 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
695 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
696 a2b->grid_points[i] = clut->grid_points[i];
697 // The grid only makes sense with at least two points along each axis
698 if (a2b->grid_points[i] < 2) {
699 return false;
700 }
701 grid_size *= a2b->grid_points[i];
702 }
703 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
704 return false;
705 }
706 } else {
707 if (0 != clut_offset) {
708 return false;
709 }
710
711 // If there is no CLUT, the number of input and output channels must match
712 if (a2b->input_channels != a2b->output_channels) {
713 return false;
714 }
715
716 // Zero out the number of input channels to signal that we're skipping this stage
717 a2b->input_channels = 0;
718 }
719
720 return true;
721 }
722
723 // If you pass f, we'll fit a possibly-non-zero value for *f.
724 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)725 static int fit_linear(const skcms_Curve* curve, int N, float tol,
726 float* c, float* d, float* f = nullptr) {
727 assert(N > 1);
728 // We iteratively fit the first points to the TF's linear piece.
729 // We want the cx + f line to pass through the first and last points we fit exactly.
730 //
731 // As we walk along the points we find the minimum and maximum slope of the line before the
732 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
733 // emtpy, when we definitely can't add any more points.
734 //
735 // Some points' error intervals may intersect the running interval but not lie fully
736 // within it. So we keep track of the last point we saw that is a valid end point candidate,
737 // and once the search is done, back up to build the line through *that* point.
738 const float dx = 1.0f / (N - 1);
739
740 int lin_points = 1;
741
742 float f_zero = 0.0f;
743 if (f) {
744 *f = eval_curve(curve, 0);
745 } else {
746 f = &f_zero;
747 }
748
749
750 float slope_min = -INFINITY_;
751 float slope_max = +INFINITY_;
752 for (int i = 1; i < N; ++i) {
753 float x = i * dx;
754 float y = eval_curve(curve, x);
755
756 float slope_max_i = (y + tol - *f) / x,
757 slope_min_i = (y - tol - *f) / x;
758 if (slope_max_i < slope_min || slope_max < slope_min_i) {
759 // Slope intervals would no longer overlap.
760 break;
761 }
762 slope_max = fminf_(slope_max, slope_max_i);
763 slope_min = fmaxf_(slope_min, slope_min_i);
764
765 float cur_slope = (y - *f) / x;
766 if (slope_min <= cur_slope && cur_slope <= slope_max) {
767 lin_points = i + 1;
768 *c = cur_slope;
769 }
770 }
771
772 // Set D to the last point that met our tolerance.
773 *d = (lin_points - 1) * dx;
774 return lin_points;
775 }
776
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)777 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
778 bool ok = false;
779 if (tag->type == skcms_Signature_mft1) {
780 ok = read_tag_mft1(tag, a2b);
781 } else if (tag->type == skcms_Signature_mft2) {
782 ok = read_tag_mft2(tag, a2b);
783 } else if (tag->type == skcms_Signature_mAB) {
784 ok = read_tag_mab(tag, a2b, pcs_is_xyz);
785 }
786 if (!ok) {
787 return false;
788 }
789
790 // Detect and canonicalize identity tables.
791 skcms_Curve* curves[] = {
792 a2b->input_channels > 0 ? a2b->input_curves + 0 : nullptr,
793 a2b->input_channels > 1 ? a2b->input_curves + 1 : nullptr,
794 a2b->input_channels > 2 ? a2b->input_curves + 2 : nullptr,
795 a2b->input_channels > 3 ? a2b->input_curves + 3 : nullptr,
796 a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
797 a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
798 a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
799 a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
800 a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
801 a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
802 };
803
804 for (int i = 0; i < ARRAY_COUNT(curves); i++) {
805 skcms_Curve* curve = curves[i];
806
807 if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
808 int N = (int)curve->table_entries;
809
810 float c = 0.0f, d = 0.0f, f = 0.0f;
811 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
812 && c == 1.0f
813 && f == 0.0f) {
814 curve->table_entries = 0;
815 curve->table_8 = nullptr;
816 curve->table_16 = nullptr;
817 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
818 }
819 }
820 }
821
822 return true;
823 }
824
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)825 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
826 if (!profile || !profile->buffer || !tag) { return; }
827 if (idx > profile->tag_count) { return; }
828 const tag_Layout* tags = get_tag_table(profile);
829 tag->signature = read_big_u32(tags[idx].signature);
830 tag->size = read_big_u32(tags[idx].size);
831 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
832 tag->type = read_big_u32(tag->buf);
833 }
834
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)835 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
836 if (!profile || !profile->buffer || !tag) { return false; }
837 const tag_Layout* tags = get_tag_table(profile);
838 for (uint32_t i = 0; i < profile->tag_count; ++i) {
839 if (read_big_u32(tags[i].signature) == sig) {
840 tag->signature = sig;
841 tag->size = read_big_u32(tags[i].size);
842 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
843 tag->type = read_big_u32(tag->buf);
844 return true;
845 }
846 }
847 return false;
848 }
849
usable_as_src(const skcms_ICCProfile * profile)850 static bool usable_as_src(const skcms_ICCProfile* profile) {
851 return profile->has_A2B
852 || (profile->has_trc && profile->has_toXYZD50);
853 }
854
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)855 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
856 assert(SAFE_SIZEOF(header_Layout) == 132);
857
858 if (!profile) {
859 return false;
860 }
861 memset(profile, 0, SAFE_SIZEOF(*profile));
862
863 if (len < SAFE_SIZEOF(header_Layout)) {
864 return false;
865 }
866
867 // Byte-swap all header fields
868 const header_Layout* header = (const header_Layout*)buf;
869 profile->buffer = (const uint8_t*)buf;
870 profile->size = read_big_u32(header->size);
871 uint32_t version = read_big_u32(header->version);
872 profile->data_color_space = read_big_u32(header->data_color_space);
873 profile->pcs = read_big_u32(header->pcs);
874 uint32_t signature = read_big_u32(header->signature);
875 float illuminant_X = read_big_fixed(header->illuminant_X);
876 float illuminant_Y = read_big_fixed(header->illuminant_Y);
877 float illuminant_Z = read_big_fixed(header->illuminant_Z);
878 profile->tag_count = read_big_u32(header->tag_count);
879
880 // Validate signature, size (smaller than buffer, large enough to hold tag table),
881 // and major version
882 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
883 if (signature != skcms_Signature_acsp ||
884 profile->size > len ||
885 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
886 (version >> 24) > 4) {
887 return false;
888 }
889
890 // Validate that illuminant is D50 white
891 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
892 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
893 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
894 return false;
895 }
896
897 // Validate that all tag entries have sane offset + size
898 const tag_Layout* tags = get_tag_table(profile);
899 for (uint32_t i = 0; i < profile->tag_count; ++i) {
900 uint32_t tag_offset = read_big_u32(tags[i].offset);
901 uint32_t tag_size = read_big_u32(tags[i].size);
902 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
903 if (tag_size < 4 || tag_end > profile->size) {
904 return false;
905 }
906 }
907
908 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
909 return false;
910 }
911
912 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
913
914 // Pre-parse commonly used tags.
915 skcms_ICCTag kTRC;
916 if (profile->data_color_space == skcms_Signature_Gray &&
917 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
918 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
919 // Malformed tag
920 return false;
921 }
922 profile->trc[1] = profile->trc[0];
923 profile->trc[2] = profile->trc[0];
924 profile->has_trc = true;
925
926 if (pcs_is_xyz) {
927 profile->toXYZD50.vals[0][0] = illuminant_X;
928 profile->toXYZD50.vals[1][1] = illuminant_Y;
929 profile->toXYZD50.vals[2][2] = illuminant_Z;
930 profile->has_toXYZD50 = true;
931 }
932 } else {
933 skcms_ICCTag rTRC, gTRC, bTRC;
934 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
935 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
936 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
937 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
938 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
939 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
940 // Malformed TRC tags
941 return false;
942 }
943 profile->has_trc = true;
944 }
945
946 skcms_ICCTag rXYZ, gXYZ, bXYZ;
947 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
948 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
949 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
950 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
951 // Malformed XYZ tags
952 return false;
953 }
954 profile->has_toXYZD50 = true;
955 }
956 }
957
958 skcms_ICCTag a2b_tag;
959
960 // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
961 // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
962 // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
963 // and all our known users are thinking exclusively in terms of relative colormetric.
964 const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
965 for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
966 if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
967 if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
968 // Malformed A2B tag
969 return false;
970 }
971 profile->has_A2B = true;
972 break;
973 }
974 }
975
976 return usable_as_src(profile);
977 }
978
979
skcms_sRGB_profile()980 const skcms_ICCProfile* skcms_sRGB_profile() {
981 static const skcms_ICCProfile sRGB_profile = {
982 nullptr, // buffer, moot here
983
984 0, // size, moot here
985 skcms_Signature_RGB, // data_color_space
986 skcms_Signature_XYZ, // pcs
987 0, // tag count, moot here
988
989 // We choose to represent sRGB with its canonical transfer function,
990 // and with its canonical XYZD50 gamut matrix.
991 true, // has_trc, followed by the 3 trc curves
992 {
993 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
994 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
995 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
996 },
997
998 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
999 {{
1000 { 0.436065674f, 0.385147095f, 0.143066406f },
1001 { 0.222488403f, 0.716873169f, 0.060607910f },
1002 { 0.013916016f, 0.097076416f, 0.714096069f },
1003 }},
1004
1005 false, // has_A2B, followed by a2b itself which we don't care about.
1006 {
1007 0,
1008 {
1009 {{0, {0,0, 0,0,0,0,0}}},
1010 {{0, {0,0, 0,0,0,0,0}}},
1011 {{0, {0,0, 0,0,0,0,0}}},
1012 {{0, {0,0, 0,0,0,0,0}}},
1013 },
1014 {0,0,0,0},
1015 nullptr,
1016 nullptr,
1017
1018 0,
1019 {
1020 {{0, {0,0, 0,0,0,0,0}}},
1021 {{0, {0,0, 0,0,0,0,0}}},
1022 {{0, {0,0, 0,0,0,0,0}}},
1023 },
1024 {{
1025 { 0,0,0,0 },
1026 { 0,0,0,0 },
1027 { 0,0,0,0 },
1028 }},
1029
1030 0,
1031 {
1032 {{0, {0,0, 0,0,0,0,0}}},
1033 {{0, {0,0, 0,0,0,0,0}}},
1034 {{0, {0,0, 0,0,0,0,0}}},
1035 },
1036 },
1037 };
1038 return &sRGB_profile;
1039 }
1040
skcms_XYZD50_profile()1041 const skcms_ICCProfile* skcms_XYZD50_profile() {
1042 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1043 static const skcms_ICCProfile XYZD50_profile = {
1044 nullptr, // buffer, moot here
1045
1046 0, // size, moot here
1047 skcms_Signature_RGB, // data_color_space
1048 skcms_Signature_XYZ, // pcs
1049 0, // tag count, moot here
1050
1051 true, // has_trc, followed by the 3 trc curves
1052 {
1053 {{0, {1,1, 0,0,0,0,0}}},
1054 {{0, {1,1, 0,0,0,0,0}}},
1055 {{0, {1,1, 0,0,0,0,0}}},
1056 },
1057
1058 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1059 {{
1060 { 1,0,0 },
1061 { 0,1,0 },
1062 { 0,0,1 },
1063 }},
1064
1065 false, // has_A2B, followed by a2b itself which we don't care about.
1066 {
1067 0,
1068 {
1069 {{0, {0,0, 0,0,0,0,0}}},
1070 {{0, {0,0, 0,0,0,0,0}}},
1071 {{0, {0,0, 0,0,0,0,0}}},
1072 {{0, {0,0, 0,0,0,0,0}}},
1073 },
1074 {0,0,0,0},
1075 nullptr,
1076 nullptr,
1077
1078 0,
1079 {
1080 {{0, {0,0, 0,0,0,0,0}}},
1081 {{0, {0,0, 0,0,0,0,0}}},
1082 {{0, {0,0, 0,0,0,0,0}}},
1083 },
1084 {{
1085 { 0,0,0,0 },
1086 { 0,0,0,0 },
1087 { 0,0,0,0 },
1088 }},
1089
1090 0,
1091 {
1092 {{0, {0,0, 0,0,0,0,0}}},
1093 {{0, {0,0, 0,0,0,0,0}}},
1094 {{0, {0,0, 0,0,0,0,0}}},
1095 },
1096 },
1097 };
1098
1099 return &XYZD50_profile;
1100 }
1101
skcms_sRGB_TransferFunction()1102 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1103 return &skcms_sRGB_profile()->trc[0].parametric;
1104 }
1105
skcms_sRGB_Inverse_TransferFunction()1106 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1107 static const skcms_TransferFunction sRGB_inv =
1108 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1109 return &sRGB_inv;
1110 }
1111
skcms_Identity_TransferFunction()1112 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1113 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1114 return &identity;
1115 }
1116
1117 const uint8_t skcms_252_random_bytes[] = {
1118 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1119 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1120 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1121 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1122 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1123 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1124 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1125 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1126 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1127 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1128 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1129 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1130 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1131 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1132 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1133 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1134 };
1135
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1136 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1137 // Test for exactly equal profiles first.
1138 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1139 return true;
1140 }
1141
1142 // For now this is the essentially the same strategy we use in test_only.c
1143 // for our skcms_Transform() smoke tests:
1144 // 1) transform A to XYZD50
1145 // 2) transform B to XYZD50
1146 // 3) return true if they're similar enough
1147 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1148
1149 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1150 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1151
1152 if (A->data_color_space != B->data_color_space) {
1153 return false;
1154 }
1155
1156 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1157 // TODO: working with RGBA_8888 either way is probably fastest.
1158 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1159 size_t npixels = 84;
1160 if (A->data_color_space == skcms_Signature_CMYK) {
1161 fmt = skcms_PixelFormat_RGBA_8888;
1162 npixels = 63;
1163 }
1164
1165 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1166 // use pre-canned results and skip that skcms_Transform() call?
1167 uint8_t dstA[252],
1168 dstB[252];
1169 if (!skcms_Transform(
1170 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1171 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1172 npixels)) {
1173 return false;
1174 }
1175 if (!skcms_Transform(
1176 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1177 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1178 npixels)) {
1179 return false;
1180 }
1181
1182 // TODO: make sure this final check has reasonable codegen.
1183 for (size_t i = 0; i < 252; i++) {
1184 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1185 return false;
1186 }
1187 }
1188 return true;
1189 }
1190
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1191 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1192 const skcms_TransferFunction* inv_tf) {
1193 if (!profile || !profile->has_trc) {
1194 return false;
1195 }
1196
1197 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1198 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1199 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1200 }
1201
is_zero_to_one(float x)1202 static bool is_zero_to_one(float x) {
1203 return 0 <= x && x <= 1;
1204 }
1205
1206 typedef struct { float vals[3]; } skcms_Vector3;
1207
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1208 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1209 skcms_Vector3 dst = {{0,0,0}};
1210 for (int row = 0; row < 3; ++row) {
1211 dst.vals[row] = m->vals[row][0] * v->vals[0]
1212 + m->vals[row][1] * v->vals[1]
1213 + m->vals[row][2] * v->vals[2];
1214 }
1215 return dst;
1216 }
1217
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1218 bool skcms_PrimariesToXYZD50(float rx, float ry,
1219 float gx, float gy,
1220 float bx, float by,
1221 float wx, float wy,
1222 skcms_Matrix3x3* toXYZD50) {
1223 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1224 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1225 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1226 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1227 !toXYZD50) {
1228 return false;
1229 }
1230
1231 // First, we need to convert xy values (primaries) to XYZ.
1232 skcms_Matrix3x3 primaries = {{
1233 { rx, gx, bx },
1234 { ry, gy, by },
1235 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1236 }};
1237 skcms_Matrix3x3 primaries_inv;
1238 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1239 return false;
1240 }
1241
1242 // Assumes that Y is 1.0f.
1243 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1244 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1245
1246 skcms_Matrix3x3 toXYZ = {{
1247 { XYZ.vals[0], 0, 0 },
1248 { 0, XYZ.vals[1], 0 },
1249 { 0, 0, XYZ.vals[2] },
1250 }};
1251 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1252
1253 // Now convert toXYZ matrix to toXYZD50.
1254 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1255
1256 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1257 // the matrices below. The Bradford method is used by Adobe and is widely considered
1258 // to be the best.
1259 skcms_Matrix3x3 xyz_to_lms = {{
1260 { 0.8951f, 0.2664f, -0.1614f },
1261 { -0.7502f, 1.7135f, 0.0367f },
1262 { 0.0389f, -0.0685f, 1.0296f },
1263 }};
1264 skcms_Matrix3x3 lms_to_xyz = {{
1265 { 0.9869929f, -0.1470543f, 0.1599627f },
1266 { 0.4323053f, 0.5183603f, 0.0492912f },
1267 { -0.0085287f, 0.0400428f, 0.9684867f },
1268 }};
1269
1270 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1271 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1272
1273 skcms_Matrix3x3 DXtoD50 = {{
1274 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1275 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1276 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1277 }};
1278 DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
1279 DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
1280
1281 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1282 return true;
1283 }
1284
1285
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1286 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1287 double a00 = src->vals[0][0],
1288 a01 = src->vals[1][0],
1289 a02 = src->vals[2][0],
1290 a10 = src->vals[0][1],
1291 a11 = src->vals[1][1],
1292 a12 = src->vals[2][1],
1293 a20 = src->vals[0][2],
1294 a21 = src->vals[1][2],
1295 a22 = src->vals[2][2];
1296
1297 double b0 = a00*a11 - a01*a10,
1298 b1 = a00*a12 - a02*a10,
1299 b2 = a01*a12 - a02*a11,
1300 b3 = a20,
1301 b4 = a21,
1302 b5 = a22;
1303
1304 double determinant = b0*b5
1305 - b1*b4
1306 + b2*b3;
1307
1308 if (determinant == 0) {
1309 return false;
1310 }
1311
1312 double invdet = 1.0 / determinant;
1313 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1314 return false;
1315 }
1316
1317 b0 *= invdet;
1318 b1 *= invdet;
1319 b2 *= invdet;
1320 b3 *= invdet;
1321 b4 *= invdet;
1322 b5 *= invdet;
1323
1324 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1325 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1326 dst->vals[2][0] = (float)( + b2 );
1327 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1328 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1329 dst->vals[2][1] = (float)( - b1 );
1330 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1331 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1332 dst->vals[2][2] = (float)( + b0 );
1333
1334 for (int r = 0; r < 3; ++r)
1335 for (int c = 0; c < 3; ++c) {
1336 if (!isfinitef_(dst->vals[r][c])) {
1337 return false;
1338 }
1339 }
1340 return true;
1341 }
1342
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1343 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1344 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1345 for (int r = 0; r < 3; r++)
1346 for (int c = 0; c < 3; c++) {
1347 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1348 + A->vals[r][1] * B->vals[1][c]
1349 + A->vals[r][2] * B->vals[2][c];
1350 }
1351 return m;
1352 }
1353
1354 #if defined(__clang__) || defined(__GNUC__)
1355 #define small_memcpy __builtin_memcpy
1356 #else
1357 #define small_memcpy memcpy
1358 #endif
1359
log2f_(float x)1360 static float log2f_(float x) {
1361 // The first approximation of log2(x) is its exponent 'e', minus 127.
1362 int32_t bits;
1363 small_memcpy(&bits, &x, sizeof(bits));
1364
1365 float e = (float)bits * (1.0f / (1<<23));
1366
1367 // If we use the mantissa too we can refine the error signficantly.
1368 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
1369 float m;
1370 small_memcpy(&m, &m_bits, sizeof(m));
1371
1372 return (e - 124.225514990f
1373 - 1.498030302f*m
1374 - 1.725879990f/(0.3520887068f + m));
1375 }
1376
exp2f_(float x)1377 static float exp2f_(float x) {
1378 float fract = x - floorf_(x);
1379
1380 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
1381 - 1.490129070f*fract
1382 + 27.728023300f/(4.84252568f - fract));
1383 if (fbits > INT_MAX) {
1384 return INFINITY_;
1385 } else if (fbits < INT_MIN) {
1386 return -INFINITY_;
1387 }
1388 int32_t bits = (int32_t)fbits;
1389 small_memcpy(&x, &bits, sizeof(x));
1390 return x;
1391 }
1392
powf_(float x,float y)1393 float powf_(float x, float y) {
1394 return (x == 0) || (x == 1) ? x
1395 : exp2f_(log2f_(x) * y);
1396 }
1397
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)1398 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
1399 float sign = x < 0 ? -1.0f : 1.0f;
1400 x *= sign;
1401
1402 return sign * (x < tf->d ? tf->c * x + tf->f
1403 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
1404 }
1405
1406 #if defined(__clang__)
1407 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by tf_is_valid() on the way out.
1408 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1409 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1410 if (!tf_is_valid(src)) {
1411 return false;
1412 }
1413
1414 // We're inverting this function, solving for x in terms of y.
1415 // y = (cx + f) x < d
1416 // (ax + b)^g + e x ≥ d
1417 // The inverse of this function can be expressed in the same piecewise form.
1418 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1419
1420 // We'll start by finding the new threshold inv.d.
1421 // In principle we should be able to find that by solving for y at x=d from either side.
1422 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1423 float d_l = src->c * src->d + src->f,
1424 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1425 if (fabsf_(d_l - d_r) > 1/512.0f) {
1426 return false;
1427 }
1428 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1429
1430 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1431 if (inv.d > 0) {
1432 // Inverting the linear section is pretty straightfoward:
1433 // y = cx + f
1434 // y - f = cx
1435 // (1/c)y - f/c = x
1436 inv.c = 1.0f/src->c;
1437 inv.f = -src->f/src->c;
1438 }
1439
1440 // The interesting part is inverting the nonlinear section:
1441 // y = (ax + b)^g + e.
1442 // y - e = (ax + b)^g
1443 // (y - e)^1/g = ax + b
1444 // (y - e)^1/g - b = ax
1445 // (1/a)(y - e)^1/g - b/a = x
1446 //
1447 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1448 // let k = (1/a)^g
1449 // (1/a)( y - e)^1/g - b/a = x
1450 // (ky - ke)^1/g - b/a = x
1451
1452 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1453 inv.g = 1.0f / src->g;
1454 inv.a = k;
1455 inv.b = -k * src->e;
1456 inv.e = -src->b / src->a;
1457
1458 // Now in principle we're done.
1459 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f,
1460 // we'll tweak e. These two values should be close to each other,
1461 // just down to numerical precision issues, especially from powf_.
1462 float s = powf_(src->a + src->b, src->g) + src->e;
1463 inv.e = 1.0f - powf_(inv.a * s + inv.b, inv.g);
1464
1465 *dst = inv;
1466 return tf_is_valid(dst);
1467 }
1468
1469 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1470
1471 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1472 //
1473 // tf(x) = cx + f x < d
1474 // tf(x) = (ax + b)^g + e x ≥ d
1475 //
1476 // When fitting, we add the additional constraint that both pieces meet at d:
1477 //
1478 // cd + f = (ad + b)^g + e
1479 //
1480 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1481 //
1482 // tf(x) = cx + f x < d
1483 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1484 //
1485 // Our overall strategy is then:
1486 // For a couple tolerances,
1487 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1488 // - invert c,d,f
1489 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1490 // (and by constraint, inverted e) to the inverse of the table.
1491 // Return the parameters with least maximum error.
1492 //
1493 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1494 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1495 //
1496 // let y = Table(x)
1497 // r(x) = x - f_inv(y)
1498 //
1499 // ∂r/∂g = ln(ay + b)*(ay + b)^g
1500 // - ln(ad + b)*(ad + b)^g
1501 // ∂r/∂a = yg(ay + b)^(g-1)
1502 // - dg(ad + b)^(g-1)
1503 // ∂r/∂b = g(ay + b)^(g-1)
1504 // - g(ad + b)^(g-1)
1505
1506 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1507 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,const float P[3],float dfdP[3])1508 static float rg_nonlinear(float x,
1509 const skcms_Curve* curve,
1510 const skcms_TransferFunction* tf,
1511 const float P[3],
1512 float dfdP[3]) {
1513 const float y = eval_curve(curve, x);
1514
1515 const float g = P[0], a = P[1], b = P[2],
1516 c = tf->c, d = tf->d, f = tf->f;
1517
1518 const float Y = fmaxf_(a*y + b, 0.0f),
1519 D = a*d + b;
1520 assert (D >= 0);
1521
1522 // The gradient.
1523 dfdP[0] = 0.69314718f*log2f_(Y)*powf_(Y, g)
1524 - 0.69314718f*log2f_(D)*powf_(D, g);
1525 dfdP[1] = y*g*powf_(Y, g-1)
1526 - d*g*powf_(D, g-1);
1527 dfdP[2] = g*powf_(Y, g-1)
1528 - g*powf_(D, g-1);
1529
1530 // The residual.
1531 const float f_inv = powf_(Y, g)
1532 - powf_(D, g)
1533 + c*d + f;
1534 return x - f_inv;
1535 }
1536
gauss_newton_step(const skcms_Curve * curve,const skcms_TransferFunction * tf,float P[3],float x0,float dx,int N)1537 static bool gauss_newton_step(const skcms_Curve* curve,
1538 const skcms_TransferFunction* tf,
1539 float P[3],
1540 float x0, float dx, int N) {
1541 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1542 //
1543 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1544 // where r(P) is the residual vector
1545 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1546 //
1547 // Let's review the shape of each of these expressions:
1548 // r(P) is [N x 1], a column vector with one entry per value of x tested
1549 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
1550 // Jf^T is [3 x N], the transpose of Jf
1551 //
1552 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1553 // and so is its inverse (Jf^T Jf)^-1
1554 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1555 //
1556 // Our implementation strategy to get to the final ∆P is
1557 // 1) evaluate Jf^T Jf, call that lhs
1558 // 2) evaluate Jf^T r(P), call that rhs
1559 // 3) invert lhs
1560 // 4) multiply inverse lhs by rhs
1561 //
1562 // This is a friendly implementation strategy because we don't have to have any
1563 // buffers that scale with N, and equally nice don't have to perform any matrix
1564 // operations that are variable size.
1565 //
1566 // Other implementation strategies could trade this off, e.g. evaluating the
1567 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1568 // the residuals. That would probably require implementing singular value
1569 // decomposition, and would create a [3 x N] matrix to be multiplied by the
1570 // [N x 1] residual vector, but on the upside I think that'd eliminate the
1571 // possibility of this gauss_newton_step() function ever failing.
1572
1573 // 0) start off with lhs and rhs safely zeroed.
1574 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1575 skcms_Vector3 rhs = { {0,0,0} };
1576
1577 // 1,2) evaluate lhs and evaluate rhs
1578 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1579 // so we'll have to update lhs and rhs at the same time.
1580 for (int i = 0; i < N; i++) {
1581 float x = x0 + i*dx;
1582
1583 float dfdP[3] = {0,0,0};
1584 float resid = rg_nonlinear(x,curve,tf,P, dfdP);
1585
1586 for (int r = 0; r < 3; r++) {
1587 for (int c = 0; c < 3; c++) {
1588 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1589 }
1590 rhs.vals[r] += dfdP[r] * resid;
1591 }
1592 }
1593
1594 // If any of the 3 P parameters are unused, this matrix will be singular.
1595 // Detect those cases and fix them up to indentity instead, so we can invert.
1596 for (int k = 0; k < 3; k++) {
1597 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1598 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1599 lhs.vals[k][k] = 1;
1600 }
1601 }
1602
1603 // 3) invert lhs
1604 skcms_Matrix3x3 lhs_inv;
1605 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1606 return false;
1607 }
1608
1609 // 4) multiply inverse lhs by rhs
1610 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1611 P[0] += dP.vals[0];
1612 P[1] += dP.vals[1];
1613 P[2] += dP.vals[2];
1614 return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
1615 }
1616
1617
1618 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1619 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1620 float P[3] = { tf->g, tf->a, tf->b };
1621
1622 // No matter where we start, dx should always represent N even steps from 0 to 1.
1623 const float dx = 1.0f / (N-1);
1624
1625 for (int j = 0; j < 3/*TODO: tune*/; j++) {
1626 // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
1627 // We don't really know how to fix up a if it goes negative.
1628 if (P[1] < 0) {
1629 return false;
1630 }
1631 // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
1632 if (P[1] * tf->d + P[2] < 0) {
1633 P[2] = -P[1] * tf->d;
1634 }
1635 assert (P[1] >= 0 &&
1636 P[1] * tf->d + P[2] >= 0);
1637
1638 if (!gauss_newton_step(curve, tf,
1639 P,
1640 L*dx, dx, N-L)) {
1641 return false;
1642 }
1643 }
1644
1645 // We need to apply our fixups one last time
1646 if (P[1] < 0) {
1647 return false;
1648 }
1649 if (P[1] * tf->d + P[2] < 0) {
1650 P[2] = -P[1] * tf->d;
1651 }
1652
1653 tf->g = P[0];
1654 tf->a = P[1];
1655 tf->b = P[2];
1656 tf->e = tf->c*tf->d + tf->f
1657 - powf_(tf->a*tf->d + tf->b, tf->g);
1658 return true;
1659 }
1660
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1661 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1662 skcms_TransferFunction* approx,
1663 float* max_error) {
1664 if (!curve || !approx || !max_error) {
1665 return false;
1666 }
1667
1668 if (curve->table_entries == 0) {
1669 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1670 return false;
1671 }
1672
1673 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1674 // We need at least two points, and must put some reasonable cap on the maximum number.
1675 return false;
1676 }
1677
1678 int N = (int)curve->table_entries;
1679 const float dx = 1.0f / (N - 1);
1680
1681 *max_error = INFINITY_;
1682 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1683 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1684 skcms_TransferFunction tf,
1685 tf_inv;
1686
1687 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1688 tf.f = 0.0f;
1689 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1690
1691 if (L == N) {
1692 // If the entire data set was linear, move the coefficients to the nonlinear portion
1693 // with G == 1. This lets use a canonical representation with d == 0.
1694 tf.g = 1;
1695 tf.a = tf.c;
1696 tf.b = tf.f;
1697 tf.c = tf.d = tf.e = tf.f = 0;
1698 } else if (L == N - 1) {
1699 // Degenerate case with only two points in the nonlinear segment. Solve directly.
1700 tf.g = 1;
1701 tf.a = (eval_curve(curve, (N-1)*dx) -
1702 eval_curve(curve, (N-2)*dx))
1703 / dx;
1704 tf.b = eval_curve(curve, (N-2)*dx)
1705 - tf.a * (N-2)*dx;
1706 tf.e = 0;
1707 } else {
1708 // Start by guessing a gamma-only curve through the midpoint.
1709 int mid = (L + N) / 2;
1710 float mid_x = mid / (N - 1.0f);
1711 float mid_y = eval_curve(curve, mid_x);
1712 tf.g = log2f_(mid_y) / log2f_(mid_x);;
1713 tf.a = 1;
1714 tf.b = 0;
1715 tf.e = tf.c*tf.d + tf.f
1716 - powf_(tf.a*tf.d + tf.b, tf.g);
1717
1718
1719 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1720 !fit_nonlinear(curve, L,N, &tf_inv)) {
1721 continue;
1722 }
1723
1724 // We fit tf_inv, so calculate tf to keep in sync.
1725 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1726 continue;
1727 }
1728 }
1729
1730 // We find our error by roundtripping the table through tf_inv.
1731 //
1732 // (The most likely use case for this approximation is to be inverted and
1733 // used as the transfer function for a destination color space.)
1734 //
1735 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1736 // invertible, so re-verify that here (and use the new inverse for testing).
1737 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1738 continue;
1739 }
1740
1741 float err = max_roundtrip_error(curve, &tf_inv);
1742 if (*max_error > err) {
1743 *max_error = err;
1744 *approx = tf;
1745 }
1746 }
1747 return isfinitef_(*max_error);
1748 }
1749
1750 // ~~~~ Impl. of skcms_Transform() ~~~~
1751
1752 typedef enum {
1753 Op_load_a8,
1754 Op_load_g8,
1755 Op_load_8888_palette8,
1756 Op_load_4444,
1757 Op_load_565,
1758 Op_load_888,
1759 Op_load_8888,
1760 Op_load_1010102,
1761 Op_load_161616LE,
1762 Op_load_16161616LE,
1763 Op_load_161616BE,
1764 Op_load_16161616BE,
1765 Op_load_hhh,
1766 Op_load_hhhh,
1767 Op_load_fff,
1768 Op_load_ffff,
1769
1770 Op_swap_rb,
1771 Op_clamp,
1772 Op_invert,
1773 Op_force_opaque,
1774 Op_premul,
1775 Op_unpremul,
1776 Op_matrix_3x3,
1777 Op_matrix_3x4,
1778 Op_lab_to_xyz,
1779
1780 Op_tf_r,
1781 Op_tf_g,
1782 Op_tf_b,
1783 Op_tf_a,
1784
1785 Op_table_r,
1786 Op_table_g,
1787 Op_table_b,
1788 Op_table_a,
1789
1790 Op_clut,
1791
1792 Op_store_a8,
1793 Op_store_g8,
1794 Op_store_4444,
1795 Op_store_565,
1796 Op_store_888,
1797 Op_store_8888,
1798 Op_store_1010102,
1799 Op_store_161616LE,
1800 Op_store_16161616LE,
1801 Op_store_161616BE,
1802 Op_store_16161616BE,
1803 Op_store_hhh,
1804 Op_store_hhhh,
1805 Op_store_fff,
1806 Op_store_ffff,
1807 } Op;
1808
1809 // Without this wasm would try to use the N=4 128-bit vector code path,
1810 // which while ideal, causes tons of compiler problems. This would be
1811 // a good thing to revisit as emcc matures (currently 1.38.5).
1812 #if 1 && defined(__EMSCRIPTEN_major__)
1813 #if !defined(SKCMS_PORTABLE)
1814 #define SKCMS_PORTABLE
1815 #endif
1816 #endif
1817
1818 #if defined(__clang__)
1819 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
1820 #elif defined(__GNUC__)
1821 // For some reason GCC accepts this nonsense, but not the more straightforward version,
1822 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
1823 template <int N, typename T>
1824 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
1825
1826 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
1827 #endif
1828
1829 // First, instantiate our default exec_ops() implementation using the default compiliation target.
1830
1831 namespace baseline {
1832 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__))
1833 #define N 1
1834 using F = float;
1835 using U64 = uint64_t;
1836 using U32 = uint32_t;
1837 using I32 = int32_t;
1838 using U16 = uint16_t;
1839 using U8 = uint8_t;
1840
1841 #elif defined(__AVX512F__)
1842 #define N 16
1843 using F = Vec<N,float>;
1844 using I32 = Vec<N,int32_t>;
1845 using U64 = Vec<N,uint64_t>;
1846 using U32 = Vec<N,uint32_t>;
1847 using U16 = Vec<N,uint16_t>;
1848 using U8 = Vec<N,uint8_t>;
1849 #elif defined(__AVX__)
1850 #define N 8
1851 using F = Vec<N,float>;
1852 using I32 = Vec<N,int32_t>;
1853 using U64 = Vec<N,uint64_t>;
1854 using U32 = Vec<N,uint32_t>;
1855 using U16 = Vec<N,uint16_t>;
1856 using U8 = Vec<N,uint8_t>;
1857 #else
1858 #define N 4
1859 using F = Vec<N,float>;
1860 using I32 = Vec<N,int32_t>;
1861 using U64 = Vec<N,uint64_t>;
1862 using U32 = Vec<N,uint32_t>;
1863 using U16 = Vec<N,uint16_t>;
1864 using U8 = Vec<N,uint8_t>;
1865 #endif
1866
1867 #include "src/Transform_inl.h"
1868 #undef N
1869 }
1870
1871 // Now, instantiate any other versions of run_program() we may want for runtime detection.
1872 #if !defined(SKCMS_PORTABLE) && \
1873 (( defined(__clang__) && __clang_major__ >= 5) || \
1874 (!defined(__clang__) && defined(__GNUC__))) \
1875 && defined(__x86_64__) && !defined(__AVX2__)
1876
1877 #if defined(__clang__)
1878 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
1879 #elif defined(__GNUC__)
1880 #pragma GCC push_options
1881 #pragma GCC target("avx2,f16c")
1882 #endif
1883
1884 namespace hsw {
1885 #define USING_AVX
1886 #define USING_AVX_F16C
1887 #define USING_AVX2
1888 #define N 8
1889 using F = Vec<N,float>;
1890 using I32 = Vec<N,int32_t>;
1891 using U64 = Vec<N,uint64_t>;
1892 using U32 = Vec<N,uint32_t>;
1893 using U16 = Vec<N,uint16_t>;
1894 using U8 = Vec<N,uint8_t>;
1895
1896 #include "src/Transform_inl.h"
1897
1898 // src/Transform_inl.h will undefine USING_* for us.
1899 #undef N
1900 }
1901
1902 #if defined(__clang__)
1903 #pragma clang attribute pop
1904 #elif defined(__GNUC__)
1905 #pragma GCC pop_options
1906 #endif
1907
1908 #define TEST_FOR_HSW
1909
hsw_ok()1910 static bool hsw_ok() {
1911 static const bool ok = []{
1912 // See http://www.sandpile.org/x86/cpuid.htm
1913
1914 // First, a basic cpuid(1).
1915 uint32_t eax, ebx, ecx, edx;
1916 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1917 : "0"(1), "2"(0));
1918
1919 // Sanity check for prerequisites.
1920 if ((edx & (1<<25)) != (1<<25)) { return false; } // SSE
1921 if ((edx & (1<<26)) != (1<<26)) { return false; } // SSE2
1922 if ((ecx & (1<< 0)) != (1<< 0)) { return false; } // SSE3
1923 if ((ecx & (1<< 9)) != (1<< 9)) { return false; } // SSSE3
1924 if ((ecx & (1<<19)) != (1<<19)) { return false; } // SSE4.1
1925 if ((ecx & (1<<20)) != (1<<20)) { return false; } // SSE4.2
1926
1927 if ((ecx & (3<<26)) != (3<<26)) { return false; } // XSAVE + OSXSAVE
1928
1929 {
1930 uint32_t eax_xgetbv, edx_xgetbv;
1931 __asm__ __volatile__("xgetbv" : "=a"(eax_xgetbv), "=d"(edx_xgetbv) : "c"(0));
1932 if ((eax_xgetbv & (3<<1)) != (3<<1)) { return false; } // XMM+YMM state saved?
1933 }
1934
1935 if ((ecx & (1<<28)) != (1<<28)) { return false; } // AVX
1936 if ((ecx & (1<<29)) != (1<<29)) { return false; } // F16C
1937 if ((ecx & (1<<12)) != (1<<12)) { return false; } // FMA (TODO: not currently used)
1938
1939 // Call cpuid(7) to check for our final AVX2 feature bit!
1940 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1941 : "0"(7), "2"(0));
1942 if ((ebx & (1<< 5)) != (1<< 5)) { return false; } // AVX2
1943
1944 return true;
1945 }();
1946
1947 return ok;
1948 }
1949
1950 #endif
1951
is_identity_tf(const skcms_TransferFunction * tf)1952 static bool is_identity_tf(const skcms_TransferFunction* tf) {
1953 return tf->g == 1 && tf->a == 1
1954 && tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0;
1955 }
1956
1957 typedef struct {
1958 Op op;
1959 const void* arg;
1960 } OpAndArg;
1961
select_curve_op(const skcms_Curve * curve,int channel)1962 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
1963 static const struct { Op parametric, table; } ops[] = {
1964 { Op_tf_r, Op_table_r },
1965 { Op_tf_g, Op_table_g },
1966 { Op_tf_b, Op_table_b },
1967 { Op_tf_a, Op_table_a },
1968 };
1969
1970 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
1971
1972 if (curve->table_entries == 0) {
1973 return is_identity_tf(&curve->parametric)
1974 ? noop
1975 : OpAndArg{ ops[channel].parametric, &curve->parametric };
1976 }
1977
1978 return OpAndArg{ ops[channel].table, curve };
1979 }
1980
bytes_per_pixel(skcms_PixelFormat fmt)1981 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
1982 switch (fmt >> 1) { // ignore rgb/bgr
1983 case skcms_PixelFormat_A_8 >> 1: return 1;
1984 case skcms_PixelFormat_G_8 >> 1: return 1;
1985 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
1986 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
1987 case skcms_PixelFormat_RGB_565 >> 1: return 2;
1988 case skcms_PixelFormat_RGB_888 >> 1: return 3;
1989 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
1990 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
1991 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
1992 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
1993 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
1994 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
1995 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
1996 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
1997 case skcms_PixelFormat_RGB_fff >> 1: return 12;
1998 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
1999 }
2000 assert(false);
2001 return 0;
2002 }
2003
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2004 static bool prep_for_destination(const skcms_ICCProfile* profile,
2005 skcms_Matrix3x3* fromXYZD50,
2006 skcms_TransferFunction* invR,
2007 skcms_TransferFunction* invG,
2008 skcms_TransferFunction* invB) {
2009 // We only support destinations with parametric transfer functions
2010 // and with gamuts that can be transformed from XYZD50.
2011 return profile->has_trc
2012 && profile->has_toXYZD50
2013 && profile->trc[0].table_entries == 0
2014 && profile->trc[1].table_entries == 0
2015 && profile->trc[2].table_entries == 0
2016 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2017 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2018 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2019 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2020 }
2021
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2022 bool skcms_Transform(const void* src,
2023 skcms_PixelFormat srcFmt,
2024 skcms_AlphaFormat srcAlpha,
2025 const skcms_ICCProfile* srcProfile,
2026 void* dst,
2027 skcms_PixelFormat dstFmt,
2028 skcms_AlphaFormat dstAlpha,
2029 const skcms_ICCProfile* dstProfile,
2030 size_t npixels) {
2031 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2032 dst, dstFmt, dstAlpha, dstProfile,
2033 npixels, nullptr);
2034 }
2035
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2036 bool skcms_TransformWithPalette(const void* src,
2037 skcms_PixelFormat srcFmt,
2038 skcms_AlphaFormat srcAlpha,
2039 const skcms_ICCProfile* srcProfile,
2040 void* dst,
2041 skcms_PixelFormat dstFmt,
2042 skcms_AlphaFormat dstAlpha,
2043 const skcms_ICCProfile* dstProfile,
2044 size_t nz,
2045 const void* palette) {
2046 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2047 src_bpp = bytes_per_pixel(srcFmt);
2048 // Let's just refuse if the request is absurdly big.
2049 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2050 return false;
2051 }
2052 int n = (int)nz;
2053
2054 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2055 if (!srcProfile) {
2056 srcProfile = skcms_sRGB_profile();
2057 }
2058 if (!dstProfile) {
2059 dstProfile = skcms_sRGB_profile();
2060 }
2061
2062 // We can't transform in place unless the PixelFormats are the same size.
2063 if (dst == src && dst_bpp != src_bpp) {
2064 return false;
2065 }
2066 // TODO: more careful alias rejection (like, dst == src + 1)?
2067
2068 if (needs_palette(srcFmt) && !palette) {
2069 return false;
2070 }
2071
2072 Op program [32];
2073 const void* arguments[32];
2074
2075 Op* ops = program;
2076 const void** args = arguments;
2077
2078 skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b;
2079 skcms_Matrix3x3 from_xyz;
2080
2081 switch (srcFmt >> 1) {
2082 default: return false;
2083 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2084 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2085 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2086 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2087 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2088 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2089 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2090 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2091 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2092 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2093 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2094 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2095 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2096 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2097 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2098
2099 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2100 *args++ = palette;
2101 break;
2102 }
2103 if (srcFmt & 1) {
2104 *ops++ = Op_swap_rb;
2105 }
2106 skcms_ICCProfile gray_dst_profile;
2107 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2108 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2109 // luminance (Y) by the destination transfer function.
2110 gray_dst_profile = *dstProfile;
2111 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2112 dstProfile = &gray_dst_profile;
2113 }
2114
2115 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2116 // Photoshop creates CMYK images as inverse CMYK.
2117 // These happen to be the only ones we've _ever_ seen.
2118 *ops++ = Op_invert;
2119 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2120 srcAlpha = skcms_AlphaFormat_Unpremul;
2121 }
2122
2123 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2124 *ops++ = Op_force_opaque;
2125 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2126 *ops++ = Op_unpremul;
2127 }
2128
2129 if (dstProfile != srcProfile) {
2130
2131 if (!prep_for_destination(dstProfile,
2132 &from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) {
2133 return false;
2134 }
2135
2136 if (srcProfile->has_A2B) {
2137 if (srcProfile->A2B.input_channels) {
2138 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2139 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2140 if (oa.arg) {
2141 *ops++ = oa.op;
2142 *args++ = oa.arg;
2143 }
2144 }
2145 *ops++ = Op_clamp;
2146 *ops++ = Op_clut;
2147 *args++ = &srcProfile->A2B;
2148 }
2149
2150 if (srcProfile->A2B.matrix_channels == 3) {
2151 for (int i = 0; i < 3; i++) {
2152 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2153 if (oa.arg) {
2154 *ops++ = oa.op;
2155 *args++ = oa.arg;
2156 }
2157 }
2158
2159 static const skcms_Matrix3x4 I = {{
2160 {1,0,0,0},
2161 {0,1,0,0},
2162 {0,0,1,0},
2163 }};
2164 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2165 *ops++ = Op_matrix_3x4;
2166 *args++ = &srcProfile->A2B.matrix;
2167 }
2168 }
2169
2170 if (srcProfile->A2B.output_channels == 3) {
2171 for (int i = 0; i < 3; i++) {
2172 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2173 if (oa.arg) {
2174 *ops++ = oa.op;
2175 *args++ = oa.arg;
2176 }
2177 }
2178 }
2179
2180 if (srcProfile->pcs == skcms_Signature_Lab) {
2181 *ops++ = Op_lab_to_xyz;
2182 }
2183
2184 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2185 for (int i = 0; i < 3; i++) {
2186 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2187 if (oa.arg) {
2188 *ops++ = oa.op;
2189 *args++ = oa.arg;
2190 }
2191 }
2192 } else {
2193 return false;
2194 }
2195
2196 // A2B sources should already be in XYZD50 at this point.
2197 // Others still need to be transformed using their toXYZD50 matrix.
2198 // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2199 // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2200 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2201 static const skcms_Matrix3x3 I = {{
2202 { 1.0f, 0.0f, 0.0f },
2203 { 0.0f, 1.0f, 0.0f },
2204 { 0.0f, 0.0f, 1.0f },
2205 }};
2206 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2207
2208 // There's a chance the source and destination gamuts are identical,
2209 // in which case we can skip the gamut transform.
2210 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2211 // Concat the entire gamut transform into from_xyz,
2212 // now slightly misnamed but it's a handy spot to stash the result.
2213 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2214 *ops++ = Op_matrix_3x3;
2215 *args++ = &from_xyz;
2216 }
2217
2218 // Encode back to dst RGB using its parametric transfer functions.
2219 if (!is_identity_tf(&inv_dst_tf_r)) { *ops++ = Op_tf_r; *args++ = &inv_dst_tf_r; }
2220 if (!is_identity_tf(&inv_dst_tf_g)) { *ops++ = Op_tf_g; *args++ = &inv_dst_tf_g; }
2221 if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; }
2222 }
2223
2224 // Clamp here before premul to make sure we're clamping to fixed-point values _and_ gamut,
2225 // not just to values that fit in the fixed point representation.
2226 //
2227 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2228 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2229 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2230 *ops++ = Op_clamp;
2231 }
2232 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2233 *ops++ = Op_force_opaque;
2234 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2235 *ops++ = Op_premul;
2236 }
2237 if (dstFmt & 1) {
2238 *ops++ = Op_swap_rb;
2239 }
2240 switch (dstFmt >> 1) {
2241 default: return false;
2242 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2243 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2244 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2245 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2246 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2247 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2248 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2249 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2250 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2251 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2252 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2253 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2254 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2255 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2256 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2257 }
2258
2259 auto run = baseline::run_program;
2260 #if defined(TEST_FOR_HSW)
2261 if (hsw_ok()) { run = hsw::run_program; }
2262 #endif
2263 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2264 return true;
2265 }
2266
assert_usable_as_destination(const skcms_ICCProfile * profile)2267 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2268 #if defined(NDEBUG)
2269 (void)profile;
2270 #else
2271 skcms_Matrix3x3 fromXYZD50;
2272 skcms_TransferFunction invR, invG, invB;
2273 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2274 #endif
2275 }
2276
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2277 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2278 skcms_Matrix3x3 fromXYZD50;
2279 if (!profile->has_trc || !profile->has_toXYZD50
2280 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2281 return false;
2282 }
2283
2284 skcms_TransferFunction tf[3];
2285 for (int i = 0; i < 3; i++) {
2286 skcms_TransferFunction inv;
2287 if (profile->trc[i].table_entries == 0
2288 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2289 tf[i] = profile->trc[i].parametric;
2290 continue;
2291 }
2292
2293 float max_error;
2294 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2295 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2296 return false;
2297 }
2298 }
2299
2300 for (int i = 0; i < 3; ++i) {
2301 profile->trc[i].table_entries = 0;
2302 profile->trc[i].parametric = tf[i];
2303 }
2304
2305 assert_usable_as_destination(profile);
2306 return true;
2307 }
2308
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2309 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2310 // Operate on a copy of profile, so we can choose the best TF for the original curves
2311 skcms_ICCProfile result = *profile;
2312 if (!skcms_MakeUsableAsDestination(&result)) {
2313 return false;
2314 }
2315
2316 int best_tf = 0;
2317 float min_max_error = INFINITY_;
2318 for (int i = 0; i < 3; i++) {
2319 skcms_TransferFunction inv;
2320 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2321 return false;
2322 }
2323
2324 float err = 0;
2325 for (int j = 0; j < 3; ++j) {
2326 err = fmaxf_(err, max_roundtrip_error(&profile->trc[j], &inv));
2327 }
2328 if (min_max_error > err) {
2329 min_max_error = err;
2330 best_tf = i;
2331 }
2332 }
2333
2334 for (int i = 0; i < 3; i++) {
2335 result.trc[i].parametric = result.trc[best_tf].parametric;
2336 }
2337
2338 *profile = result;
2339 assert_usable_as_destination(profile);
2340 return true;
2341 }
2342