• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* vim: set ts=8 sw=8 noexpandtab: */
2 //  qcms
3 //  Copyright (C) 2009 Mozilla Corporation
4 //  Copyright (C) 1998-2007 Marti Maria
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining
7 // a copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the Software
11 // is furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 
24 #include <stdlib.h>
25 #include <math.h>
26 #include <assert.h>
27 #include <string.h> //memcpy
28 #include "qcmsint.h"
29 #include "chain.h"
30 #include "matrix.h"
31 #include "transform_util.h"
32 
33 /* for MSVC, GCC, Intel, and Sun compilers */
34 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
35 #define X86
36 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
37 
38 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
39 // This is just an approximation, I am not handling all the non-linear
40 // aspects of the RGB to XYZ process, and assumming that the gamma correction
41 // has transitive property in the tranformation chain.
42 //
43 // the alghoritm:
44 //
45 //            - First I build the absolute conversion matrix using
46 //              primaries in XYZ. This matrix is next inverted
47 //            - Then I eval the source white point across this matrix
48 //              obtaining the coeficients of the transformation
49 //            - Then, I apply these coeficients to the original matrix
build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white,qcms_CIE_xyYTRIPLE primrs)50 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
51 {
52 	struct matrix primaries;
53 	struct matrix primaries_invert;
54 	struct matrix result;
55 	struct vector white_point;
56 	struct vector coefs;
57 
58 	double xn, yn;
59 	double xr, yr;
60 	double xg, yg;
61 	double xb, yb;
62 
63 	xn = white.x;
64 	yn = white.y;
65 
66 	if (yn == 0.0)
67 		return matrix_invalid();
68 
69 	xr = primrs.red.x;
70 	yr = primrs.red.y;
71 	xg = primrs.green.x;
72 	yg = primrs.green.y;
73 	xb = primrs.blue.x;
74 	yb = primrs.blue.y;
75 
76 	primaries.m[0][0] = xr;
77 	primaries.m[0][1] = xg;
78 	primaries.m[0][2] = xb;
79 
80 	primaries.m[1][0] = yr;
81 	primaries.m[1][1] = yg;
82 	primaries.m[1][2] = yb;
83 
84 	primaries.m[2][0] = 1 - xr - yr;
85 	primaries.m[2][1] = 1 - xg - yg;
86 	primaries.m[2][2] = 1 - xb - yb;
87 	primaries.invalid = false;
88 
89 	white_point.v[0] = xn/yn;
90 	white_point.v[1] = 1.;
91 	white_point.v[2] = (1.0-xn-yn)/yn;
92 
93 	primaries_invert = matrix_invert(primaries);
94 
95 	coefs = matrix_eval(primaries_invert, white_point);
96 
97 	result.m[0][0] = coefs.v[0]*xr;
98 	result.m[0][1] = coefs.v[1]*xg;
99 	result.m[0][2] = coefs.v[2]*xb;
100 
101 	result.m[1][0] = coefs.v[0]*yr;
102 	result.m[1][1] = coefs.v[1]*yg;
103 	result.m[1][2] = coefs.v[2]*yb;
104 
105 	result.m[2][0] = coefs.v[0]*(1.-xr-yr);
106 	result.m[2][1] = coefs.v[1]*(1.-xg-yg);
107 	result.m[2][2] = coefs.v[2]*(1.-xb-yb);
108 	result.invalid = primaries_invert.invalid;
109 
110 	return result;
111 }
112 
113 struct CIE_XYZ {
114 	double X;
115 	double Y;
116 	double Z;
117 };
118 
119 /* CIE Illuminant D50 */
120 static const struct CIE_XYZ D50_XYZ = {
121 	0.9642,
122 	1.0000,
123 	0.8249
124 };
125 
126 /* from lcms: xyY2XYZ()
127  * corresponds to argyll: icmYxy2XYZ() */
xyY2XYZ(qcms_CIE_xyY source)128 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
129 {
130 	struct CIE_XYZ dest;
131 	dest.X = (source.x / source.y) * source.Y;
132 	dest.Y = source.Y;
133 	dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
134 	return dest;
135 }
136 
137 /* from lcms: ComputeChromaticAdaption */
138 // Compute chromatic adaption matrix using chad as cone matrix
139 static struct matrix
compute_chromatic_adaption(struct CIE_XYZ source_white_point,struct CIE_XYZ dest_white_point,struct matrix chad)140 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
141                            struct CIE_XYZ dest_white_point,
142                            struct matrix chad)
143 {
144 	struct matrix chad_inv;
145 	struct vector cone_source_XYZ, cone_source_rgb;
146 	struct vector cone_dest_XYZ, cone_dest_rgb;
147 	struct matrix cone, tmp;
148 
149 	tmp = chad;
150 	chad_inv = matrix_invert(tmp);
151 
152 	cone_source_XYZ.v[0] = source_white_point.X;
153 	cone_source_XYZ.v[1] = source_white_point.Y;
154 	cone_source_XYZ.v[2] = source_white_point.Z;
155 
156 	cone_dest_XYZ.v[0] = dest_white_point.X;
157 	cone_dest_XYZ.v[1] = dest_white_point.Y;
158 	cone_dest_XYZ.v[2] = dest_white_point.Z;
159 
160 	cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
161 	cone_dest_rgb   = matrix_eval(chad, cone_dest_XYZ);
162 
163 	cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
164 	cone.m[0][1] = 0;
165 	cone.m[0][2] = 0;
166 	cone.m[1][0] = 0;
167 	cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
168 	cone.m[1][2] = 0;
169 	cone.m[2][0] = 0;
170 	cone.m[2][1] = 0;
171 	cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
172 	cone.invalid = false;
173 
174 	// Normalize
175 	return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
176 }
177 
178 /* from lcms: cmsAdaptionMatrix */
179 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
180 // Bradford is assumed
181 static struct matrix
adaption_matrix(struct CIE_XYZ source_illumination,struct CIE_XYZ target_illumination)182 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
183 {
184 #if defined (_MSC_VER)
185 #pragma warning(push)
186 /* Disable double to float truncation warning 4305 */
187 #pragma warning(disable:4305)
188 #endif
189 	struct matrix lam_rigg = {{ // Bradford matrix
190 	                         {  0.8951,  0.2664, -0.1614 },
191 	                         { -0.7502,  1.7135,  0.0367 },
192 	                         {  0.0389, -0.0685,  1.0296 }
193 	                         }};
194 #if defined (_MSC_VER)
195 /* Restore warnings */
196 #pragma warning(pop)
197 #endif
198 	return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
199 }
200 
201 /* from lcms: cmsAdaptMatrixToD50 */
adapt_matrix_to_D50(struct matrix r,qcms_CIE_xyY source_white_pt)202 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
203 {
204 	struct CIE_XYZ Dn;
205 	struct matrix Bradford;
206 
207 	if (source_white_pt.y == 0.0)
208 		return matrix_invalid();
209 
210 	Dn = xyY2XYZ(source_white_pt);
211 
212 	Bradford = adaption_matrix(Dn, D50_XYZ);
213 	return matrix_multiply(Bradford, r);
214 }
215 
set_rgb_colorants(qcms_profile * profile,qcms_CIE_xyY white_point,qcms_CIE_xyYTRIPLE primaries)216 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
217 {
218 	struct matrix colorants;
219 	colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
220 	colorants = adapt_matrix_to_D50(colorants, white_point);
221 
222 	if (colorants.invalid)
223 		return false;
224 
225 	/* note: there's a transpose type of operation going on here */
226 	profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
227 	profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
228 	profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
229 
230 	profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
231 	profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
232 	profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
233 
234 	profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
235 	profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
236 	profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
237 
238 	return true;
239 }
240 
241 #if 0
242 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
243 {
244 	const int r_out = output_format.r;
245 	const int b_out = output_format.b;
246 
247 	int i;
248 	float (*mat)[4] = transform->matrix;
249 	for (i=0; i<length; i++) {
250 		unsigned char device_r = *src++;
251 		unsigned char device_g = *src++;
252 		unsigned char device_b = *src++;
253 
254 		float linear_r = transform->input_gamma_table_r[device_r];
255 		float linear_g = transform->input_gamma_table_g[device_g];
256 		float linear_b = transform->input_gamma_table_b[device_b];
257 
258 		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
259 		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
260 		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
261 
262 		float out_device_r = pow(out_linear_r, transform->out_gamma_r);
263 		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
264 		float out_device_b = pow(out_linear_b, transform->out_gamma_b);
265 
266 		dest[r_out] = clamp_u8(out_device_r*255);
267 		dest[1]     = clamp_u8(out_device_g*255);
268 		dest[b_out] = clamp_u8(out_device_b*255);
269 		dest += 3;
270 	}
271 }
272 #endif
273 
qcms_transform_data_gray_out_lut(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)274 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
275 {
276 	const int r_out = output_format.r;
277 	const int b_out = output_format.b;
278 
279 	unsigned int i;
280 	for (i = 0; i < length; i++) {
281 		float out_device_r, out_device_g, out_device_b;
282 		unsigned char device = *src++;
283 
284 		float linear = transform->input_gamma_table_gray[device];
285 
286 		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
287 		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
288 		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
289 
290 		dest[r_out] = clamp_u8(out_device_r*255);
291 		dest[1]     = clamp_u8(out_device_g*255);
292 		dest[b_out] = clamp_u8(out_device_b*255);
293 		dest += 3;
294 	}
295 }
296 
297 /* Alpha is not corrected.
298    A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
299    RGB Is?" Tech Memo 17 (December 14, 1998).
300 	See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
301 */
302 
qcms_transform_data_graya_out_lut(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)303 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
304 {
305 	const int r_out = output_format.r;
306 	const int b_out = output_format.b;
307 
308 	unsigned int i;
309 	for (i = 0; i < length; i++) {
310 		float out_device_r, out_device_g, out_device_b;
311 		unsigned char device = *src++;
312 		unsigned char alpha = *src++;
313 
314 		float linear = transform->input_gamma_table_gray[device];
315 
316 		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
317 		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
318 		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
319 
320 		dest[r_out] = clamp_u8(out_device_r*255);
321 		dest[1]     = clamp_u8(out_device_g*255);
322 		dest[b_out] = clamp_u8(out_device_b*255);
323 		dest[3]     = alpha;
324 		dest += 4;
325 	}
326 }
327 
328 
qcms_transform_data_gray_out_precache(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)329 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
330 {
331 	const int r_out = output_format.r;
332 	const int b_out = output_format.b;
333 
334 	unsigned int i;
335 	for (i = 0; i < length; i++) {
336 		unsigned char device = *src++;
337 		uint16_t gray;
338 
339 		float linear = transform->input_gamma_table_gray[device];
340 
341 		/* we could round here... */
342 		gray = linear * PRECACHE_OUTPUT_MAX;
343 
344 		dest[r_out] = transform->output_table_r->data[gray];
345 		dest[1]     = transform->output_table_g->data[gray];
346 		dest[b_out] = transform->output_table_b->data[gray];
347 		dest += 3;
348 	}
349 }
350 
351 
qcms_transform_data_graya_out_precache(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)352 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
353 {
354 	const int r_out = output_format.r;
355 	const int b_out = output_format.b;
356 
357 	unsigned int i;
358 	for (i = 0; i < length; i++) {
359 		unsigned char device = *src++;
360 		unsigned char alpha = *src++;
361 		uint16_t gray;
362 
363 		float linear = transform->input_gamma_table_gray[device];
364 
365 		/* we could round here... */
366 		gray = linear * PRECACHE_OUTPUT_MAX;
367 
368 		dest[r_out] = transform->output_table_r->data[gray];
369 		dest[1]     = transform->output_table_g->data[gray];
370 		dest[b_out] = transform->output_table_b->data[gray];
371 		dest[3]     = alpha;
372 		dest += 4;
373 	}
374 }
375 
qcms_transform_data_rgb_out_lut_precache(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)376 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
377 {
378 	const int r_out = output_format.r;
379 	const int b_out = output_format.b;
380 
381 	unsigned int i;
382 	float (*mat)[4] = transform->matrix;
383 	for (i = 0; i < length; i++) {
384 		unsigned char device_r = *src++;
385 		unsigned char device_g = *src++;
386 		unsigned char device_b = *src++;
387 		uint16_t r, g, b;
388 
389 		float linear_r = transform->input_gamma_table_r[device_r];
390 		float linear_g = transform->input_gamma_table_g[device_g];
391 		float linear_b = transform->input_gamma_table_b[device_b];
392 
393 		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
394 		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
395 		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
396 
397 		out_linear_r = clamp_float(out_linear_r);
398 		out_linear_g = clamp_float(out_linear_g);
399 		out_linear_b = clamp_float(out_linear_b);
400 
401 		/* we could round here... */
402 		r = out_linear_r * PRECACHE_OUTPUT_MAX;
403 		g = out_linear_g * PRECACHE_OUTPUT_MAX;
404 		b = out_linear_b * PRECACHE_OUTPUT_MAX;
405 
406 		dest[r_out] = transform->output_table_r->data[r];
407 		dest[1]     = transform->output_table_g->data[g];
408 		dest[b_out] = transform->output_table_b->data[b];
409 		dest += 3;
410 	}
411 }
412 
qcms_transform_data_rgba_out_lut_precache(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)413 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
414 {
415 	const int r_out = output_format.r;
416 	const int b_out = output_format.b;
417 
418 	unsigned int i;
419 	float (*mat)[4] = transform->matrix;
420 	for (i = 0; i < length; i++) {
421 		unsigned char device_r = *src++;
422 		unsigned char device_g = *src++;
423 		unsigned char device_b = *src++;
424 		unsigned char alpha = *src++;
425 		uint16_t r, g, b;
426 
427 		float linear_r = transform->input_gamma_table_r[device_r];
428 		float linear_g = transform->input_gamma_table_g[device_g];
429 		float linear_b = transform->input_gamma_table_b[device_b];
430 
431 		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
432 		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
433 		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
434 
435 		out_linear_r = clamp_float(out_linear_r);
436 		out_linear_g = clamp_float(out_linear_g);
437 		out_linear_b = clamp_float(out_linear_b);
438 
439 		/* we could round here... */
440 		r = out_linear_r * PRECACHE_OUTPUT_MAX;
441 		g = out_linear_g * PRECACHE_OUTPUT_MAX;
442 		b = out_linear_b * PRECACHE_OUTPUT_MAX;
443 
444 		dest[r_out] = transform->output_table_r->data[r];
445 		dest[1]     = transform->output_table_g->data[g];
446 		dest[b_out] = transform->output_table_b->data[b];
447 		dest[3]     = alpha;
448 		dest += 4;
449 	}
450 }
451 
452 // Not used
453 /*
454 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
455 {
456 	const int r_out = output_format.r;
457 	const int b_out = output_format.b;
458 
459 	unsigned int i;
460 	int xy_len = 1;
461 	int x_len = transform->grid_size;
462 	int len = x_len * x_len;
463 	float* r_table = transform->r_clut;
464 	float* g_table = transform->g_clut;
465 	float* b_table = transform->b_clut;
466 
467 	for (i = 0; i < length; i++) {
468 		unsigned char in_r = *src++;
469 		unsigned char in_g = *src++;
470 		unsigned char in_b = *src++;
471 		float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
472 
473 		int x = floor(linear_r * (transform->grid_size-1));
474 		int y = floor(linear_g * (transform->grid_size-1));
475 		int z = floor(linear_b * (transform->grid_size-1));
476 		int x_n = ceil(linear_r * (transform->grid_size-1));
477 		int y_n = ceil(linear_g * (transform->grid_size-1));
478 		int z_n = ceil(linear_b * (transform->grid_size-1));
479 		float x_d = linear_r * (transform->grid_size-1) - x;
480 		float y_d = linear_g * (transform->grid_size-1) - y;
481 		float z_d = linear_b * (transform->grid_size-1) - z;
482 
483 		float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
484 		float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
485 		float r_y1 = lerp(r_x1, r_x2, y_d);
486 		float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
487 		float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
488 		float r_y2 = lerp(r_x3, r_x4, y_d);
489 		float clut_r = lerp(r_y1, r_y2, z_d);
490 
491 		float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
492 		float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
493 		float g_y1 = lerp(g_x1, g_x2, y_d);
494 		float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
495 		float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
496 		float g_y2 = lerp(g_x3, g_x4, y_d);
497 		float clut_g = lerp(g_y1, g_y2, z_d);
498 
499 		float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
500 		float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
501 		float b_y1 = lerp(b_x1, b_x2, y_d);
502 		float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
503 		float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
504 		float b_y2 = lerp(b_x3, b_x4, y_d);
505 		float clut_b = lerp(b_y1, b_y2, z_d);
506 
507 		dest[r_out] = clamp_u8(clut_r*255.0f);
508 		dest[1]     = clamp_u8(clut_g*255.0f);
509 		dest[b_out] = clamp_u8(clut_b*255.0f);
510 		dest += 3;
511 	}
512 }
513 */
514 
515 // Using lcms' tetra interpolation algorithm.
qcms_transform_data_tetra_clut_rgba(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)516 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
517 {
518 	const int r_out = output_format.r;
519 	const int b_out = output_format.b;
520 
521 	unsigned int i;
522 	int xy_len = 1;
523 	int x_len = transform->grid_size;
524 	int len = x_len * x_len;
525 	float* r_table = transform->r_clut;
526 	float* g_table = transform->g_clut;
527 	float* b_table = transform->b_clut;
528 	float c0_r, c1_r, c2_r, c3_r;
529 	float c0_g, c1_g, c2_g, c3_g;
530 	float c0_b, c1_b, c2_b, c3_b;
531 	float clut_r, clut_g, clut_b;
532 	for (i = 0; i < length; i++) {
533 		unsigned char in_r = *src++;
534 		unsigned char in_g = *src++;
535 		unsigned char in_b = *src++;
536 		unsigned char in_a = *src++;
537 		float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
538 
539 		int x = floor(linear_r * (transform->grid_size-1));
540 		int y = floor(linear_g * (transform->grid_size-1));
541 		int z = floor(linear_b * (transform->grid_size-1));
542 		int x_n = ceil(linear_r * (transform->grid_size-1));
543 		int y_n = ceil(linear_g * (transform->grid_size-1));
544 		int z_n = ceil(linear_b * (transform->grid_size-1));
545 		float rx = linear_r * (transform->grid_size-1) - x;
546 		float ry = linear_g * (transform->grid_size-1) - y;
547 		float rz = linear_b * (transform->grid_size-1) - z;
548 
549 		c0_r = CLU(r_table, x, y, z);
550 		c0_g = CLU(g_table, x, y, z);
551 		c0_b = CLU(b_table, x, y, z);
552 
553 		if( rx >= ry ) {
554 			if (ry >= rz) { //rx >= ry && ry >= rz
555 				c1_r = CLU(r_table, x_n, y, z) - c0_r;
556 				c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
557 				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
558 				c1_g = CLU(g_table, x_n, y, z) - c0_g;
559 				c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
560 				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
561 				c1_b = CLU(b_table, x_n, y, z) - c0_b;
562 				c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
563 				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
564 			} else {
565 				if (rx >= rz) { //rx >= rz && rz >= ry
566 					c1_r = CLU(r_table, x_n, y, z) - c0_r;
567 					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
568 					c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
569 					c1_g = CLU(g_table, x_n, y, z) - c0_g;
570 					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
571 					c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
572 					c1_b = CLU(b_table, x_n, y, z) - c0_b;
573 					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
574 					c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
575 				} else { //rz > rx && rx >= ry
576 					c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
577 					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
578 					c3_r = CLU(r_table, x, y, z_n) - c0_r;
579 					c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
580 					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
581 					c3_g = CLU(g_table, x, y, z_n) - c0_g;
582 					c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
583 					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
584 					c3_b = CLU(b_table, x, y, z_n) - c0_b;
585 				}
586 			}
587 		} else {
588 			if (rx >= rz) { //ry > rx && rx >= rz
589 				c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
590 				c2_r = CLU(r_table, x, y_n, z) - c0_r;
591 				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
592 				c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
593 				c2_g = CLU(g_table, x, y_n, z) - c0_g;
594 				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
595 				c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
596 				c2_b = CLU(b_table, x, y_n, z) - c0_b;
597 				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
598 			} else {
599 				if (ry >= rz) { //ry >= rz && rz > rx
600 					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
601 					c2_r = CLU(r_table, x, y_n, z) - c0_r;
602 					c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
603 					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
604 					c2_g = CLU(g_table, x, y_n, z) - c0_g;
605 					c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
606 					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
607 					c2_b = CLU(b_table, x, y_n, z) - c0_b;
608 					c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
609 				} else { //rz > ry && ry > rx
610 					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
611 					c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
612 					c3_r = CLU(r_table, x, y, z_n) - c0_r;
613 					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
614 					c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
615 					c3_g = CLU(g_table, x, y, z_n) - c0_g;
616 					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
617 					c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
618 					c3_b = CLU(b_table, x, y, z_n) - c0_b;
619 				}
620 			}
621 		}
622 
623 		clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
624 		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
625 		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
626 
627 		dest[r_out] = clamp_u8(clut_r*255.0f);
628 		dest[1]     = clamp_u8(clut_g*255.0f);
629 		dest[b_out] = clamp_u8(clut_b*255.0f);
630 		dest[3]     = in_a;
631 		dest += 4;
632 	}
633 }
634 
635 // Using lcms' tetra interpolation code.
qcms_transform_data_tetra_clut(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)636 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
637 {
638 	const int r_out = output_format.r;
639 	const int b_out = output_format.b;
640 
641 	unsigned int i;
642 	int xy_len = 1;
643 	int x_len = transform->grid_size;
644 	int len = x_len * x_len;
645 	float* r_table = transform->r_clut;
646 	float* g_table = transform->g_clut;
647 	float* b_table = transform->b_clut;
648 	float c0_r, c1_r, c2_r, c3_r;
649 	float c0_g, c1_g, c2_g, c3_g;
650 	float c0_b, c1_b, c2_b, c3_b;
651 	float clut_r, clut_g, clut_b;
652 	for (i = 0; i < length; i++) {
653 		unsigned char in_r = *src++;
654 		unsigned char in_g = *src++;
655 		unsigned char in_b = *src++;
656 		float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
657 
658 		int x = floor(linear_r * (transform->grid_size-1));
659 		int y = floor(linear_g * (transform->grid_size-1));
660 		int z = floor(linear_b * (transform->grid_size-1));
661 		int x_n = ceil(linear_r * (transform->grid_size-1));
662 		int y_n = ceil(linear_g * (transform->grid_size-1));
663 		int z_n = ceil(linear_b * (transform->grid_size-1));
664 		float rx = linear_r * (transform->grid_size-1) - x;
665 		float ry = linear_g * (transform->grid_size-1) - y;
666 		float rz = linear_b * (transform->grid_size-1) - z;
667 
668 		c0_r = CLU(r_table, x, y, z);
669 		c0_g = CLU(g_table, x, y, z);
670 		c0_b = CLU(b_table, x, y, z);
671 
672 		if( rx >= ry ) {
673 			if (ry >= rz) { //rx >= ry && ry >= rz
674 				c1_r = CLU(r_table, x_n, y, z) - c0_r;
675 				c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
676 				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
677 				c1_g = CLU(g_table, x_n, y, z) - c0_g;
678 				c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
679 				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
680 				c1_b = CLU(b_table, x_n, y, z) - c0_b;
681 				c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
682 				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
683 			} else {
684 				if (rx >= rz) { //rx >= rz && rz >= ry
685 					c1_r = CLU(r_table, x_n, y, z) - c0_r;
686 					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
687 					c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
688 					c1_g = CLU(g_table, x_n, y, z) - c0_g;
689 					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
690 					c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
691 					c1_b = CLU(b_table, x_n, y, z) - c0_b;
692 					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
693 					c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
694 				} else { //rz > rx && rx >= ry
695 					c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
696 					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
697 					c3_r = CLU(r_table, x, y, z_n) - c0_r;
698 					c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
699 					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
700 					c3_g = CLU(g_table, x, y, z_n) - c0_g;
701 					c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
702 					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
703 					c3_b = CLU(b_table, x, y, z_n) - c0_b;
704 				}
705 			}
706 		} else {
707 			if (rx >= rz) { //ry > rx && rx >= rz
708 				c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
709 				c2_r = CLU(r_table, x, y_n, z) - c0_r;
710 				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
711 				c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
712 				c2_g = CLU(g_table, x, y_n, z) - c0_g;
713 				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
714 				c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
715 				c2_b = CLU(b_table, x, y_n, z) - c0_b;
716 				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
717 			} else {
718 				if (ry >= rz) { //ry >= rz && rz > rx
719 					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
720 					c2_r = CLU(r_table, x, y_n, z) - c0_r;
721 					c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
722 					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
723 					c2_g = CLU(g_table, x, y_n, z) - c0_g;
724 					c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
725 					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
726 					c2_b = CLU(b_table, x, y_n, z) - c0_b;
727 					c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
728 				} else { //rz > ry && ry > rx
729 					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
730 					c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
731 					c3_r = CLU(r_table, x, y, z_n) - c0_r;
732 					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
733 					c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
734 					c3_g = CLU(g_table, x, y, z_n) - c0_g;
735 					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
736 					c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
737 					c3_b = CLU(b_table, x, y, z_n) - c0_b;
738 				}
739 			}
740 		}
741 
742 		clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
743 		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
744 		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
745 
746 		dest[r_out] = clamp_u8(clut_r*255.0f);
747 		dest[1]     = clamp_u8(clut_g*255.0f);
748 		dest[b_out] = clamp_u8(clut_b*255.0f);
749 		dest += 3;
750 	}
751 }
752 
qcms_transform_data_rgb_out_lut(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)753 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
754 {
755 	const int r_out = output_format.r;
756 	const int b_out = output_format.b;
757 
758 	unsigned int i;
759 	float (*mat)[4] = transform->matrix;
760 	for (i = 0; i < length; i++) {
761 		unsigned char device_r = *src++;
762 		unsigned char device_g = *src++;
763 		unsigned char device_b = *src++;
764 		float out_device_r, out_device_g, out_device_b;
765 
766 		float linear_r = transform->input_gamma_table_r[device_r];
767 		float linear_g = transform->input_gamma_table_g[device_g];
768 		float linear_b = transform->input_gamma_table_b[device_b];
769 
770 		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
771 		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
772 		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
773 
774 		out_linear_r = clamp_float(out_linear_r);
775 		out_linear_g = clamp_float(out_linear_g);
776 		out_linear_b = clamp_float(out_linear_b);
777 
778 		out_device_r = lut_interp_linear(out_linear_r,
779 				transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
780 		out_device_g = lut_interp_linear(out_linear_g,
781 				transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
782 		out_device_b = lut_interp_linear(out_linear_b,
783 				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
784 
785 		dest[r_out] = clamp_u8(out_device_r*255);
786 		dest[1]     = clamp_u8(out_device_g*255);
787 		dest[b_out] = clamp_u8(out_device_b*255);
788 		dest += 3;
789 	}
790 }
791 
qcms_transform_data_rgba_out_lut(qcms_transform * transform,unsigned char * src,unsigned char * dest,size_t length,qcms_format_type output_format)792 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
793 {
794 	const int r_out = output_format.r;
795 	const int b_out = output_format.b;
796 
797 	unsigned int i;
798 	float (*mat)[4] = transform->matrix;
799 	for (i = 0; i < length; i++) {
800 		unsigned char device_r = *src++;
801 		unsigned char device_g = *src++;
802 		unsigned char device_b = *src++;
803 		unsigned char alpha = *src++;
804 		float out_device_r, out_device_g, out_device_b;
805 
806 		float linear_r = transform->input_gamma_table_r[device_r];
807 		float linear_g = transform->input_gamma_table_g[device_g];
808 		float linear_b = transform->input_gamma_table_b[device_b];
809 
810 		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
811 		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
812 		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
813 
814 		out_linear_r = clamp_float(out_linear_r);
815 		out_linear_g = clamp_float(out_linear_g);
816 		out_linear_b = clamp_float(out_linear_b);
817 
818 		out_device_r = lut_interp_linear(out_linear_r,
819 				transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
820 		out_device_g = lut_interp_linear(out_linear_g,
821 				transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
822 		out_device_b = lut_interp_linear(out_linear_b,
823 				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
824 
825 		dest[r_out] = clamp_u8(out_device_r*255);
826 		dest[1]     = clamp_u8(out_device_g*255);
827 		dest[b_out] = clamp_u8(out_device_b*255);
828 		dest[3]     = alpha;
829 		dest += 4;
830 	}
831 }
832 
833 #if 0
834 static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
835 {
836 	const int r_out = output_format.r;
837 	const int b_out = output_format.b;
838 
839 	int i;
840 	float (*mat)[4] = transform->matrix;
841 	for (i = 0; i < length; i++) {
842 		unsigned char device_r = *src++;
843 		unsigned char device_g = *src++;
844 		unsigned char device_b = *src++;
845 
846 		float linear_r = transform->input_gamma_table_r[device_r];
847 		float linear_g = transform->input_gamma_table_g[device_g];
848 		float linear_b = transform->input_gamma_table_b[device_b];
849 
850 		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
851 		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
852 		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
853 
854 		dest[r_out] = clamp_u8(out_linear_r*255);
855 		dest[1]     = clamp_u8(out_linear_g*255);
856 		dest[b_out] = clamp_u8(out_linear_b*255);
857 		dest += 3;
858 	}
859 }
860 #endif
861 
862 /*
863  * If users create and destroy objects on different threads, even if the same
864  * objects aren't used on different threads at the same time, we can still run
865  * in to trouble with refcounts if they aren't atomic.
866  *
867  * This can lead to us prematurely deleting the precache if threads get unlucky
868  * and write the wrong value to the ref count.
869  */
precache_reference(struct precache_output * p)870 static struct precache_output *precache_reference(struct precache_output *p)
871 {
872 	qcms_atomic_increment(p->ref_count);
873 	return p;
874 }
875 
precache_create()876 static struct precache_output *precache_create()
877 {
878 	struct precache_output *p = malloc(sizeof(struct precache_output));
879 	if (p)
880 		p->ref_count = 1;
881 	return p;
882 }
883 
precache_release(struct precache_output * p)884 void precache_release(struct precache_output *p)
885 {
886 	if (qcms_atomic_decrement(p->ref_count) == 0) {
887 		free(p);
888 	}
889 }
890 
891 #ifdef HAVE_POSIX_MEMALIGN
transform_alloc(void)892 static qcms_transform *transform_alloc(void)
893 {
894 	qcms_transform *t;
895 	if (!posix_memalign(&t, 16, sizeof(*t))) {
896 		return t;
897 	} else {
898 		return NULL;
899 	}
900 }
transform_free(qcms_transform * t)901 static void transform_free(qcms_transform *t)
902 {
903 	free(t);
904 }
905 #else
transform_alloc(void)906 static qcms_transform *transform_alloc(void)
907 {
908 	/* transform needs to be aligned on a 16byte boundrary */
909 	char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
910 	/* make room for a pointer to the block returned by calloc */
911 	void *transform_start = original_block + sizeof(void*);
912 	/* align transform_start */
913 	qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
914 
915 	/* store a pointer to the block returned by calloc so that we can free it later */
916 	void **(original_block_ptr) = (void**)transform_aligned;
917 	if (!original_block)
918 		return NULL;
919 	original_block_ptr--;
920 	*original_block_ptr = original_block;
921 
922 	return transform_aligned;
923 }
transform_free(qcms_transform * t)924 static void transform_free(qcms_transform *t)
925 {
926 	/* get at the pointer to the unaligned block returned by calloc */
927 	void **p = (void**)t;
928 	p--;
929 	free(*p);
930 }
931 #endif
932 
qcms_transform_release(qcms_transform * t)933 void qcms_transform_release(qcms_transform *t)
934 {
935 	/* ensure we only free the gamma tables once even if there are
936 	 * multiple references to the same data */
937 
938 	if (t->output_table_r)
939 		precache_release(t->output_table_r);
940 	if (t->output_table_g)
941 		precache_release(t->output_table_g);
942 	if (t->output_table_b)
943 		precache_release(t->output_table_b);
944 
945 	free(t->input_gamma_table_r);
946 	if (t->input_gamma_table_g != t->input_gamma_table_r)
947 		free(t->input_gamma_table_g);
948 	if (t->input_gamma_table_g != t->input_gamma_table_r &&
949 	    t->input_gamma_table_g != t->input_gamma_table_b)
950 		free(t->input_gamma_table_b);
951 
952 	free(t->input_gamma_table_gray);
953 
954 	free(t->output_gamma_lut_r);
955 	free(t->output_gamma_lut_g);
956 	free(t->output_gamma_lut_b);
957 
958 	transform_free(t);
959 }
960 
961 #ifdef X86
962 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
963 // mozilla/jpeg)
964  // -------------------------------------------------------------------------
965 #if defined(_M_IX86) && defined(_MSC_VER)
966 #define HAS_CPUID
967 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
968    register - I'm not sure if that ever happens on windows, but cpuid isn't
969    on the critical path so we just preserve the register to be safe and to be
970    consistent with the non-windows version. */
cpuid(uint32_t fxn,uint32_t * a,uint32_t * b,uint32_t * c,uint32_t * d)971 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
972        uint32_t a_, b_, c_, d_;
973        __asm {
974               xchg   ebx, esi
975               mov    eax, fxn
976               cpuid
977               mov    a_, eax
978               mov    b_, ebx
979               mov    c_, ecx
980               mov    d_, edx
981               xchg   ebx, esi
982        }
983        *a = a_;
984        *b = b_;
985        *c = c_;
986        *d = d_;
987 }
988 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
989 #define HAS_CPUID
990 /* Get us a CPUID function. We can't use ebx because it's the PIC register on
991    some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
cpuid(uint32_t fxn,uint32_t * a,uint32_t * b,uint32_t * c,uint32_t * d)992 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
993 
994 	uint32_t a_, b_, c_, d_;
995        __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
996                              : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
997 	   *a = a_;
998 	   *b = b_;
999 	   *c = c_;
1000 	   *d = d_;
1001 }
1002 #endif
1003 
1004 // -------------------------Runtime SSEx Detection-----------------------------
1005 
1006 /* MMX is always supported per
1007  *  Gecko v1.9.1 minimum CPU requirements */
1008 #define SSE1_EDX_MASK (1UL << 25)
1009 #define SSE2_EDX_MASK (1UL << 26)
1010 #define SSE3_ECX_MASK (1UL <<  0)
1011 
sse_version_available(void)1012 static int sse_version_available(void)
1013 {
1014 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
1015 	/* we know at build time that 64-bit CPUs always have SSE2
1016 	 * this tells the compiler that non-SSE2 branches will never be
1017 	 * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
1018 	return 2;
1019 #elif defined(HAS_CPUID)
1020 	static int sse_version = -1;
1021 	uint32_t a, b, c, d;
1022 	uint32_t function = 0x00000001;
1023 
1024 	if (sse_version == -1) {
1025 		sse_version = 0;
1026 		cpuid(function, &a, &b, &c, &d);
1027 		if (c & SSE3_ECX_MASK)
1028 			sse_version = 3;
1029 		else if (d & SSE2_EDX_MASK)
1030 			sse_version = 2;
1031 		else if (d & SSE1_EDX_MASK)
1032 			sse_version = 1;
1033 	}
1034 
1035 	return sse_version;
1036 #else
1037 	return 0;
1038 #endif
1039 }
1040 #endif
1041 
1042 static const struct matrix bradford_matrix = {{	{ 0.8951f, 0.2664f,-0.1614f},
1043 						{-0.7502f, 1.7135f, 0.0367f},
1044 						{ 0.0389f,-0.0685f, 1.0296f}},
1045 						false};
1046 
1047 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
1048 						    { 0.4323053f, 0.5183603f, 0.0492912f},
1049 						    {-0.0085287f, 0.0400428f, 0.9684867f}},
1050 						    false};
1051 
1052 // See ICCv4 E.3
compute_whitepoint_adaption(float X,float Y,float Z)1053 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1054 	float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
1055 		  (X*bradford_matrix.m[0][0]      + Y*bradford_matrix.m[1][0]      + Z*bradford_matrix.m[2][0]     );
1056 	float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
1057 		  (X*bradford_matrix.m[0][1]      + Y*bradford_matrix.m[1][1]      + Z*bradford_matrix.m[2][1]     );
1058 	float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
1059 		  (X*bradford_matrix.m[0][2]      + Y*bradford_matrix.m[1][2]      + Z*bradford_matrix.m[2][2]     );
1060 	struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1061 	return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
1062 }
1063 
qcms_profile_precache_output_transform(qcms_profile * profile)1064 void qcms_profile_precache_output_transform(qcms_profile *profile)
1065 {
1066 	/* we only support precaching on rgb profiles */
1067 	if (profile->color_space != RGB_SIGNATURE)
1068 		return;
1069 
1070 	if (qcms_supports_iccv4) {
1071 		/* don't precache since we will use the B2A LUT */
1072 		if (profile->B2A0)
1073 			return;
1074 
1075 		/* don't precache since we will use the mBA LUT */
1076 		if (profile->mBA)
1077 			return;
1078 	}
1079 
1080 	/* don't precache if we do not have the TRC curves */
1081 	if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1082 		return;
1083 
1084 	if (!profile->output_table_r) {
1085 		profile->output_table_r = precache_create();
1086 		if (profile->output_table_r &&
1087 				!compute_precache(profile->redTRC, profile->output_table_r->data)) {
1088 			precache_release(profile->output_table_r);
1089 			profile->output_table_r = NULL;
1090 		}
1091 	}
1092 	if (!profile->output_table_g) {
1093 		profile->output_table_g = precache_create();
1094 		if (profile->output_table_g &&
1095 				!compute_precache(profile->greenTRC, profile->output_table_g->data)) {
1096 			precache_release(profile->output_table_g);
1097 			profile->output_table_g = NULL;
1098 		}
1099 	}
1100 	if (!profile->output_table_b) {
1101 		profile->output_table_b = precache_create();
1102 		if (profile->output_table_b &&
1103 				!compute_precache(profile->blueTRC, profile->output_table_b->data)) {
1104 			precache_release(profile->output_table_b);
1105 			profile->output_table_b = NULL;
1106 		}
1107 	}
1108 }
1109 
1110 /* Replace the current transformation with a LUT transformation using a given number of sample points */
qcms_transform_precacheLUT_float(qcms_transform * transform,qcms_profile * in,qcms_profile * out,int samples,qcms_data_type in_type)1111 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
1112                                                  int samples, qcms_data_type in_type)
1113 {
1114 	/* The range between which 2 consecutive sample points can be used to interpolate */
1115 	uint16_t x,y,z;
1116 	uint32_t l;
1117 	uint32_t lutSize = 3 * samples * samples * samples;
1118 	float* src = NULL;
1119 	float* dest = NULL;
1120 	float* lut = NULL;
1121 
1122 	src = malloc(lutSize*sizeof(float));
1123 	dest = malloc(lutSize*sizeof(float));
1124 
1125 	if (src && dest) {
1126 		/* Prepare a list of points we want to sample */
1127 		l = 0;
1128 		for (x = 0; x < samples; x++) {
1129 			for (y = 0; y < samples; y++) {
1130 				for (z = 0; z < samples; z++) {
1131 					src[l++] = x / (float)(samples-1);
1132 					src[l++] = y / (float)(samples-1);
1133 					src[l++] = z / (float)(samples-1);
1134 				}
1135 			}
1136 		}
1137 
1138 		lut = qcms_chain_transform(in, out, src, dest, lutSize);
1139 		if (lut) {
1140 			transform->r_clut = &lut[0];
1141 			transform->g_clut = &lut[1];
1142 			transform->b_clut = &lut[2];
1143 			transform->grid_size = samples;
1144 			if (in_type == QCMS_DATA_RGBA_8) {
1145 				transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1146 			} else {
1147 				transform->transform_fn = qcms_transform_data_tetra_clut;
1148 			}
1149 		}
1150 	}
1151 
1152 
1153 	//XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
1154 	if (src && lut != src) {
1155 		free(src);
1156 	} else if (dest && lut != src) {
1157 		free(dest);
1158 	}
1159 
1160 	if (lut == NULL) {
1161 		return NULL;
1162 	}
1163 	return transform;
1164 }
1165 
1166 #define NO_MEM_TRANSFORM NULL
1167 
qcms_transform_create(qcms_profile * in,qcms_data_type in_type,qcms_profile * out,qcms_data_type out_type,qcms_intent intent)1168 qcms_transform* qcms_transform_create(
1169 		qcms_profile *in, qcms_data_type in_type,
1170 		qcms_profile *out, qcms_data_type out_type,
1171 		qcms_intent intent)
1172 {
1173 	bool precache = false;
1174 
1175         qcms_transform *transform = transform_alloc();
1176         if (!transform) {
1177 		return NULL;
1178 	}
1179 	if (out_type != QCMS_DATA_RGB_8 &&
1180                 out_type != QCMS_DATA_RGBA_8) {
1181             assert(0 && "output type");
1182 	    transform_free(transform);
1183             return NULL;
1184         }
1185 
1186 	if (out->output_table_r &&
1187 			out->output_table_g &&
1188 			out->output_table_b) {
1189 		precache = true;
1190 	}
1191 
1192 	if (qcms_supports_iccv4 && (in->A2B0 || out->B2A0 || in->mAB || out->mAB)) {
1193 		// Precache the transformation to a CLUT 33x33x33 in size.
1194 		// 33 is used by many profiles and works well in pratice.
1195 		// This evenly divides 256 into blocks of 8x8x8.
1196 		// TODO For transforming small data sets of about 200x200 or less
1197 		// precaching should be avoided.
1198 		qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
1199 		if (!result) {
1200             		assert(0 && "precacheLUT failed");
1201 			transform_free(transform);
1202 			return NULL;
1203 		}
1204 		return result;
1205 	}
1206 
1207 	if (precache) {
1208 		transform->output_table_r = precache_reference(out->output_table_r);
1209 		transform->output_table_g = precache_reference(out->output_table_g);
1210 		transform->output_table_b = precache_reference(out->output_table_b);
1211 	} else {
1212 		if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1213 			qcms_transform_release(transform);
1214 			return NO_MEM_TRANSFORM;
1215 		}
1216 		build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
1217 		build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1218 		build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
1219 		if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
1220 			qcms_transform_release(transform);
1221 			return NO_MEM_TRANSFORM;
1222 		}
1223 	}
1224 
1225         if (in->color_space == RGB_SIGNATURE) {
1226 		struct matrix in_matrix, out_matrix, result;
1227 
1228 		if (in_type != QCMS_DATA_RGB_8 &&
1229                     in_type != QCMS_DATA_RGBA_8){
1230                 	assert(0 && "input type");
1231 			transform_free(transform);
1232                 	return NULL;
1233             	}
1234 		if (precache) {
1235 #if defined(SSE2_ENABLE) && defined(X86)
1236 		    if (sse_version_available() >= 2) {
1237 			    if (in_type == QCMS_DATA_RGB_8)
1238 				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1239 			    else
1240 				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1241 
1242 #if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
1243                     /* Microsoft Compiler for x64 doesn't support MMX.
1244                      * SSE code uses MMX so that we disable on x64 */
1245 		    } else
1246 		    if (sse_version_available() >= 1) {
1247 			    if (in_type == QCMS_DATA_RGB_8)
1248 				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
1249 			    else
1250 				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
1251 #endif
1252 		    } else
1253 #endif
1254 			{
1255 				if (in_type == QCMS_DATA_RGB_8)
1256 					transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
1257 				else
1258 					transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
1259 			}
1260 		} else {
1261 			if (in_type == QCMS_DATA_RGB_8)
1262 				transform->transform_fn = qcms_transform_data_rgb_out_lut;
1263 			else
1264 				transform->transform_fn = qcms_transform_data_rgba_out_lut;
1265 		}
1266 
1267 		//XXX: avoid duplicating tables if we can
1268 		transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
1269 		transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
1270 		transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
1271 		if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
1272 			qcms_transform_release(transform);
1273 			return NO_MEM_TRANSFORM;
1274 		}
1275 
1276 
1277 		/* build combined colorant matrix */
1278 		in_matrix = build_colorant_matrix(in);
1279 		out_matrix = build_colorant_matrix(out);
1280 		out_matrix = matrix_invert(out_matrix);
1281 		if (out_matrix.invalid) {
1282 			qcms_transform_release(transform);
1283 			return NULL;
1284 		}
1285 		result = matrix_multiply(out_matrix, in_matrix);
1286 
1287 		/* store the results in column major mode
1288 		 * this makes doing the multiplication with sse easier */
1289 		transform->matrix[0][0] = result.m[0][0];
1290 		transform->matrix[1][0] = result.m[0][1];
1291 		transform->matrix[2][0] = result.m[0][2];
1292 		transform->matrix[0][1] = result.m[1][0];
1293 		transform->matrix[1][1] = result.m[1][1];
1294 		transform->matrix[2][1] = result.m[1][2];
1295 		transform->matrix[0][2] = result.m[2][0];
1296 		transform->matrix[1][2] = result.m[2][1];
1297 		transform->matrix[2][2] = result.m[2][2];
1298 
1299 	} else if (in->color_space == GRAY_SIGNATURE) {
1300 		if (in_type != QCMS_DATA_GRAY_8 &&
1301 				in_type != QCMS_DATA_GRAYA_8){
1302 			assert(0 && "input type");
1303 			transform_free(transform);
1304 			return NULL;
1305 		}
1306 
1307 		transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
1308 		if (!transform->input_gamma_table_gray) {
1309 			qcms_transform_release(transform);
1310 			return NO_MEM_TRANSFORM;
1311 		}
1312 
1313 		if (precache) {
1314 			if (in_type == QCMS_DATA_GRAY_8) {
1315 				transform->transform_fn = qcms_transform_data_gray_out_precache;
1316 			} else {
1317 				transform->transform_fn = qcms_transform_data_graya_out_precache;
1318 			}
1319 		} else {
1320 			if (in_type == QCMS_DATA_GRAY_8) {
1321 				transform->transform_fn = qcms_transform_data_gray_out_lut;
1322 			} else {
1323 				transform->transform_fn = qcms_transform_data_graya_out_lut;
1324 			}
1325 		}
1326 	} else {
1327 		assert(0 && "unexpected colorspace");
1328 		transform_free(transform);
1329 		return NULL;
1330 	}
1331 	return transform;
1332 }
1333 
1334 /* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
1335  * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
1336  * of the attribute but is currently only supported by clang */
1337 #if defined(__has_attribute)
1338 #define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1339 #elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__)
1340 #define HAS_FORCE_ALIGN_ARG_POINTER 1
1341 #else
1342 #define HAS_FORCE_ALIGN_ARG_POINTER 0
1343 #endif
1344 
1345 #if HAS_FORCE_ALIGN_ARG_POINTER
1346 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1347 __attribute__((__force_align_arg_pointer__))
1348 #endif
qcms_transform_data(qcms_transform * transform,void * src,void * dest,size_t length)1349 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1350 {
1351 	static const struct _qcms_format_type output_rgbx = { 0, 2 };
1352 
1353 	transform->transform_fn(transform, src, dest, length, output_rgbx);
1354 }
1355 
qcms_transform_data_type(qcms_transform * transform,void * src,void * dest,size_t length,qcms_output_type type)1356 void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
1357 {
1358 	static const struct _qcms_format_type output_rgbx = { 0, 2 };
1359 	static const struct _qcms_format_type output_bgrx = { 2, 0 };
1360 
1361 	transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
1362 }
1363 
1364 qcms_bool qcms_supports_iccv4;
qcms_enable_iccv4()1365 void qcms_enable_iccv4()
1366 {
1367 	qcms_supports_iccv4 = true;
1368 }
1369