4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
8 * This file contains the forward-DCT management logic.
9 * This code selects a particular DCT implementation to be used,
10 * and it performs related housekeeping chores including coefficient
14 #define JPEG_INTERNALS
17 #include "jdct.h" /* Private declarations for DCT subsystem */
20 /* Private subobject for this module */
23 struct jpeg_forward_dct pub; /* public fields */
25 /* Pointer to the DCT routine actually in use */
26 forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
28 /* The actual post-DCT divisors --- not identical to the quant table
29 * entries, because of scaling (especially for an unnormalized DCT).
30 * Each table is given in normal array order.
32 DCTELEM * divisors[NUM_QUANT_TBLS];
34 #ifdef DCT_FLOAT_SUPPORTED
35 /* Same as above for the floating-point case. */
36 float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
37 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
41 typedef my_fdct_controller * my_fdct_ptr;
44 /* The current scaled-DCT routines require ISLOW-style divisor tables,
45 * so be sure to compile that code if either ISLOW or SCALING is requested.
47 #ifdef DCT_ISLOW_SUPPORTED
48 #define PROVIDE_ISLOW_TABLES
50 #ifdef DCT_SCALING_SUPPORTED
51 #define PROVIDE_ISLOW_TABLES
57 * Perform forward DCT on one or more blocks of a component.
59 * The input samples are taken from the sample_data[] array starting at
60 * position start_row/start_col, and moving to the right for any additional
61 * blocks. The quantized coefficients are returned in coef_blocks[].
65 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
66 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
67 JDIMENSION start_row, JDIMENSION start_col,
68 JDIMENSION num_blocks)
69 /* This version is used for integer DCT implementations. */
71 /* This routine is heavily used, so it's worth coding it tightly. */
72 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
73 forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
74 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
75 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
78 sample_data += start_row; /* fold in the vertical offset once */
80 for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
82 (*do_dct) (workspace, sample_data, start_col);
84 /* Quantize/descale the coefficients, and store into coef_blocks[] */
85 { register DCTELEM temp, qval;
87 register JCOEFPTR output_ptr = coef_blocks[bi];
89 for (i = 0; i < DCTSIZE2; i++) {
92 /* Divide the coefficient value by qval, ensuring proper rounding.
93 * Since C does not specify the direction of rounding for negative
94 * quotients, we have to force the dividend positive for portability.
96 * In most files, at least half of the output values will be zero
97 * (at default quantization settings, more like three-quarters...)
98 * so we should ensure that this case is fast. On many machines,
99 * a comparison is enough cheaper than a divide to make a special test
100 * a win. Since both inputs will be nonnegative, we need only test
101 * for a < b to discover whether a/b is 0.
102 * If your machine's division is fast enough, define FAST_DIVIDE.
105 #define DIVIDE_BY(a,b) a /= b
107 #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
111 temp += qval>>1; /* for rounding */
112 DIVIDE_BY(temp, qval);
115 temp += qval>>1; /* for rounding */
116 DIVIDE_BY(temp, qval);
118 output_ptr[i] = (JCOEF) temp;
125 #ifdef DCT_FLOAT_SUPPORTED
128 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
129 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
130 JDIMENSION start_row, JDIMENSION start_col,
131 JDIMENSION num_blocks)
132 /* This version is used for floating-point DCT implementations. */
134 /* This routine is heavily used, so it's worth coding it tightly. */
135 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
136 float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
137 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
138 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
141 sample_data += start_row; /* fold in the vertical offset once */
143 for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
144 /* Perform the DCT */
145 (*do_dct) (workspace, sample_data, start_col);
147 /* Quantize/descale the coefficients, and store into coef_blocks[] */
148 { register FAST_FLOAT temp;
150 register JCOEFPTR output_ptr = coef_blocks[bi];
152 for (i = 0; i < DCTSIZE2; i++) {
153 /* Apply the quantization and scaling factor */
154 temp = workspace[i] * divisors[i];
155 /* Round to nearest integer.
156 * Since C does not specify the direction of rounding for negative
157 * quotients, we have to force the dividend positive for portability.
158 * The maximum coefficient size is +-16K (for 12-bit data), so this
159 * code should work for either 16-bit or 32-bit ints.
161 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
167 #endif /* DCT_FLOAT_SUPPORTED */
171 * Initialize for a processing pass.
172 * Verify that all referenced Q-tables are present, and set up
173 * the divisor table for each one.
174 * In the current implementation, DCT of all components is done during
175 * the first pass, even if only some components will be output in the
176 * first scan. Hence all components should be examined here.
180 start_pass_fdctmgr (j_compress_ptr cinfo)
182 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
184 jpeg_component_info *compptr;
189 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
191 /* Select the proper DCT routine for this component's scaling */
192 switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
193 #ifdef DCT_SCALING_SUPPORTED
195 fdct->do_dct[ci] = jpeg_fdct_1x1;
196 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
199 fdct->do_dct[ci] = jpeg_fdct_2x2;
200 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
203 fdct->do_dct[ci] = jpeg_fdct_3x3;
204 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
207 fdct->do_dct[ci] = jpeg_fdct_4x4;
208 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
211 fdct->do_dct[ci] = jpeg_fdct_5x5;
212 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
215 fdct->do_dct[ci] = jpeg_fdct_6x6;
216 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
219 fdct->do_dct[ci] = jpeg_fdct_7x7;
220 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
223 fdct->do_dct[ci] = jpeg_fdct_9x9;
224 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
226 case ((10 << 8) + 10):
227 fdct->do_dct[ci] = jpeg_fdct_10x10;
228 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
230 case ((11 << 8) + 11):
231 fdct->do_dct[ci] = jpeg_fdct_11x11;
232 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
234 case ((12 << 8) + 12):
235 fdct->do_dct[ci] = jpeg_fdct_12x12;
236 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
238 case ((13 << 8) + 13):
239 fdct->do_dct[ci] = jpeg_fdct_13x13;
240 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
242 case ((14 << 8) + 14):
243 fdct->do_dct[ci] = jpeg_fdct_14x14;
244 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
246 case ((15 << 8) + 15):
247 fdct->do_dct[ci] = jpeg_fdct_15x15;
248 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
250 case ((16 << 8) + 16):
251 fdct->do_dct[ci] = jpeg_fdct_16x16;
252 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
254 case ((16 << 8) + 8):
255 fdct->do_dct[ci] = jpeg_fdct_16x8;
256 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
258 case ((14 << 8) + 7):
259 fdct->do_dct[ci] = jpeg_fdct_14x7;
260 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
262 case ((12 << 8) + 6):
263 fdct->do_dct[ci] = jpeg_fdct_12x6;
264 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
266 case ((10 << 8) + 5):
267 fdct->do_dct[ci] = jpeg_fdct_10x5;
268 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
271 fdct->do_dct[ci] = jpeg_fdct_8x4;
272 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
275 fdct->do_dct[ci] = jpeg_fdct_6x3;
276 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
279 fdct->do_dct[ci] = jpeg_fdct_4x2;
280 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
283 fdct->do_dct[ci] = jpeg_fdct_2x1;
284 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
286 case ((8 << 8) + 16):
287 fdct->do_dct[ci] = jpeg_fdct_8x16;
288 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
290 case ((7 << 8) + 14):
291 fdct->do_dct[ci] = jpeg_fdct_7x14;
292 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
294 case ((6 << 8) + 12):
295 fdct->do_dct[ci] = jpeg_fdct_6x12;
296 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
298 case ((5 << 8) + 10):
299 fdct->do_dct[ci] = jpeg_fdct_5x10;
300 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
303 fdct->do_dct[ci] = jpeg_fdct_4x8;
304 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
307 fdct->do_dct[ci] = jpeg_fdct_3x6;
308 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
311 fdct->do_dct[ci] = jpeg_fdct_2x4;
312 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
315 fdct->do_dct[ci] = jpeg_fdct_1x2;
316 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
319 case ((DCTSIZE << 8) + DCTSIZE):
320 switch (cinfo->dct_method) {
321 #ifdef DCT_ISLOW_SUPPORTED
323 fdct->do_dct[ci] = jpeg_fdct_islow;
327 #ifdef DCT_IFAST_SUPPORTED
329 fdct->do_dct[ci] = jpeg_fdct_ifast;
333 #ifdef DCT_FLOAT_SUPPORTED
335 fdct->do_float_dct[ci] = jpeg_fdct_float;
340 ERREXIT(cinfo, JERR_NOT_COMPILED);
345 ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
346 compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
349 qtblno = compptr->quant_tbl_no;
350 /* Make sure specified quantization table is present */
351 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
352 cinfo->quant_tbl_ptrs[qtblno] == NULL)
353 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
354 qtbl = cinfo->quant_tbl_ptrs[qtblno];
355 /* Compute divisors for this quant table */
356 /* We may do this more than once for same table, but it's not a big deal */
358 #ifdef PROVIDE_ISLOW_TABLES
360 /* For LL&M IDCT method, divisors are equal to raw quantization
361 * coefficients multiplied by 8 (to counteract scaling).
363 if (fdct->divisors[qtblno] == NULL) {
364 fdct->divisors[qtblno] = (DCTELEM *)
365 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
366 DCTSIZE2 * SIZEOF(DCTELEM));
368 dtbl = fdct->divisors[qtblno];
369 for (i = 0; i < DCTSIZE2; i++) {
370 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
372 fdct->pub.forward_DCT[ci] = forward_DCT;
375 #ifdef DCT_IFAST_SUPPORTED
378 /* For AA&N IDCT method, divisors are equal to quantization
379 * coefficients scaled by scalefactor[row]*scalefactor[col], where
381 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
382 * We apply a further scale factor of 8.
384 #define CONST_BITS 14
385 static const INT16 aanscales[DCTSIZE2] = {
386 /* precomputed values scaled up by 14 bits */
387 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
388 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
389 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
390 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
391 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
392 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
393 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
394 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
398 if (fdct->divisors[qtblno] == NULL) {
399 fdct->divisors[qtblno] = (DCTELEM *)
400 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
401 DCTSIZE2 * SIZEOF(DCTELEM));
403 dtbl = fdct->divisors[qtblno];
404 for (i = 0; i < DCTSIZE2; i++) {
406 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
407 (INT32) aanscales[i]),
411 fdct->pub.forward_DCT[ci] = forward_DCT;
414 #ifdef DCT_FLOAT_SUPPORTED
417 /* For float AA&N IDCT method, divisors are equal to quantization
418 * coefficients scaled by scalefactor[row]*scalefactor[col], where
420 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
421 * We apply a further scale factor of 8.
422 * What's actually stored is 1/divisor so that the inner loop can
423 * use a multiplication rather than a division.
427 static const double aanscalefactor[DCTSIZE] = {
428 1.0, 1.387039845, 1.306562965, 1.175875602,
429 1.0, 0.785694958, 0.541196100, 0.275899379
432 if (fdct->float_divisors[qtblno] == NULL) {
433 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
434 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
435 DCTSIZE2 * SIZEOF(FAST_FLOAT));
437 fdtbl = fdct->float_divisors[qtblno];
439 for (row = 0; row < DCTSIZE; row++) {
440 for (col = 0; col < DCTSIZE; col++) {
441 fdtbl[i] = (FAST_FLOAT)
442 (1.0 / (((double) qtbl->quantval[i] *
443 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
448 fdct->pub.forward_DCT[ci] = forward_DCT_float;
452 ERREXIT(cinfo, JERR_NOT_COMPILED);
460 * Initialize FDCT manager.
464 jinit_forward_dct (j_compress_ptr cinfo)
470 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
471 SIZEOF(my_fdct_controller));
472 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
473 fdct->pub.start_pass = start_pass_fdctmgr;
475 /* Mark divisor tables unallocated */
476 for (i = 0; i < NUM_QUANT_TBLS; i++) {
477 fdct->divisors[i] = NULL;
478 #ifdef DCT_FLOAT_SUPPORTED
479 fdct->float_divisors[i] = NULL;