1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  *
40  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
 * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
 * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
43  */
44 module arsd.jpeg;
45 
46 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
47 // This is slower, but results in higher quality on images with highly saturated colors.
48 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
49 
50 /// Input stream interface.
51 /// This delegate is called when the internal input buffer is empty.
52 /// Parameters:
53 ///   pBuf - input buffer
54 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
55 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
///   Returns -1 on error, otherwise returns the number of bytes actually written to the buffer (which may be 0).
57 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
58 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
59 
60 
61 // ////////////////////////////////////////////////////////////////////////// //
62 private:
/// Allocates `nSize` bytes through the C runtime `malloc` and returns whatever it returns.
void *jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
/// Releases memory through the C runtime `free`; passing null is harmless.
void jpgd_free (void *p) {
  import core.stdc.stdlib : free;
  // free() already treats a null pointer as a no-op, so no explicit guard is needed.
  free(p);
}
65 
66 // Success/failure error codes.
// Status values are plain ints; the named values come from the anonymous enum below.
alias jpgd_status = int;
// JPGD_SUCCESS (0), JPGD_FAILED (-1) and JPGD_DONE (1) are the generic results.
// The specific error codes start at -256 (JPGD_BAD_DHT_COUNTS); every following
// enumerator without an explicit value is one greater than its predecessor.
enum /*jpgd_status*/ {
  JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
  JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
  JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
  JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
  JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
  JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
  JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
  JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
  JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
}
79 
// Compile-time sizes and limits used by the decoder.
// JPGD_IN_BUF_SIZE sizes jpeg_decoder.m_in_buf; the JPGD_MAX_* values size the
// per-table, per-component and per-MCU arrays declared in jpeg_decoder.
// NOTE(review): JPGD_MAX_WIDTH/JPGD_MAX_HEIGHT are presumably the largest accepted
// image dimensions -- the code that enforces them is not in this part of the file.
enum {
  JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
  JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
}
84 
85 // DCT coefficients are stored in this sequence.
// 64-entry permutation of 0..63.
// NOTE(review): presumably maps a zigzag scan position to the natural (row-major) index
// inside an 8x8 block -- confirm against the code that reads this table.
static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
87 
// Marker codes are plain ints; the named values come from the anonymous enum below.
alias JPEG_MARKER = int;
// JPEG marker byte values (the byte that follows 0xFF in the stream).
// M_ERROR (0x100) lies outside the single-byte range, so it cannot collide with a real marker byte.
// RST0 has the same value as M_RST0 (0xD0).
enum /*JPEG_MARKER*/ {
  M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
  M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
  M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
  M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
  M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
}
96 
// Scan/subsampling layout identifiers (values 0..4); jpeg_decoder.decode() switches on
// these values (stored in m_scan_type) to pick the colour-conversion routine.
alias JPEG_SUBSAMPLING = int;
enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }
99 
// Fixed-point parameters of the integer IDCT below.
// CONST_BITS: number of fractional bits in the FIX_* multipliers (each FIX_x is x * 2^13, rounded).
// PASS1_BITS: extra fractional bits kept in the intermediate results of the row pass.
enum CONST_BITS = 13;
enum PASS1_BITS = 2;
// The value 1 as an int; shifted left to build rounding terms in DESCALE/DESCALE_ZEROSHIFT.
enum SCALEDONE = cast(int)1;

enum FIX_0_298631336 = cast(int)2446;  /* FIX(0.298631336) */
enum FIX_0_390180644 = cast(int)3196;  /* FIX(0.390180644) */
enum FIX_0_541196100 = cast(int)4433;  /* FIX(0.541196100) */
enum FIX_0_765366865 = cast(int)6270;  /* FIX(0.765366865) */
enum FIX_0_899976223 = cast(int)7373;  /* FIX(0.899976223) */
enum FIX_1_175875602 = cast(int)9633;  /* FIX(1.175875602) */
enum FIX_1_501321110 = cast(int)12299; /* FIX(1.501321110) */
enum FIX_1_847759065 = cast(int)15137; /* FIX(1.847759065) */
enum FIX_1_961570560 = cast(int)16069; /* FIX(1.961570560) */
enum FIX_2_053119869 = cast(int)16819; /* FIX(2.053119869) */
enum FIX_2_562915447 = cast(int)20995; /* FIX(2.562915447) */
enum FIX_3_072711026 = cast(int)25172; /* FIX(3.072711026) */
116 
/// Divides `x` by 2^n with rounding: adds half of the divisor, then shifts right by `n`.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = SCALEDONE << (n - 1);
  return (x + half) >> n;
}
/// Like DESCALE, but also adds 128 (pre-scaled by 2^n) so the descaled result is shifted up by 128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int level_shift = 128 << n;
  immutable int half = SCALEDONE << (n - 1);
  return (x + level_shift + half) >> n;
}
/// Saturates `i` to the range 0..255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  // One unsigned compare catches both negative values and values above 255.
  if (cast(uint)i <= 255) return cast(ubyte)i;
  // ~i is non-negative for negative i (-> 0) and negative for i > 255 (-> 0xFF after the sign fill).
  return cast(ubyte)(((~i) >> 31) & 0xFF);
}
120 
121 
122 // Compiler creates a fast path 1D IDCT for X non-zero columns
// Row pass of the 2-D integer IDCT, specialised at compile time.
// NONZERO_COLS is the number of leading coefficients in the row that are read from pSrc;
// every coefficient at an index >= NONZERO_COLS is replaced by the literal 0.
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  // Transforms one row: reads up to 8 coefficients from pSrc and writes 8 ints to pTemp.
  // Results keep PASS1_BITS extra fractional bits (they are descaled by CONST_BITS-PASS1_BITS only).
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // nothing
      // (pTemp is left untouched for an all-zero row)
    } else static if (NONZERO_COLS == 1) {
      // Only the first coefficient is used: all 8 outputs equal it, scaled up by PASS1_BITS.
      immutable int dcval = (pSrc[0] << PASS1_BITS);
      pTemp[0] = dcval;
      pTemp[1] = dcval;
      pTemp[2] = dcval;
      pTemp[3] = dcval;
      pTemp[4] = dcval;
      pTemp[5] = dcval;
      pTemp[6] = dcval;
      pTemp[7] = dcval;
    } else {
      // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
      //#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
      // ACCESS_COL!x yields source text that is mixed in: "cast(int)pSrc[x]" or "0".
      template ACCESS_COL(int x) {
        static if (x < NONZERO_COLS) enum ACCESS_COL = "cast(int)pSrc["~x.stringof~"]"; else enum ACCESS_COL = "0";
      }

      // Even part: coefficients 2 and 6, then 0 and 4.
      immutable int z2 = mixin(ACCESS_COL!2), z3 = mixin(ACCESS_COL!6);

      immutable int z1 = (z2 + z3)*FIX_0_541196100;
      immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
      immutable int tmp3 = z1 + z2*FIX_0_765366865;

      immutable int tmp0 = (mixin(ACCESS_COL!0) + mixin(ACCESS_COL!4)) << CONST_BITS;
      immutable int tmp1 = (mixin(ACCESS_COL!0) - mixin(ACCESS_COL!4)) << CONST_BITS;

      immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;

      // Odd part: coefficients 7, 5, 3 and 1.
      immutable int atmp0 = mixin(ACCESS_COL!7), atmp1 = mixin(ACCESS_COL!5), atmp2 = mixin(ACCESS_COL!3), atmp3 = mixin(ACCESS_COL!1);

      immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
      immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;

      immutable int az1 = bz1*(-FIX_0_899976223);
      immutable int az2 = bz2*(-FIX_2_562915447);
      immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
      immutable int az4 = bz4*(-FIX_0_390180644) + bz5;

      immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
      immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
      immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
      immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;

      // Combine even and odd parts; outputs k and 7-k use the same pair with opposite signs.
      pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
      pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
      pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
      pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
      pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
      pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
      pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
      pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
    }
  }
}
182 
183 
184 // Compiler creates a fast path 1D IDCT for X non-zero rows
// Column pass of the 2-D integer IDCT, specialised at compile time.
// NONZERO_ROWS (must be > 0) is the number of leading rows of the intermediate buffer that
// are read; every row at an index >= NONZERO_ROWS is replaced by the literal 0.
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  // Transforms one column: reads pTemp[0], pTemp[8], ... (stride 8) and writes 8 clamped bytes
  // to pDst_ptr[0], pDst_ptr[8], ... (stride 8). DESCALE_ZEROSHIFT adds the +128 level shift.
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // Only the first row is used: the whole column gets the same value.
      int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
      immutable ubyte dcval_clamped = cast(ubyte)CLAMP(dcval);
      pDst_ptr[0*8] = dcval_clamped;
      pDst_ptr[1*8] = dcval_clamped;
      pDst_ptr[2*8] = dcval_clamped;
      pDst_ptr[3*8] = dcval_clamped;
      pDst_ptr[4*8] = dcval_clamped;
      pDst_ptr[5*8] = dcval_clamped;
      pDst_ptr[6*8] = dcval_clamped;
      pDst_ptr[7*8] = dcval_clamped;
    } else {
      // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
      //#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
      // ACCESS_ROW!x yields source text that is mixed in: "pTemp[x*8]" or "0".
      template ACCESS_ROW(int x) {
        static if (x < NONZERO_ROWS) enum ACCESS_ROW = "pTemp["~(x*8).stringof~"]"; else enum ACCESS_ROW = "0";
      }

      // Even part: rows 2 and 6, then 0 and 4.
      immutable int z2 = mixin(ACCESS_ROW!2);
      immutable int z3 = mixin(ACCESS_ROW!6);

      immutable int z1 = (z2 + z3)*FIX_0_541196100;
      immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
      immutable int tmp3 = z1 + z2*FIX_0_765366865;

      immutable int tmp0 = (mixin(ACCESS_ROW!0) + mixin(ACCESS_ROW!4)) << CONST_BITS;
      immutable int tmp1 = (mixin(ACCESS_ROW!0) - mixin(ACCESS_ROW!4)) << CONST_BITS;

      immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;

      // Odd part: rows 7, 5, 3 and 1.
      immutable int atmp0 = mixin(ACCESS_ROW!7), atmp1 = mixin(ACCESS_ROW!5), atmp2 = mixin(ACCESS_ROW!3), atmp3 = mixin(ACCESS_ROW!1);

      immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
      immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;

      immutable int az1 = bz1*(-FIX_0_899976223);
      immutable int az2 = bz2*(-FIX_2_562915447);
      immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
      immutable int az4 = bz4*(-FIX_0_390180644) + bz5;

      immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
      immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
      immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
      immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;

      // Final descale removes CONST_BITS, PASS1_BITS and 3 more bits, adds 128, then clamps to a byte.
      int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*0] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*7] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*1] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*6] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*2] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*5] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*3] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*4] = cast(ubyte)CLAMP(i);
    }
  }
}
260 
261 
// Row-pass dispatch table used by idct(): 64 groups of 8 entries.
// Group (block_max_zag-1) holds, for each of the 8 rows of a block, the NONZERO_COLS value
// (0..8) that selects which Row!(N).idct specialisation handles that row.
static immutable ubyte[512] s_idct_row_table = [
  1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
  4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
  6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
  6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
  8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
  8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
  8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
  8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
];
272 
// Column-pass dispatch table used by idct(): entry (block_max_zag-1) is the NONZERO_ROWS value
// (1..8) that selects which Col!(N).idct specialisation is applied to all 8 columns.
static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
274 
// Inverse DCT of one 8x8 block.
//   pSrc_ptr      - 64 input coefficients, 8 per row
//   pDst_ptr      - 64 output bytes, 8 per row
//   block_max_zag - value in 1..64 used to index the two dispatch tables; a value of 1 takes
//                   the shortcut that fills the whole block with a single value
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  // Shortcut: only the first coefficient contributes, so every output byte is identical.
  if (block_max_zag <= 1) {
    int fill = ((pSrc_ptr[0] + 4) >> 3) + 128;
    fill = CLAMP(fill);
    // Replicate the byte into all four bytes of an int so a row takes two stores.
    fill |= fill << 8;
    fill |= fill << 16;
    foreach (immutable line; 0..8) {
      int* pOut = cast(int*)(pDst_ptr + line*8);
      pOut[0] = fill;
      pOut[1] = fill;
    }
    return;
  }

  int[64] workspace;

  // Pass 1: rows. The table entry picks the specialisation for each row.
  immutable int row_tab_base = (block_max_zag - 1) * 8;
  foreach (immutable line; 0..8) {
    const(jpeg_decoder.jpgd_block_t)* pIn = pSrc_ptr + line*8;
    int* pOut = workspace.ptr + line*8;
    switch (s_idct_row_table.ptr[row_tab_base + line]) {
      case 0: Row!(0).idct(pOut, pIn); break;
      case 1: Row!(1).idct(pOut, pIn); break;
      case 2: Row!(2).idct(pOut, pIn); break;
      case 3: Row!(3).idct(pOut, pIn); break;
      case 4: Row!(4).idct(pOut, pIn); break;
      case 5: Row!(5).idct(pOut, pIn); break;
      case 6: Row!(6).idct(pOut, pIn); break;
      case 7: Row!(7).idct(pOut, pIn); break;
      case 8: Row!(8).idct(pOut, pIn); break;
      default: assert(0);
    }
  }

  // Pass 2: columns. One specialisation is shared by all eight columns.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable column; 0..8) {
    ubyte* pOut = pDst_ptr + column;
    const(int)* pIn = workspace.ptr + column;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(pOut, pIn); break;
      case 2: Col!(2).idct(pOut, pIn); break;
      case 3: Col!(3).idct(pOut, pIn); break;
      case 4: Col!(4).idct(pOut, pIn); break;
      case 5: Col!(5).idct(pOut, pIn); break;
      case 6: Col!(6).idct(pOut, pIn); break;
      case 7: Col!(7).idct(pOut, pIn); break;
      case 8: Col!(8).idct(pOut, pIn); break;
      default: assert(0);
    }
  }
}
344 
// Inverse DCT that reads only the top-left 4x4 coefficients of the block at pSrc_ptr
// (rows are still 8 coefficients apart) and writes a full 8x8 block of bytes to pDst_ptr.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] workspace;

  // Row pass over the first four rows, four coefficients each.
  foreach (immutable line; 0..4) {
    Row!(4).idct(workspace.ptr + line*8, pSrc_ptr + line*8);
  }

  // Column pass over all eight columns, using the four rows produced above.
  foreach (immutable column; 0..8) {
    Col!(4).idct(pDst_ptr + column, workspace.ptr + column);
  }
}
365 
366 
367 // ////////////////////////////////////////////////////////////////////////// //
368 struct jpeg_decoder {
369 private import core.stdc.string : memcpy, memset;
370 private:
371   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
372   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
373 
  // Element type behind the m_quant quantization-table pointers.
  alias jpgd_quant_t = short;
  // Element type of DCT coefficient blocks (m_pMCU_coefficients, and the input of the idct routines).
  alias jpgd_block_t = short;
  // Function-pointer type taking the decoder by reference plus three ints.
  // NOTE(review): the meaning of the three int arguments is not visible here -- check the decode_block_* functions.
  alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
377 
  // Decoding tables for one Huffman table, as consumed by the huff_decode overloads.
  static struct huff_tables {
    bool ac_table;        // NOTE(review): presumably true for AC tables -- set outside this part of the file
    uint[256] look_up;    // indexed by the top 8 bits of the bit buffer; a value that is negative as an int starts a tree walk
    uint[256] look_up2;   // same indexing; entries also pack the code size (bits 8..12) and extra-bit information
    ubyte[256] code_size; // code length in bits, indexed by symbol
    uint[512] tree;       // overflow tree walked one bit at a time for codes that the 8-bit lookup cannot resolve
  }
385 
  // Descriptor of a block-organised coefficient buffer (referenced by m_dc_coeffs / m_ac_coeffs).
  // NOTE(review): field meanings below are inferred from the names only; the code that fills
  // them is outside this part of the file -- confirm before relying on them.
  static struct coeff_buf {
    ubyte* pData;                 // raw storage
    int block_num_x, block_num_y; // presumably the number of blocks in each direction
    int block_len_x, block_len_y; // presumably the dimensions of one block
    int block_size;               // presumably the size of one block in bytes
  }
392 
  // Header of one allocation block; blocks are chained through m_pNext, starting at m_pMem_blocks.
  // NOTE(review): presumably part of the linear allocator mentioned in the file header; the
  // allocation code itself is outside this part of the file.
  static struct mem_block {
    mem_block* m_pNext;  // next block in the chain
    size_t m_used_count; // presumably bytes already handed out from this block -- confirm
    size_t m_size;       // presumably the capacity of m_data in bytes -- confirm
    char[1] m_data;      // first byte of the payload; presumably the real payload extends past the struct
  }
399 
400   mem_block* m_pMem_blocks;
401   int m_image_x_size;
402   int m_image_y_size;
403   JpegStreamReadFunc readfn;
404   int m_progressive_flag;
405   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
406   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
407   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
408   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
409   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
410   int m_comps_in_frame;                         // # of components in frame
411   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
412   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
413   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
414   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
415   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
416   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
417   int m_comps_in_scan;                          // # of components in scan
418   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
419   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
420   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
421   int m_spectral_start;                         // spectral selection start
422   int m_spectral_end;                           // spectral selection end
423   int m_successive_low;                         // successive approximation low
424   int m_successive_high;                        // successive approximation high
425   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
426   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
427   int m_blocks_per_mcu;
428   int m_max_blocks_per_row;
429   int m_mcus_per_row, m_mcus_per_col;
430   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
431   int m_total_lines_left;                       // total # lines left in image
432   int m_mcu_lines_left;                         // total # lines left in this MCU
433   int m_real_dest_bytes_per_scan_line;
434   int m_dest_bytes_per_scan_line;               // rounded up
435   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
436   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
437   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
438   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
439   int m_eob_run;
440   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
441   ubyte* m_pIn_buf_ofs;
442   int m_in_buf_left;
443   int m_tem_flag;
444   bool m_eof_flag;
445   ubyte[128] m_in_buf_pad_start;
446   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
447   ubyte[128] m_in_buf_pad_end;
448   int m_bits_left;
449   uint m_bit_buf;
450   int m_restart_interval;
451   int m_restarts_left;
452   int m_next_restart_num;
453   int m_max_mcus_per_row;
454   int m_max_blocks_per_mcu;
455   int m_expanded_blocks_per_mcu;
456   int m_expanded_blocks_per_row;
457   int m_expanded_blocks_per_component;
458   bool m_freq_domain_chroma_upsample;
459   int m_max_mcus_per_col;
460   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
461   jpgd_block_t* m_pMCU_coefficients;
462   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
463   ubyte* m_pSample_buf;
464   int[256] m_crr;
465   int[256] m_cbb;
466   int[256] m_crg;
467   int[256] m_cbg;
468   ubyte* m_pScan_line_0;
469   ubyte* m_pScan_line_1;
470   jpgd_status m_error_code;
471   bool m_ready_flag;
472   int m_total_bytes_read;
473 
474 public:
475   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
476   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
477   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
478 
479   ~this () { free_all_blocks(); }
480 
481   @disable this (this); // no copies
482 
483   // Call this method after constructing the object to begin decompression.
484   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
485   int begin_decoding () {
486     if (m_ready_flag) return JPGD_SUCCESS;
487     if (m_error_code) return JPGD_FAILED;
488     try {
489       decode_start();
490       m_ready_flag = true;
491       return JPGD_SUCCESS;
492     } catch (Exception e) {
493       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
494       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
495     }
496     return JPGD_FAILED;
497   }
498 
499   // Returns the next scan line.
500   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
501   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
502   // Returns JPGD_SUCCESS if a scan line has been returned.
503   // Returns JPGD_DONE if all scan lines have been returned.
504   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
505   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
506     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
507     if (m_total_lines_left == 0) return JPGD_DONE;
508     try {
509       if (m_mcu_lines_left == 0) {
510         if (m_progressive_flag) load_next_row(); else decode_next_row();
511         // Find the EOI marker if that was the last row.
512         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
513         m_mcu_lines_left = m_max_mcu_y_size;
514       }
515       if (m_freq_domain_chroma_upsample) {
516         expanded_convert();
517         *pScan_line = m_pScan_line_0;
518       } else {
519         switch (m_scan_type) {
520           case JPGD_YH2V2:
521             if ((m_mcu_lines_left & 1) == 0) {
522               H2V2Convert();
523               *pScan_line = m_pScan_line_0;
524             } else {
525               *pScan_line = m_pScan_line_1;
526             }
527             break;
528           case JPGD_YH2V1:
529             H2V1Convert();
530             *pScan_line = m_pScan_line_0;
531             break;
532           case JPGD_YH1V2:
533             if ((m_mcu_lines_left & 1) == 0) {
534               H1V2Convert();
535               *pScan_line = m_pScan_line_0;
536             } else {
537               *pScan_line = m_pScan_line_1;
538             }
539             break;
540           case JPGD_YH1V1:
541             H1V1Convert();
542             *pScan_line = m_pScan_line_0;
543             break;
544           case JPGD_GRAYSCALE:
545             gray_convert();
546             *pScan_line = m_pScan_line_0;
547             break;
548           default:
549         }
550       }
551       *pScan_line_len = m_real_dest_bytes_per_scan_line;
552       --m_mcu_lines_left;
553       --m_total_lines_left;
554       return JPGD_SUCCESS;
555     } catch (Exception) {}
556     return JPGD_FAILED;
557   }
558 
559   @property const pure nothrow @safe @nogc {
560     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
561 
562     int width () { pragma(inline, true); return m_image_x_size; }
563     int height () { pragma(inline, true); return m_image_y_size; }
564 
565     int num_components () { pragma(inline, true); return m_comps_in_frame; }
566 
567     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
568     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
569 
570     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
571     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
572   }
573 
574 private:
575   // Retrieve one character from the input stream.
576   uint get_char () {
577     // Any bytes remaining in buffer?
578     if (!m_in_buf_left) {
579       // Try to get more bytes.
580       prep_in_buffer();
581       // Still nothing to get?
582       if (!m_in_buf_left) {
583         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
584         int t = m_tem_flag;
585         m_tem_flag ^= 1;
586         return (t ? 0xD9 : 0xFF);
587       }
588     }
589     uint c = *m_pIn_buf_ofs++;
590     --m_in_buf_left;
591     return c;
592   }
593 
594   // Same as previous method, except can indicate if the character is a pad character or not.
595   uint get_char (bool* pPadding_flag) {
596     if (!m_in_buf_left) {
597       prep_in_buffer();
598       if (!m_in_buf_left) {
599         *pPadding_flag = true;
600         int t = m_tem_flag;
601         m_tem_flag ^= 1;
602         return (t ? 0xD9 : 0xFF);
603       }
604     }
605     *pPadding_flag = false;
606     uint c = *m_pIn_buf_ofs++;
607     --m_in_buf_left;
608     return c;
609   }
610 
611   // Inserts a previously retrieved character back into the input buffer.
612   void stuff_char (ubyte q) {
613     *(--m_pIn_buf_ofs) = q;
614     m_in_buf_left++;
615   }
616 
617   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
618   ubyte get_octet () {
619     bool padding_flag;
620     int c = get_char(&padding_flag);
621     if (c == 0xFF) {
622       if (padding_flag) return 0xFF;
623       c = get_char(&padding_flag);
624       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
625       if (c == 0x00) return 0xFF;
626       stuff_char(cast(ubyte)(c));
627       stuff_char(0xFF);
628       return 0xFF;
629     }
630     return cast(ubyte)(c);
631   }
632 
633   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
634   uint get_bits (int num_bits) {
635     if (!num_bits) return 0;
636     uint i = m_bit_buf >> (32 - num_bits);
637     if ((m_bits_left -= num_bits) <= 0) {
638       m_bit_buf <<= (num_bits += m_bits_left);
639       uint c1 = get_char();
640       uint c2 = get_char();
641       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
642       m_bit_buf <<= -m_bits_left;
643       m_bits_left += 16;
644       assert(m_bits_left >= 0);
645     } else {
646       m_bit_buf <<= num_bits;
647     }
648     return i;
649   }
650 
651   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
652   uint get_bits_no_markers (int num_bits) {
653     if (!num_bits) return 0;
654     uint i = m_bit_buf >> (32 - num_bits);
655     if ((m_bits_left -= num_bits) <= 0) {
656       m_bit_buf <<= (num_bits += m_bits_left);
657       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
658         uint c1 = get_octet();
659         uint c2 = get_octet();
660         m_bit_buf |= (c1 << 8) | c2;
661       } else {
662         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
663         m_in_buf_left -= 2;
664         m_pIn_buf_ofs += 2;
665       }
666       m_bit_buf <<= -m_bits_left;
667       m_bits_left += 16;
668       assert(m_bits_left >= 0);
669     } else {
670       m_bit_buf <<= num_bits;
671     }
672     return i;
673   }
674 
675   // Decodes a Huffman encoded symbol.
676   int huff_decode (huff_tables *pH) {
677     int symbol;
678     // Check first 8-bits: do we have a complete symbol?
679     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
680       // Decode more bits, use a tree traversal to find symbol.
681       int ofs = 23;
682       do {
683         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
684         --ofs;
685       } while (symbol < 0);
686       get_bits_no_markers(8 + (23 - ofs));
687     } else {
688       get_bits_no_markers(pH.code_size.ptr[symbol]);
689     }
690     return symbol;
691   }
692 
  // Decodes a Huffman encoded symbol together with the extra bits that follow it.
  // The low nibble of the symbol is the number of extra bits; their value is stored in
  // `extra_bits`.
  // pH.look_up2 entries, as used by the masks below: bits 0-7 hold the symbol, bits 8-12
  // a bit count, bit 15 flags that the extra bits' value is already stored in bits 16 and up.
  // A negative entry means the code is longer than 8 bits and pH.tree has to be walked.
  // Returns the decoded symbol.
  int huff_decode (huff_tables *pH, ref int extra_bits) {
    int symbol;
    // Check first 8-bits: do we have a complete symbol?
    if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
      // Use a tree traversal to find symbol.
      int ofs = 23; // bit 23 of m_bit_buf is the first bit after the 8 lookup bits
      do {
        // A negative entry is a link: negating (entry + next code bit) gives the index of the next tree slot.
        symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
        --ofs;
      } while (symbol < 0);
      // Consume the code (8 lookup bits plus one bit per tree step), then read the extra bits separately.
      get_bits_no_markers(8 + (23 - ofs));
      extra_bits = get_bits_no_markers(symbol & 0xF);
    } else {
      // Sanity check: the stored bit count equals the code size, plus the extra bit count when those bits were folded in.
      assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
      if (symbol & 0x8000) {
        // Code and extra bits both fit in the lookup: skip them in one go and take the stored value.
        get_bits_no_markers((symbol >> 8) & 31);
        extra_bits = symbol >> 16;
      } else {
        int code_size = (symbol >> 8) & 31;
        int num_extra_bits = symbol & 0xF;
        int bits = code_size + num_extra_bits;
        // NOTE(review): presumably this bounds how many bits a single get_bits_no_markers() call can serve - confirm against its implementation.
        if (bits <= (m_bits_left + 16)) {
          // One combined read; the mask drops the code bits and keeps only the extra bits.
          extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
        } else {
          get_bits_no_markers(code_size);
          extra_bits = get_bits_no_markers(num_extra_bits);
        }
      }
      // Strip the packed bit count / flag so only the symbol is returned.
      symbol &= 0xFF;
    }
    return symbol;
  }
726 
727   // Tables and macro used to fully decode the DPCM differences.
728   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
729   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
730   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
731   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
732   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
733   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
734 
  // Clamps a value between 0-255.
  // `clamp` is only a local name for CLAMP, which is declared outside this section.
  // Former inline implementation, kept for reference (presumably equivalent to CLAMP - verify):
  //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
  alias clamp = CLAMP;
738 
  // Helpers that build four 4x4 integer matrices (P, Q, R, S) from an 8x8 coefficient
  // block using fixed-point constants. Everything in here is static; the struct is used
  // purely as a namespace.
  // NOTE(review): presumably this is the frequency-domain H2V2 chroma upsampling mentioned in the file header - confirm at the call sites.
  static struct DCT_Upsample {
  static:
    // 4x4 matrix of ints with element-wise + and - operators.
    static struct Matrix44 {
    pure nothrow @trusted @nogc:
      alias Element_Type = int;
      enum { NUM_ROWS = 4, NUM_COLS = 4 }

      Element_Type[NUM_COLS][NUM_ROWS] v;

      // Copies all four rows from `m`.
      this() (const scope auto ref Matrix44 m) {
        foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
      }

      //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
      //@property int cols () const { pragma(inline, true); return NUM_COLS; }

      // Element access by (row, column). Goes through .ptr, so there is no bounds checking.
      ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }

      // this += a, element by element.
      ref Matrix44 opOpAssign(string op:"+") (const scope auto ref Matrix44 a) {
        foreach (int r; 0..NUM_ROWS) {
          at(r, 0) += a.at(r, 0);
          at(r, 1) += a.at(r, 1);
          at(r, 2) += a.at(r, 2);
          at(r, 3) += a.at(r, 3);
        }
        return this;
      }

      // this -= a, element by element.
      ref Matrix44 opOpAssign(string op:"-") (const scope auto ref Matrix44 a) {
        foreach (int r; 0..NUM_ROWS) {
          at(r, 0) -= a.at(r, 0);
          at(r, 1) -= a.at(r, 1);
          at(r, 2) -= a.at(r, 2);
          at(r, 3) -= a.at(r, 3);
        }
        return this;
      }

      // Returns this + b as a new matrix.
      Matrix44 opBinary(string op:"+") (const scope auto ref Matrix44 b) const {
        alias a = this;
        Matrix44 ret;
        foreach (int r; 0..NUM_ROWS) {
          ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
          ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
          ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
          ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
        }
        return ret;
      }

      // Returns this - b as a new matrix.
      Matrix44 opBinary(string op:"-") (const scope auto ref Matrix44 b) const {
        alias a = this;
        Matrix44 ret;
        foreach (int r; 0..NUM_ROWS) {
          ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
          ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
          ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
          ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
        }
        return ret;
      }

      // Writes a + b into pDst. Element (r, c) is stored at pDst[c*8 + r], i.e. transposed
      // into a buffer whose lines are 8 entries apart; values are cast to jpgd_block_t.
      static void add_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
        foreach (int r; 0..4) {
          pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
          pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
          pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
          pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
        }
      }

      // Writes a - b into pDst, using the same pDst[c*8 + r] layout as add_and_store.
      static void sub_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
        foreach (int r; 0..4) {
          pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
          pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
          pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
          pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
        }
      }
    }

    // Fixed-point format used by the constants below: 10 fractional bits, so SCALE == 1024.
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    alias Temp_Type = int;
    //TODO: convert defines to mixins
    //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
    //#define F(i) ((int)((i) * SCALE + .5f))
    // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
    //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])

    // D: drops the fractional bits again, adding half of SCALE first so the shift rounds.
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // F: converts a float constant to fixed point at compile time.
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));

    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // calc() fills P and Q from the 8x8 block at pSrc (entry (c, r) is read from pSrc[c + r*8]).
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT yields source text for mixin(): the literal "0" for entries outside the
        // non-zero NUM_COLS x NUM_ROWS area, otherwise the pSrc[...] load for that entry.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }

    // Counterpart of P_Q: calc() fills R and S from the same 8x8 block layout.
    // NUM_ROWS/NUM_COLS again give the number of non-zero rows/cols of the input.
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // Same compile-time selector as in P_Q.calc: "0" outside the non-zero area, a pSrc[...] load inside it.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
  } // end namespace DCT_Upsample
994 
995   // Unconditionally frees all allocated m_blocks.
996   void free_all_blocks () {
997     //m_pStream = null;
998     readfn = null;
999     for (mem_block *b = m_pMem_blocks; b; ) {
1000       mem_block* n = b.m_pNext;
1001       jpgd_free(b);
1002       b = n;
1003     }
1004     m_pMem_blocks = null;
1005   }
1006 
1007   // This method handles all errors. It will never return.
1008   // It could easily be changed to use C++ exceptions.
1009   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
1010     m_error_code = status;
1011     free_all_blocks();
1012     //longjmp(m_jmp_state, status);
1013     throw new Exception("jpeg decoding error", __FILE__, line);
1014   }
1015 
1016   void* alloc (size_t nSize, bool zero=false) {
1017     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1018     char *rv = null;
1019     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1020     {
1021       if ((b.m_used_count + nSize) <= b.m_size)
1022       {
1023         rv = b.m_data.ptr + b.m_used_count;
1024         b.m_used_count += nSize;
1025         break;
1026       }
1027     }
1028     if (!rv)
1029     {
1030       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1031       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1032       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1033       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1034       b.m_used_count = nSize;
1035       b.m_size = capacity;
1036       rv = b.m_data.ptr;
1037     }
1038     if (zero) memset(rv, 0, nSize);
1039     return rv;
1040   }
1041 
1042   void word_clear (void *p, ushort c, uint n) {
1043     ubyte *pD = cast(ubyte*)p;
1044     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1045     while (n)
1046     {
1047       pD[0] = l; pD[1] = h; pD += 2;
1048       n--;
1049     }
1050   }
1051 
1052   // Refill the input buffer.
1053   // This method will sit in a loop until (A) the buffer is full or (B)
1054   // the stream's read() method reports and end of file condition.
1055   void prep_in_buffer () {
1056     m_in_buf_left = 0;
1057     m_pIn_buf_ofs = m_in_buf.ptr;
1058 
1059     if (m_eof_flag)
1060       return;
1061 
1062     do
1063     {
1064       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1065       if (bytes_read == -1)
1066         stop_decoding(JPGD_STREAM_READ);
1067 
1068       m_in_buf_left += bytes_read;
1069     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1070 
1071     m_total_bytes_read += m_in_buf_left;
1072 
1073     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1074     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1075     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1076   }
1077 
1078   // Read a Huffman code table.
1079   void read_dht_marker () {
1080     int i, index, count;
1081     ubyte[17] huff_num;
1082     ubyte[256] huff_val;
1083 
1084     uint num_left = get_bits(16);
1085 
1086     if (num_left < 2)
1087       stop_decoding(JPGD_BAD_DHT_MARKER);
1088 
1089     num_left -= 2;
1090 
1091     while (num_left)
1092     {
1093       index = get_bits(8);
1094 
1095       huff_num.ptr[0] = 0;
1096 
1097       count = 0;
1098 
1099       for (i = 1; i <= 16; i++)
1100       {
1101         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1102         count += huff_num.ptr[i];
1103       }
1104 
1105       if (count > 255)
1106         stop_decoding(JPGD_BAD_DHT_COUNTS);
1107 
1108       for (i = 0; i < count; i++)
1109         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1110 
1111       i = 1 + 16 + count;
1112 
1113       if (num_left < cast(uint)i)
1114         stop_decoding(JPGD_BAD_DHT_MARKER);
1115 
1116       num_left -= i;
1117 
1118       if ((index & 0x10) > 0x10)
1119         stop_decoding(JPGD_BAD_DHT_INDEX);
1120 
1121       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1122 
1123       if (index >= JPGD_MAX_HUFF_TABLES)
1124         stop_decoding(JPGD_BAD_DHT_INDEX);
1125 
1126       if (!m_huff_num.ptr[index])
1127         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1128 
1129       if (!m_huff_val.ptr[index])
1130         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1131 
1132       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1133       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1134       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1135     }
1136   }
1137 
1138   // Read a quantization table.
1139   void read_dqt_marker () {
1140     int n, i, prec;
1141     uint num_left;
1142     uint temp;
1143 
1144     num_left = get_bits(16);
1145 
1146     if (num_left < 2)
1147       stop_decoding(JPGD_BAD_DQT_MARKER);
1148 
1149     num_left -= 2;
1150 
1151     while (num_left)
1152     {
1153       n = get_bits(8);
1154       prec = n >> 4;
1155       n &= 0x0F;
1156 
1157       if (n >= JPGD_MAX_QUANT_TABLES)
1158         stop_decoding(JPGD_BAD_DQT_TABLE);
1159 
1160       if (!m_quant.ptr[n])
1161         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1162 
1163       // read quantization entries, in zag order
1164       for (i = 0; i < 64; i++)
1165       {
1166         temp = get_bits(8);
1167 
1168         if (prec)
1169           temp = (temp << 8) + get_bits(8);
1170 
1171         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1172       }
1173 
1174       i = 64 + 1;
1175 
1176       if (prec)
1177         i += 64;
1178 
1179       if (num_left < cast(uint)i)
1180         stop_decoding(JPGD_BAD_DQT_LENGTH);
1181 
1182       num_left -= i;
1183     }
1184   }
1185 
1186   // Read the start of frame (SOF) marker.
1187   void read_sof_marker () {
1188     int i;
1189     uint num_left;
1190 
1191     num_left = get_bits(16);
1192 
1193     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1194       stop_decoding(JPGD_BAD_PRECISION);
1195 
1196     m_image_y_size = get_bits(16);
1197 
1198     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1199       stop_decoding(JPGD_BAD_HEIGHT);
1200 
1201     m_image_x_size = get_bits(16);
1202 
1203     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1204       stop_decoding(JPGD_BAD_WIDTH);
1205 
1206     m_comps_in_frame = get_bits(8);
1207 
1208     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1209       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1210 
1211     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1212       stop_decoding(JPGD_BAD_SOF_LENGTH);
1213 
1214     for (i = 0; i < m_comps_in_frame; i++)
1215     {
1216       m_comp_ident.ptr[i]  = get_bits(8);
1217       m_comp_h_samp.ptr[i] = get_bits(4);
1218       m_comp_v_samp.ptr[i] = get_bits(4);
1219       m_comp_quant.ptr[i]  = get_bits(8);
1220     }
1221   }
1222 
1223   // Used to skip unrecognized markers.
1224   void skip_variable_marker () {
1225     uint num_left;
1226 
1227     num_left = get_bits(16);
1228 
1229     if (num_left < 2)
1230       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1231 
1232     num_left -= 2;
1233 
1234     while (num_left)
1235     {
1236       get_bits(8);
1237       num_left--;
1238     }
1239   }
1240 
1241   // Read a define restart interval (DRI) marker.
1242   void read_dri_marker () {
1243     if (get_bits(16) != 4)
1244       stop_decoding(JPGD_BAD_DRI_LENGTH);
1245 
1246     m_restart_interval = get_bits(16);
1247   }
1248 
1249   // Read a start of scan (SOS) marker.
1250   void read_sos_marker () {
1251     uint num_left;
1252     int i, ci, n, c, cc;
1253 
1254     num_left = get_bits(16);
1255 
1256     n = get_bits(8);
1257 
1258     m_comps_in_scan = n;
1259 
1260     num_left -= 3;
1261 
1262     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1263       stop_decoding(JPGD_BAD_SOS_LENGTH);
1264 
1265     for (i = 0; i < n; i++)
1266     {
1267       cc = get_bits(8);
1268       c = get_bits(8);
1269       num_left -= 2;
1270 
1271       for (ci = 0; ci < m_comps_in_frame; ci++)
1272         if (cc == m_comp_ident.ptr[ci])
1273           break;
1274 
1275       if (ci >= m_comps_in_frame)
1276         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1277 
1278       m_comp_list.ptr[i]    = ci;
1279       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1280       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1281     }
1282 
1283     m_spectral_start  = get_bits(8);
1284     m_spectral_end    = get_bits(8);
1285     m_successive_high = get_bits(4);
1286     m_successive_low  = get_bits(4);
1287 
1288     if (!m_progressive_flag)
1289     {
1290       m_spectral_start = 0;
1291       m_spectral_end = 63;
1292     }
1293 
1294     num_left -= 3;
1295 
1296     /* read past whatever is num_left */
1297     while (num_left)
1298     {
1299       get_bits(8);
1300       num_left--;
1301     }
1302   }
1303 
1304   // Finds the next marker.
1305   int next_marker () {
1306     uint c, bytes;
1307 
1308     bytes = 0;
1309 
1310     do
1311     {
1312       do
1313       {
1314         bytes++;
1315         c = get_bits(8);
1316       } while (c != 0xFF);
1317 
1318       do
1319       {
1320         c = get_bits(8);
1321       } while (c == 0xFF);
1322 
1323     } while (c == 0);
1324 
1325     // If bytes > 0 here, there where extra bytes before the marker (not good).
1326 
1327     return c;
1328   }
1329 
1330   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1331   // encountered.
1332   int process_markers (bool allow_restarts = false) {
1333     int c;
1334 
1335     for ( ; ; ) {
1336       c = next_marker();
1337 
1338       switch (c)
1339       {
1340         case M_SOF0:
1341         case M_SOF1:
1342         case M_SOF2:
1343         case M_SOF3:
1344         case M_SOF5:
1345         case M_SOF6:
1346         case M_SOF7:
1347         //case M_JPG:
1348         case M_SOF9:
1349         case M_SOF10:
1350         case M_SOF11:
1351         case M_SOF13:
1352         case M_SOF14:
1353         case M_SOF15:
1354         case M_SOI:
1355         case M_EOI:
1356         case M_SOS:
1357           return c;
1358         case M_DHT:
1359           read_dht_marker();
1360           break;
1361         // No arithmitic support - dumb patents!
1362         case M_DAC:
1363           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1364           break;
1365         case M_DQT:
1366           read_dqt_marker();
1367           break;
1368         case M_DRI:
1369           read_dri_marker();
1370           break;
1371         //case M_APP0:  /* no need to read the JFIF marker */
1372 
1373         case M_RST0:    /* no parameters */
1374         case M_RST1:
1375         case M_RST2:
1376         case M_RST3:
1377         case M_RST4:
1378         case M_RST5:
1379         case M_RST6:
1380         case M_RST7:
1381 		if(allow_restarts)
1382 			continue;
1383 		else
1384 			goto case;
1385         case M_JPG:
1386         case M_TEM:
1387           stop_decoding(JPGD_UNEXPECTED_MARKER);
1388           break;
1389         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1390           skip_variable_marker();
1391           break;
1392       }
1393     }
1394 
1395     assert(0);
1396   }
1397 
1398   // Finds the start of image (SOI) marker.
1399   // This code is rather defensive: it only checks the first 512 bytes to avoid
1400   // false positives.
1401   void locate_soi_marker () {
1402     uint lastchar, thischar;
1403     uint bytesleft;
1404 
1405     lastchar = get_bits(8);
1406 
1407     thischar = get_bits(8);
1408 
1409     /* ok if it's a normal JPEG file without a special header */
1410 
1411     if ((lastchar == 0xFF) && (thischar == M_SOI))
1412       return;
1413 
1414     bytesleft = 4096; //512;
1415 
1416     for ( ; ; )
1417     {
1418       if (--bytesleft == 0)
1419         stop_decoding(JPGD_NOT_JPEG);
1420 
1421       lastchar = thischar;
1422 
1423       thischar = get_bits(8);
1424 
1425       if (lastchar == 0xFF)
1426       {
1427         if (thischar == M_SOI)
1428           break;
1429         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1430           stop_decoding(JPGD_NOT_JPEG);
1431       }
1432     }
1433 
1434     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1435     thischar = (m_bit_buf >> 24) & 0xFF;
1436 
1437     if (thischar != 0xFF)
1438       stop_decoding(JPGD_NOT_JPEG);
1439   }
1440 
1441   // Find a start of frame (SOF) marker.
1442   void locate_sof_marker () {
1443     locate_soi_marker();
1444 
1445     int c = process_markers();
1446 
1447     switch (c)
1448     {
1449       case M_SOF2:
1450         m_progressive_flag = true;
1451         goto case;
1452       case M_SOF0:  /* baseline DCT */
1453       case M_SOF1:  /* extended sequential DCT */
1454         read_sof_marker();
1455         break;
1456       case M_SOF9:  /* Arithmitic coding */
1457         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1458         break;
1459       default:
1460         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1461         break;
1462     }
1463   }
1464 
1465   // Find a start of scan (SOS) marker.
1466   int locate_sos_marker () {
1467     int c;
1468 
1469     c = process_markers();
1470 
1471     if (c == M_EOI)
1472       return false;
1473     else if (c != M_SOS)
1474       stop_decoding(JPGD_UNEXPECTED_MARKER);
1475 
1476     read_sos_marker();
1477 
1478     return true;
1479   }
1480 
1481   // Reset everything to default/uninitialized state.
1482   void initit (JpegStreamReadFunc rfn) {
1483     m_pMem_blocks = null;
1484     m_error_code = JPGD_SUCCESS;
1485     m_ready_flag = false;
1486     m_image_x_size = m_image_y_size = 0;
1487     readfn = rfn;
1488     m_progressive_flag = false;
1489 
1490     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1491     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1492     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1493     memset(m_quant.ptr, 0, m_quant.sizeof);
1494 
1495     m_scan_type = 0;
1496     m_comps_in_frame = 0;
1497 
1498     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1499     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1500     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1501     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1502     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1503     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1504 
1505     m_comps_in_scan = 0;
1506     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1507     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1508     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1509 
1510     m_spectral_start = 0;
1511     m_spectral_end = 0;
1512     m_successive_low = 0;
1513     m_successive_high = 0;
1514     m_max_mcu_x_size = 0;
1515     m_max_mcu_y_size = 0;
1516     m_blocks_per_mcu = 0;
1517     m_max_blocks_per_row = 0;
1518     m_mcus_per_row = 0;
1519     m_mcus_per_col = 0;
1520     m_expanded_blocks_per_component = 0;
1521     m_expanded_blocks_per_mcu = 0;
1522     m_expanded_blocks_per_row = 0;
1523     m_freq_domain_chroma_upsample = false;
1524 
1525     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1526 
1527     m_total_lines_left = 0;
1528     m_mcu_lines_left = 0;
1529     m_real_dest_bytes_per_scan_line = 0;
1530     m_dest_bytes_per_scan_line = 0;
1531     m_dest_bytes_per_pixel = 0;
1532 
1533     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1534 
1535     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1536     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1537     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1538 
1539     m_eob_run = 0;
1540 
1541     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1542 
1543     m_pIn_buf_ofs = m_in_buf.ptr;
1544     m_in_buf_left = 0;
1545     m_eof_flag = false;
1546     m_tem_flag = 0;
1547 
1548     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1549     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1550     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1551 
1552     m_restart_interval = 0;
1553     m_restarts_left    = 0;
1554     m_next_restart_num = 0;
1555 
1556     m_max_mcus_per_row = 0;
1557     m_max_blocks_per_mcu = 0;
1558     m_max_mcus_per_col = 0;
1559 
1560     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1561     m_pMCU_coefficients = null;
1562     m_pSample_buf = null;
1563 
1564     m_total_bytes_read = 0;
1565 
1566     m_pScan_line_0 = null;
1567     m_pScan_line_1 = null;
1568 
1569     // Ready the input buffer.
1570     prep_in_buffer();
1571 
1572     // Prime the bit buffer.
1573     m_bits_left = 16;
1574     m_bit_buf = 0;
1575 
1576     get_bits(16);
1577     get_bits(16);
1578 
1579     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1580       m_mcu_block_max_zag.ptr[i] = 64;
1581   }
1582 
  // Number of fractional bits in the fixed-point values built by FIX.
  enum SCALEBITS = 16;
  // The value 0.5 in that fixed-point format; create_look_ups adds it before
  // shifting right by SCALEBITS.
  enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
  // Compile-time conversion of a float constant to fixed point: the value is
  // scaled by 2^SCALEBITS, 0.5 is added and the result is cast to int.
  enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1586 
1587   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1588   void create_look_ups () {
1589     for (int i = 0; i <= 255; i++)
1590     {
1591       int k = i - 128;
1592       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1593       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1594       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1595       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1596     }
1597   }
1598 
  // This method throws back into the stream any bytes that were read
  // into the bit buffer during initial marker scanning.
  void fix_in_buffer () {
    // In case any 0xFF's were pulled into the buffer during marker scanning.
    // The bit buffer must be byte aligned for the bytes to be pushed back whole.
    assert((m_bits_left & 7) == 0);

    // Hand the bytes currently held in m_bit_buf back via stuff_char, lowest
    // byte first. The two low bytes are only returned when m_bits_left says
    // they are present (16 -> both, >= 8 -> the second one only).
    if (m_bits_left == 16)
      stuff_char(cast(ubyte)(m_bit_buf & 0xFF));

    if (m_bits_left >= 8)
      stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));

    // The two high bytes are always returned.
    stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
    stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));

    // Re-prime the bit buffer, this time with the reader that does not
    // interpret markers.
    m_bits_left = 16;
    get_bits_no_markers(16);
    get_bits_no_markers(16);
  }
1618 
1619   void transform_mcu (int mcu_row) {
1620     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1621     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1622 
1623     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1624     {
1625       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1626       pSrc_ptr += 64;
1627       pDst_ptr += 64;
1628     }
1629   }
1630 
  // Lookup table used by transform_mcu_expand. It is indexed by a block's
  // (max zigzag count - 1); each entry is matched there against case labels of
  // the form `r*16 + c`, which select the DCT_Upsample P_Q!(r, c) / R_S!(r, c)
  // instantiation to run.
  // NOTE(review): presumably r and c are the number of coefficient rows and
  // columns that can be non-zero for that zigzag extent -- confirm against
  // DCT_Upsample.
  static immutable ubyte[64] s_max_rc = [
    17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
    102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
  ];
1637 
  // Inverse-transforms the current MCU when chroma is upsampled in the
  // frequency domain: the first m_expanded_blocks_per_component blocks go
  // through the regular idct, then each of the next two source blocks is
  // expanded into four output blocks via DCT_Upsample and idct_4x4.
  // Output goes to the slot of MCU number `mcu_row` in m_pSample_buf.
  void transform_mcu_expand (int mcu_row) {
    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
    ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;

    // Y IDCT
    int mcu_block;
    for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
    {
      idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
      pSrc_ptr += 64;
      pDst_ptr += 64;
    }

    // Chroma IDCT, with upsampling
    jpgd_block_t[64] temp_block;

    // Two iterations: one per remaining source block (mcu_block keeps counting
    // from where the loop above stopped).
    for (int i = 0; i < 2; i++)
    {
      DCT_Upsample.Matrix44 P, Q, R, S;

      assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
      assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);

      // Convert the block's zigzag count into a 0-based index for s_max_rc.
      int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
      if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
      // The table entry (r*16 + c) picks the P_Q / R_S template instantiation;
      // the template arguments must be compile-time constants, hence the switch.
      switch (s_max_rc.ptr[max_zag])
      {
      case 1*16+1:
        DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
        break;
      case 1*16+2:
        DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
        break;
      case 2*16+2:
        DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+2:
        DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+3:
        DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
        break;
      case 3*16+4:
        DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
        break;
      case 4*16+4:
        DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+4:
        DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+5:
        DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
        break;
      case 5*16+6:
        DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
        break;
      case 6*16+6:
        DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+6:
        DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+7:
        DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
        break;
      case 7*16+8:
        DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
        break;
      case 8*16+8:
        DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
        break;
      default:
        assert(false);
      }

      // a = P+Q, b = P-Q, c = R+S, d = R-S. The differences are computed in
      // place, so b and d simply alias P and R from here on.
      auto a = DCT_Upsample.Matrix44(P + Q);
      P -= Q;
      DCT_Upsample.Matrix44* b = &P;
      auto c = DCT_Upsample.Matrix44(R + S);
      R -= S;
      DCT_Upsample.Matrix44* d = &R;

      // Four output blocks per source block: a+c, a-c, b+d, b-d, each stored
      // into temp_block and run through the 4x4 inverse DCT.
      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      pSrc_ptr += 64;
    }
  }
1755 
1756   // Loads and dequantizes the next row of (already decoded) coefficients.
1757   // Progressive images only.
1758   void load_next_row () {
1759     int i;
1760     jpgd_block_t *p;
1761     jpgd_quant_t *q;
1762     int mcu_row, mcu_block, row_block = 0;
1763     int component_num, component_id;
1764     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1765 
1766     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1767 
1768     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1769     {
1770       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1771 
1772       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1773       {
1774         component_id = m_mcu_org.ptr[mcu_block];
1775         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1776 
1777         p = m_pMCU_coefficients + 64 * mcu_block;
1778 
1779         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1780         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1781         p[0] = pDC[0];
1782         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1783 
1784         for (i = 63; i > 0; i--)
1785           if (p[g_ZAG[i]])
1786             break;
1787 
1788         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1789 
1790         for ( ; i >= 0; i--)
1791           if (p[g_ZAG[i]])
1792             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1793 
1794         row_block++;
1795 
1796         if (m_comps_in_scan == 1)
1797           block_x_mcu.ptr[component_id]++;
1798         else
1799         {
1800           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1801           {
1802             block_x_mcu_ofs = 0;
1803 
1804             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1805             {
1806               block_y_mcu_ofs = 0;
1807 
1808               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1809             }
1810           }
1811         }
1812       }
1813 
1814       if (m_freq_domain_chroma_upsample)
1815         transform_mcu_expand(mcu_row);
1816       else
1817         transform_mcu(mcu_row);
1818     }
1819 
1820     if (m_comps_in_scan == 1)
1821       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1822     else
1823     {
1824       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1825       {
1826         component_id = m_comp_list.ptr[component_num];
1827 
1828         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1829       }
1830     }
1831   }
1832 
1833   // Restart interval processing.
1834   void process_restart () {
1835     int i;
1836     int c = 0;
1837 
1838     // Align to a byte boundry
1839     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1840     //get_bits_no_markers(m_bits_left & 7);
1841 
1842     // Let's scan a little bit to find the marker, but not _too_ far.
1843     // 1536 is a "fudge factor" that determines how much to scan.
1844     for (i = 1536; i > 0; i--)
1845       if (get_char() == 0xFF)
1846         break;
1847 
1848     if (i == 0)
1849       stop_decoding(JPGD_BAD_RESTART_MARKER);
1850 
1851     for ( ; i > 0; i--)
1852       if ((c = get_char()) != 0xFF)
1853         break;
1854 
1855     if (i == 0)
1856       stop_decoding(JPGD_BAD_RESTART_MARKER);
1857 
1858     // Is it the expected marker? If not, something bad happened.
1859     if (c != (m_next_restart_num + M_RST0))
1860       stop_decoding(JPGD_BAD_RESTART_MARKER);
1861 
1862     // Reset each component's DC prediction values.
1863     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1864 
1865     m_eob_run = 0;
1866 
1867     m_restarts_left = m_restart_interval;
1868 
1869     m_next_restart_num = (m_next_restart_num + 1) & 7;
1870 
1871     // Get the bit buffer going again...
1872 
1873     m_bits_left = 16;
1874     get_bits_no_markers(16);
1875     get_bits_no_markers(16);
1876   }
1877 
1878   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1879 
1880   // Decodes and dequantizes the next row of coefficients.
1881   void decode_next_row () {
1882     int row_block = 0;
1883 
1884     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1885     {
1886       if ((m_restart_interval) && (m_restarts_left == 0))
1887         process_restart();
1888 
1889       jpgd_block_t* p = m_pMCU_coefficients;
1890       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1891       {
1892         int component_id = m_mcu_org.ptr[mcu_block];
1893         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1894 
1895         int r, s;
1896         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
1897         s = JPGD_HUFF_EXTEND(r, s);
1898 
1899         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
1900 
1901         p[0] = cast(jpgd_block_t)(s * q[0]);
1902 
1903         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
1904 
1905         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
1906 
1907         int k;
1908         for (k = 1; k < 64; k++)
1909         {
1910           int extra_bits;
1911           s = huff_decode(pH, extra_bits);
1912 
1913           r = s >> 4;
1914           s &= 15;
1915 
1916           if (s)
1917           {
1918             if (r)
1919             {
1920               if ((k + r) > 63)
1921                 stop_decoding(JPGD_DECODE_ERROR);
1922 
1923               if (k < prev_num_set)
1924               {
1925                 int n = JPGD_MIN(r, prev_num_set - k);
1926                 int kt = k;
1927                 while (n--)
1928                   p[g_ZAG[kt++]] = 0;
1929               }
1930 
1931               k += r;
1932             }
1933 
1934             s = JPGD_HUFF_EXTEND(extra_bits, s);
1935 
1936             assert(k < 64);
1937 
1938             p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
1939           }
1940           else
1941           {
1942             if (r == 15)
1943             {
1944               if ((k + 16) > 64)
1945                 stop_decoding(JPGD_DECODE_ERROR);
1946 
1947               if (k < prev_num_set)
1948               {
1949                 int n = JPGD_MIN(16, prev_num_set - k);
1950                 int kt = k;
1951                 while (n--)
1952                 {
1953                   assert(kt <= 63);
1954                   p[g_ZAG[kt++]] = 0;
1955                 }
1956               }
1957 
1958               k += 16 - 1; // - 1 because the loop counter is k
1959               assert(p[g_ZAG[k]] == 0);
1960             }
1961             else
1962               break;
1963           }
1964         }
1965 
1966         if (k < prev_num_set)
1967         {
1968           int kt = k;
1969           while (kt < prev_num_set)
1970             p[g_ZAG[kt++]] = 0;
1971         }
1972 
1973         m_mcu_block_max_zag.ptr[mcu_block] = k;
1974 
1975         row_block++;
1976       }
1977 
1978       if (m_freq_domain_chroma_upsample)
1979         transform_mcu_expand(mcu_row);
1980       else
1981         transform_mcu(mcu_row);
1982 
1983       m_restarts_left--;
1984     }
1985   }
1986 
  // YCbCr H1V1 (1x1:1:1, 3 blocks per MCU) to RGB
1988   void H1V1Convert () {
1989     int row = m_max_mcu_y_size - m_mcu_lines_left;
1990     ubyte *d = m_pScan_line_0;
1991     ubyte *s = m_pSample_buf + row * 8;
1992 
1993     for (int i = m_max_mcus_per_row; i > 0; i--)
1994     {
1995       for (int j = 0; j < 8; j++)
1996       {
1997         int y = s[j];
1998         int cb = s[64+j];
1999         int cr = s[128+j];
2000 
2001         d[0] = clamp(y + m_crr.ptr[cr]);
2002         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2003         d[2] = clamp(y + m_cbb.ptr[cb]);
2004         d[3] = 255;
2005 
2006         d += 4;
2007       }
2008 
2009       s += 64*3;
2010     }
2011   }
2012 
  // YCbCr H2V1 (2x1:1:1, 4 blocks per MCU) to RGB
2014   void H2V1Convert () {
2015     int row = m_max_mcu_y_size - m_mcu_lines_left;
2016     ubyte *d0 = m_pScan_line_0;
2017     ubyte *y = m_pSample_buf + row * 8;
2018     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2019 
2020     for (int i = m_max_mcus_per_row; i > 0; i--)
2021     {
2022       for (int l = 0; l < 2; l++)
2023       {
2024         for (int j = 0; j < 4; j++)
2025         {
2026           int cb = c[0];
2027           int cr = c[64];
2028 
2029           int rc = m_crr.ptr[cr];
2030           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2031           int bc = m_cbb.ptr[cb];
2032 
2033           int yy = y[j<<1];
2034           d0[0] = clamp(yy+rc);
2035           d0[1] = clamp(yy+gc);
2036           d0[2] = clamp(yy+bc);
2037           d0[3] = 255;
2038 
2039           yy = y[(j<<1)+1];
2040           d0[4] = clamp(yy+rc);
2041           d0[5] = clamp(yy+gc);
2042           d0[6] = clamp(yy+bc);
2043           d0[7] = 255;
2044 
2045           d0 += 8;
2046 
2047           c++;
2048         }
2049         y += 64;
2050       }
2051 
2052       y += 64*4 - 64*2;
2053       c += 64*4 - 8;
2054     }
2055   }
2056 
  // YCbCr H1V2 (1x2:1:1, 4 blocks per MCU) to RGB
2058   void H1V2Convert () {
2059     int row = m_max_mcu_y_size - m_mcu_lines_left;
2060     ubyte *d0 = m_pScan_line_0;
2061     ubyte *d1 = m_pScan_line_1;
2062     ubyte *y;
2063     ubyte *c;
2064 
2065     if (row < 8)
2066       y = m_pSample_buf + row * 8;
2067     else
2068       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2069 
2070     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2071 
2072     for (int i = m_max_mcus_per_row; i > 0; i--)
2073     {
2074       for (int j = 0; j < 8; j++)
2075       {
2076         int cb = c[0+j];
2077         int cr = c[64+j];
2078 
2079         int rc = m_crr.ptr[cr];
2080         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2081         int bc = m_cbb.ptr[cb];
2082 
2083         int yy = y[j];
2084         d0[0] = clamp(yy+rc);
2085         d0[1] = clamp(yy+gc);
2086         d0[2] = clamp(yy+bc);
2087         d0[3] = 255;
2088 
2089         yy = y[8+j];
2090         d1[0] = clamp(yy+rc);
2091         d1[1] = clamp(yy+gc);
2092         d1[2] = clamp(yy+bc);
2093         d1[3] = 255;
2094 
2095         d0 += 4;
2096         d1 += 4;
2097       }
2098 
2099       y += 64*4;
2100       c += 64*4;
2101     }
2102   }
2103 
  // YCbCr H2V2 (2x2:1:1, 6 blocks per MCU) to RGB
2105   void H2V2Convert () {
2106     int row = m_max_mcu_y_size - m_mcu_lines_left;
2107     ubyte *d0 = m_pScan_line_0;
2108     ubyte *d1 = m_pScan_line_1;
2109     ubyte *y;
2110     ubyte *c;
2111 
2112     if (row < 8)
2113       y = m_pSample_buf + row * 8;
2114     else
2115       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2116 
2117     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2118 
2119     for (int i = m_max_mcus_per_row; i > 0; i--)
2120     {
2121       for (int l = 0; l < 2; l++)
2122       {
2123         for (int j = 0; j < 8; j += 2)
2124         {
2125           int cb = c[0];
2126           int cr = c[64];
2127 
2128           int rc = m_crr.ptr[cr];
2129           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2130           int bc = m_cbb.ptr[cb];
2131 
2132           int yy = y[j];
2133           d0[0] = clamp(yy+rc);
2134           d0[1] = clamp(yy+gc);
2135           d0[2] = clamp(yy+bc);
2136           d0[3] = 255;
2137 
2138           yy = y[j+1];
2139           d0[4] = clamp(yy+rc);
2140           d0[5] = clamp(yy+gc);
2141           d0[6] = clamp(yy+bc);
2142           d0[7] = 255;
2143 
2144           yy = y[j+8];
2145           d1[0] = clamp(yy+rc);
2146           d1[1] = clamp(yy+gc);
2147           d1[2] = clamp(yy+bc);
2148           d1[3] = 255;
2149 
2150           yy = y[j+8+1];
2151           d1[4] = clamp(yy+rc);
2152           d1[5] = clamp(yy+gc);
2153           d1[6] = clamp(yy+bc);
2154           d1[7] = 255;
2155 
2156           d0 += 8;
2157           d1 += 8;
2158 
2159           c++;
2160         }
2161         y += 64;
2162       }
2163 
2164       y += 64*6 - 64*2;
2165       c += 64*6 - 8;
2166     }
2167   }
2168 
2169   // Y (1 block per MCU) to 8-bit grayscale
2170   void gray_convert () {
2171     int row = m_max_mcu_y_size - m_mcu_lines_left;
2172     ubyte *d = m_pScan_line_0;
2173     ubyte *s = m_pSample_buf + row * 8;
2174 
2175     for (int i = m_max_mcus_per_row; i > 0; i--)
2176     {
2177       *cast(uint*)d = *cast(uint*)s;
2178       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2179 
2180       s += 64;
2181       d += 8;
2182     }
2183   }
2184 
2185   void expanded_convert () {
2186     int row = m_max_mcu_y_size - m_mcu_lines_left;
2187 
2188     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2189 
2190     ubyte* d = m_pScan_line_0;
2191 
2192     for (int i = m_max_mcus_per_row; i > 0; i--)
2193     {
2194       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2195       {
2196         immutable int Y_ofs = k * 8;
2197         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2198         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2199         for (int j = 0; j < 8; j++)
2200         {
2201           int y = Py[Y_ofs + j];
2202           int cb = Py[Cb_ofs + j];
2203           int cr = Py[Cr_ofs + j];
2204 
2205           d[0] = clamp(y + m_crr.ptr[cr]);
2206           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2207           d[2] = clamp(y + m_cbb.ptr[cb]);
2208           d[3] = 255;
2209 
2210           d += 4;
2211         }
2212       }
2213 
2214       Py += 64 * m_expanded_blocks_per_mcu;
2215     }
2216   }
2217 
2218   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2219   void find_eoi () {
2220     if (!m_progressive_flag)
2221     {
2222       // Attempt to read the EOI marker.
2223       //get_bits_no_markers(m_bits_left & 7);
2224 
2225       // Prime the bit buffer
2226       m_bits_left = 16;
2227       get_bits(16);
2228       get_bits(16);
2229 
2230       // The next marker _should_ be EOI
2231       process_markers(true); // but restarts are allowed as we can harmlessly skip them at the end of the stream
2232     }
2233 
2234     m_total_bytes_read -= m_in_buf_left;
2235   }
2236 
2237   // Creates the tables needed for efficient Huffman decoding.
2238   void make_huff_table (int index, huff_tables *pH) {
2239     int p, i, l, si;
2240     ubyte[257] huffsize;
2241     uint[257] huffcode;
2242     uint code;
2243     uint subtree;
2244     int code_size;
2245     int lastp;
2246     int nextfreeentry;
2247     int currententry;
2248 
2249     pH.ac_table = m_huff_ac.ptr[index] != 0;
2250 
2251     p = 0;
2252 
2253     for (l = 1; l <= 16; l++)
2254     {
2255       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2256         huffsize.ptr[p++] = cast(ubyte)(l);
2257     }
2258 
2259     huffsize.ptr[p] = 0;
2260 
2261     lastp = p;
2262 
2263     code = 0;
2264     si = huffsize.ptr[0];
2265     p = 0;
2266 
2267     while (huffsize.ptr[p])
2268     {
2269       while (huffsize.ptr[p] == si)
2270       {
2271         huffcode.ptr[p++] = code;
2272         code++;
2273       }
2274 
2275       code <<= 1;
2276       si++;
2277     }
2278 
2279     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2280     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2281     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2282     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2283 
2284     nextfreeentry = -1;
2285 
2286     p = 0;
2287 
2288     while (p < lastp)
2289     {
2290       i = m_huff_val.ptr[index][p];
2291       code = huffcode.ptr[p];
2292       code_size = huffsize.ptr[p];
2293 
2294       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2295 
2296       if (code_size <= 8)
2297       {
2298         code <<= (8 - code_size);
2299 
2300         for (l = 1 << (8 - code_size); l > 0; l--)
2301         {
2302           assert(i < 256);
2303 
2304           pH.look_up.ptr[code] = i;
2305 
2306           bool has_extrabits = false;
2307           int extra_bits = 0;
2308           int num_extra_bits = i & 15;
2309 
2310           int bits_to_fetch = code_size;
2311           if (num_extra_bits)
2312           {
2313             int total_codesize = code_size + num_extra_bits;
2314             if (total_codesize <= 8)
2315             {
2316               has_extrabits = true;
2317               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2318               assert(extra_bits <= 0x7FFF);
2319               bits_to_fetch += num_extra_bits;
2320             }
2321           }
2322 
2323           if (!has_extrabits)
2324             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2325           else
2326             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2327 
2328           code++;
2329         }
2330       }
2331       else
2332       {
2333         subtree = (code >> (code_size - 8)) & 0xFF;
2334 
2335         currententry = pH.look_up.ptr[subtree];
2336 
2337         if (currententry == 0)
2338         {
2339           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2340           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2341 
2342           nextfreeentry -= 2;
2343         }
2344 
2345         code <<= (16 - (code_size - 8));
2346 
2347         for (l = code_size; l > 9; l--)
2348         {
2349           if ((code & 0x8000) == 0)
2350             currententry--;
2351 
2352           if (pH.tree.ptr[-currententry - 1] == 0)
2353           {
2354             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2355 
2356             currententry = nextfreeentry;
2357 
2358             nextfreeentry -= 2;
2359           }
2360           else
2361             currententry = pH.tree.ptr[-currententry - 1];
2362 
2363           code <<= 1;
2364         }
2365 
2366         if ((code & 0x8000) == 0)
2367           currententry--;
2368 
2369         pH.tree.ptr[-currententry - 1] = i;
2370       }
2371 
2372       p++;
2373     }
2374   }
2375 
2376   // Verifies the quantization tables needed for this scan are available.
2377   void check_quant_tables () {
2378     for (int i = 0; i < m_comps_in_scan; i++)
2379       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2380         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2381   }
2382 
2383   // Verifies that all the Huffman tables needed for this scan are available.
2384   void check_huff_tables () {
2385     for (int i = 0; i < m_comps_in_scan; i++)
2386     {
2387       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2388         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2389 
2390       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2391         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2392     }
2393 
2394     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2395       if (m_huff_num.ptr[i])
2396       {
2397         if (!m_pHuff_tabs.ptr[i])
2398           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2399 
2400         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2401       }
2402   }
2403 
2404   // Determines the component order inside each MCU.
2405   // Also calcs how many MCU's are on each row, etc.
2406   void calc_mcu_block_order () {
2407     int component_num, component_id;
2408     int max_h_samp = 0, max_v_samp = 0;
2409 
2410     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2411     {
2412       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2413         max_h_samp = m_comp_h_samp.ptr[component_id];
2414 
2415       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2416         max_v_samp = m_comp_v_samp.ptr[component_id];
2417     }
2418 
2419     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2420     {
2421       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2422       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2423     }
2424 
2425     if (m_comps_in_scan == 1)
2426     {
2427       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2428       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2429     }
2430     else
2431     {
2432       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2433       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2434     }
2435 
2436     if (m_comps_in_scan == 1)
2437     {
2438       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2439 
2440       m_blocks_per_mcu = 1;
2441     }
2442     else
2443     {
2444       m_blocks_per_mcu = 0;
2445 
2446       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2447       {
2448         int num_blocks;
2449 
2450         component_id = m_comp_list.ptr[component_num];
2451 
2452         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2453 
2454         while (num_blocks--)
2455           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2456       }
2457     }
2458   }
2459 
2460   // Starts a new scan.
2461   int init_scan () {
2462     if (!locate_sos_marker())
2463       return false;
2464 
2465     calc_mcu_block_order();
2466 
2467     check_huff_tables();
2468 
2469     check_quant_tables();
2470 
2471     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2472 
2473     m_eob_run = 0;
2474 
2475     if (m_restart_interval)
2476     {
2477       m_restarts_left = m_restart_interval;
2478       m_next_restart_num = 0;
2479     }
2480 
2481     fix_in_buffer();
2482 
2483     return true;
2484   }
2485 
2486   // Starts a frame. Determines if the number of components or sampling factors
2487   // are supported.
2488   void init_frame () {
2489     int i;
2490 
2491     if (m_comps_in_frame == 1)
2492     {
2493       version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}
2494 
2495       //if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2496       //  stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2497 
2498       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2499       {
2500         m_scan_type = JPGD_GRAYSCALE;
2501         m_max_blocks_per_mcu = 1;
2502         m_max_mcu_x_size = 8;
2503         m_max_mcu_y_size = 8;
2504       }
2505       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2506       {
2507         //k8: i added this, and i absolutely don't know what it means; but it decoded two sample images i found
2508         m_scan_type = JPGD_GRAYSCALE;
2509         m_max_blocks_per_mcu = 4;
2510         m_max_mcu_x_size = 8;
2511         m_max_mcu_y_size = 8;
2512       }
2513       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2514       {
2515       	// adr added this. idk if it is right seems wrong since it the same as above but..... meh ship it.
2516         m_scan_type = JPGD_GRAYSCALE;
2517         m_max_blocks_per_mcu = 4;
2518         m_max_mcu_x_size = 8;
2519         m_max_mcu_y_size = 8;
2520       }
2521       else {
2522       // code -231 brings us here
2523       //import std.conv;
2524       //assert(0, to!string(m_comp_h_samp) ~ to!string(m_comp_v_samp));
2525         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2526       }
2527     }
2528     else if (m_comps_in_frame == 3)
2529     {
2530       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2531            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2532         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2533 
2534       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2535       {
2536         m_scan_type = JPGD_YH1V1;
2537 
2538         m_max_blocks_per_mcu = 3;
2539         m_max_mcu_x_size = 8;
2540         m_max_mcu_y_size = 8;
2541       }
2542       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2543       {
2544         m_scan_type = JPGD_YH2V1;
2545         m_max_blocks_per_mcu = 4;
2546         m_max_mcu_x_size = 16;
2547         m_max_mcu_y_size = 8;
2548       }
2549       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2550       {
2551         m_scan_type = JPGD_YH1V2;
2552         m_max_blocks_per_mcu = 4;
2553         m_max_mcu_x_size = 8;
2554         m_max_mcu_y_size = 16;
2555       }
2556       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2557       {
2558         m_scan_type = JPGD_YH2V2;
2559         m_max_blocks_per_mcu = 6;
2560         m_max_mcu_x_size = 16;
2561         m_max_mcu_y_size = 16;
2562       }
2563       else
2564         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2565     }
2566     else
2567       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2568 
2569     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2570     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2571 
2572     // These values are for the *destination* pixels: after conversion.
2573     if (m_scan_type == JPGD_GRAYSCALE)
2574       m_dest_bytes_per_pixel = 1;
2575     else
2576       m_dest_bytes_per_pixel = 4;
2577 
2578     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2579 
2580     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2581 
2582     // Initialize two scan line buffers.
2583     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2584     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2585       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2586 
2587     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2588 
2589     // Should never happen
2590     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2591       stop_decoding(JPGD_ASSERTION_ERROR);
2592 
2593     // Allocate the coefficient buffer, enough for one MCU
2594     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2595 
2596     for (i = 0; i < m_max_blocks_per_mcu; i++)
2597       m_mcu_block_max_zag.ptr[i] = 64;
2598 
2599     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2600     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2601     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2602     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2603     m_freq_domain_chroma_upsample = false;
2604     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2605       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2606     }
2607 
2608     if (m_freq_domain_chroma_upsample)
2609       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2610     else
2611       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2612 
2613     m_total_lines_left = m_image_y_size;
2614 
2615     m_mcu_lines_left = 0;
2616 
2617     create_look_ups();
2618   }
2619 
2620   // The coeff_buf series of methods originally stored the coefficients
2621   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2622   // was used to make this process more efficient. Now, we can store the entire
2623   // thing in RAM.
2624   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2625     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2626 
2627     cb.block_num_x = block_num_x;
2628     cb.block_num_y = block_num_y;
2629     cb.block_len_x = block_len_x;
2630     cb.block_len_y = block_len_y;
2631     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
2632     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2633     return cb;
2634   }
2635 
2636   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2637     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2638     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2639   }
2640 
2641   // The following methods decode the various types of m_blocks encountered
2642   // in progressively encoded images.
2643   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2644     int s, r;
2645     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2646 
2647     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2648     {
2649       r = pD.get_bits_no_markers(s);
2650       s = JPGD_HUFF_EXTEND(r, s);
2651     }
2652 
2653     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2654 
2655     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2656   }
2657 
2658   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2659     if (pD.get_bits_no_markers(1))
2660     {
2661       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2662 
2663       p[0] |= (1 << pD.m_successive_low);
2664     }
2665   }
2666 
2667   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2668     int k, s, r;
2669 
2670     if (pD.m_eob_run)
2671     {
2672       pD.m_eob_run--;
2673       return;
2674     }
2675 
2676     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2677 
2678     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2679     {
2680       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2681 
2682       r = s >> 4;
2683       s &= 15;
2684 
2685       if (s)
2686       {
2687         if ((k += r) > 63)
2688           pD.stop_decoding(JPGD_DECODE_ERROR);
2689 
2690         r = pD.get_bits_no_markers(s);
2691         s = JPGD_HUFF_EXTEND(r, s);
2692 
2693         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2694       }
2695       else
2696       {
2697         if (r == 15)
2698         {
2699           if ((k += 15) > 63)
2700             pD.stop_decoding(JPGD_DECODE_ERROR);
2701         }
2702         else
2703         {
2704           pD.m_eob_run = 1 << r;
2705 
2706           if (r)
2707             pD.m_eob_run += pD.get_bits_no_markers(r);
2708 
2709           pD.m_eob_run--;
2710 
2711           break;
2712         }
2713       }
2714     }
2715   }
2716 
  // AC refinement pass for one block of a progressive image.
  // Walks the spectral band [m_spectral_start, m_spectral_end]: coefficients
  // that are already non-zero may receive one correction bit each, and newly
  // non-zero coefficients are set to +/-(1 << m_successive_low).
  static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
    int s, k, r;
    // p1 / m1: the positive and negative value of the bit refined by this scan.
    int p1 = 1 << pD.m_successive_low;
    int m1 = (-1) << pD.m_successive_low;
    jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

    assert(pD.m_spectral_end <= 63);

    k = pD.m_spectral_start;

    // No end-of-band run pending: decode symbols for this block.
    if (pD.m_eob_run == 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

        // High nibble: run length; low nibble: size.
        r = s >> 4;
        s &= 15;

        if (s)
        {
          // Any size other than 1 is reported as a decode error.
          if (s != 1)
            pD.stop_decoding(JPGD_DECODE_ERROR);

          // One bit selects the sign of the newly non-zero coefficient.
          if (pD.get_bits_no_markers(1))
            s = p1;
          else
            s = m1;
        }
        else
        {
          if (r != 15)
          {
            // End of band: start a run of (1 << r) blocks plus r extra bits
            // of count, and leave the symbol loop.
            pD.m_eob_run = 1 << r;

            if (r)
              pD.m_eob_run += pD.get_bits_no_markers(r);

            break;
          }
        }

        // Advance over the band: every already non-zero coefficient consumes
        // one correction bit; every zero coefficient counts down the run `r`,
        // and the walk stops at the zero coefficient where the run is used up.
        do
        {
          jpgd_block_t *this_coef = p + g_ZAG[k & 63];

          if (*this_coef != 0)
          {
            if (pD.get_bits_no_markers(1))
            {
              // Apply the correction only if this bit is not already set,
              // moving the value away from zero.
              if ((*this_coef & p1) == 0)
              {
                if (*this_coef >= 0)
                  *this_coef = cast(jpgd_block_t)(*this_coef + p1);
                else
                  *this_coef = cast(jpgd_block_t)(*this_coef + m1);
              }
            }
          }
          else
          {
            if (--r < 0)
              break;
          }

          k++;

        } while (k <= pD.m_spectral_end);

        // Store the new coefficient at the position where the walk stopped.
        if ((s) && (k < 64))
        {
          p[g_ZAG[k]] = cast(jpgd_block_t)(s);
        }
      }
    }

    // Inside an end-of-band run: the remaining coefficients of the band only
    // receive correction bits, then one block of the run is consumed.
    if (pD.m_eob_run > 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis

        if (*this_coef != 0)
        {
          if (pD.get_bits_no_markers(1))
          {
            if ((*this_coef & p1) == 0)
            {
              if (*this_coef >= 0)
                *this_coef = cast(jpgd_block_t)(*this_coef + p1);
              else
                *this_coef = cast(jpgd_block_t)(*this_coef + m1);
            }
          }
        }
      }

      pD.m_eob_run--;
    }
  }
2817 
2818   // Decode a scan in a progressively encoded image.
2819   void decode_scan (pDecode_block_func decode_block_func) {
2820     int mcu_row, mcu_col, mcu_block;
2821     int[JPGD_MAX_COMPONENTS] block_x_mcu;
2822     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
2823 
2824     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
2825 
2826     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2827     {
2828       int component_num, component_id;
2829 
2830       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
2831 
2832       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2833       {
2834         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2835 
2836         if ((m_restart_interval) && (m_restarts_left == 0))
2837           process_restart();
2838 
2839         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2840         {
2841           component_id = m_mcu_org.ptr[mcu_block];
2842 
2843           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
2844 
2845           if (m_comps_in_scan == 1)
2846             block_x_mcu.ptr[component_id]++;
2847           else
2848           {
2849             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
2850             {
2851               block_x_mcu_ofs = 0;
2852 
2853               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
2854               {
2855                 block_y_mcu_ofs = 0;
2856                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
2857               }
2858             }
2859           }
2860         }
2861 
2862         m_restarts_left--;
2863       }
2864 
2865       if (m_comps_in_scan == 1)
2866         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
2867       else
2868       {
2869         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2870         {
2871           component_id = m_comp_list.ptr[component_num];
2872           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2873         }
2874       }
2875     }
2876   }
2877 
2878   // Decode a progressively encoded image.
2879   void init_progressive () {
2880     int i;
2881 
2882     if (m_comps_in_frame == 4)
2883       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2884 
2885     // Allocate the coefficient buffers.
2886     for (i = 0; i < m_comps_in_frame; i++)
2887     {
2888       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2889       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2890     }
2891 
2892     for ( ; ; )
2893     {
2894       int dc_only_scan, refinement_scan;
2895       pDecode_block_func decode_block_func;
2896 
2897       if (!init_scan())
2898         break;
2899 
2900       dc_only_scan = (m_spectral_start == 0);
2901       refinement_scan = (m_successive_high != 0);
2902 
2903       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2904         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2905 
2906       if (dc_only_scan)
2907       {
2908         if (m_spectral_end)
2909           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2910       }
2911       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2912         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2913 
2914       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2915         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2916 
2917       if (dc_only_scan)
2918       {
2919         if (refinement_scan)
2920           decode_block_func = &decode_block_dc_refine;
2921         else
2922           decode_block_func = &decode_block_dc_first;
2923       }
2924       else
2925       {
2926         if (refinement_scan)
2927           decode_block_func = &decode_block_ac_refine;
2928         else
2929           decode_block_func = &decode_block_ac_first;
2930       }
2931 
2932       decode_scan(decode_block_func);
2933 
2934       m_bits_left = 16;
2935       get_bits(16);
2936       get_bits(16);
2937     }
2938 
2939     m_comps_in_scan = m_comps_in_frame;
2940 
2941     for (i = 0; i < m_comps_in_frame; i++)
2942       m_comp_list.ptr[i] = i;
2943 
2944     calc_mcu_block_order();
2945   }
2946 
2947   void init_sequential () {
2948     if (!init_scan())
2949       stop_decoding(JPGD_UNEXPECTED_MARKER);
2950   }
2951 
2952   void decode_start () {
2953     init_frame();
2954 
2955     if (m_progressive_flag)
2956       init_progressive();
2957     else
2958       init_sequential();
2959   }
2960 
  // First step of decoding: initializes the decoder around the stream read
  // callback `rfn` via initit(), then calls locate_sof_marker().
  void decode_init (JpegStreamReadFunc rfn) {
    initit(rfn);
    locate_sof_marker();
  }
2965 }
2966 
2967 
// ////////////////////////////////////////////////////////////////////////// //
/// Read the JPEG image header from a stream and report the image dimensions
/// and number of components.
///
/// Params:
///   rfn = stream read callback; `null` makes the function return `false`
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components in the image
///
/// Returns: `false` if the image is not JPEG (i hope); the `out` parameters
/// keep their default values in that case.
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  if (rfn is null) return false;

  auto decoder = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable bool header_ok = (decoder.error_code == JPGD_SUCCESS);
  if (header_ok) {
    width = decoder.width;
    height = decoder.height;
    actual_comps = decoder.num_components;
  }
  return header_ok;
}
2981 
2982 
// ////////////////////////////////////////////////////////////////////////// //
/// Read the JPEG image header from a disk file and report the image
/// dimensions and number of components.
///
/// Params:
///   filename = path of the file to inspect (does not have to be zero-terminated)
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components in the image
///
/// Returns: `false` if the image is not JPEG (i hope).
///
/// Throws: `Exception` if `filename` is empty or the file cannot be opened.
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a zero-terminated string: short names are copied to a
  // stack buffer, long ones to a temporary heap buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // Check the raw pointer: a non-empty slice of a null pointer does not
    // compare `is null`, so a slice check would miss an allocation failure.
    auto tptr = cast(char*)malloc(filename.length+1);
    if (tptr !is null) {
      scope(exit) free(tptr);
      // The name has to be copied and terminated here too.
      tptr[0..filename.length] = filename[];
      tptr[filename.length] = 0;
      m_pFile = fopen(tptr, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      // Once EOF was seen, keep reporting it without touching the file.
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      // A short read is either a read error or the end of the file.
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
3032 
3033 
// ////////////////////////////////////////////////////////////////////////// //
/// Read the JPEG image header from a memory buffer and report the image
/// dimensions and number of components.
///
/// Params:
///   buf = the bytes of the (possibly) JPEG image
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components in the image
///
/// Returns: `false` if the image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  // Number of bytes of `buf` already handed to the decoder.
  size_t consumed;

  return detect_jpeg_image_from_stream(
    delegate int (void* dst, int wanted, bool *eof) {
      import core.stdc.string : memcpy;

      if (consumed >= buf.length) {
        *eof = true;
        return 0;
      }

      // Never hand out more than what is left in the buffer.
      if (buf.length-consumed < wanted) wanted = cast(int)(buf.length-consumed);

      auto src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(dst, src, wanted);
      consumed += wanted;
      return wanted;
    },
    width, height, actual_comps);
}
3053 
3054 
// ////////////////////////////////////////////////////////////////////////// //
/// Decompress JPEG image, what else?
///
/// Params:
///   useMalloc = when `true` the pixel buffer is obtained with `jpgd_malloc`,
///               otherwise it is a GC-allocated array
///   rfn = stream read callback; `null` makes the function return `null`
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components stored in the image
///   req_comps = number of components wanted in the result: 1 (grayscale),
///               3 (RGB) or 4 (RGBA), or -1 to use the image's own component
///               count; any other value makes the function return `null`
///
/// Returns: `width*height*req_comps` bytes of pixel data, one row after the
/// other, or `null` on any error (bad arguments, decoding failure, empty
/// image, or out of memory with `useMalloc`).
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  if (rfn is null) return null;
  if (req_comps != -1 && req_comps != 1 && req_comps != 3 && req_comps != 4) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // Nothing sensible can be produced for an empty image.
  if (image_width < 1 || image_height < 1 || req_comps < 1) return null;

  // Sizes are computed in size_t: the product of three ints could wrap
  // around and lead to an undersized buffer.
  immutable size_t dst_bpl = cast(size_t)image_width*cast(size_t)req_comps;
  if (cast(size_t)image_height > size_t.max/dst_bpl) return null;
  immutable size_t dst_size = dst_bpl*cast(size_t)image_height;

  static if (useMalloc) {
    ubyte* pImage_data = cast(ubyte*)jpgd_malloc(dst_size);
    if (pImage_data is null) return null;
    auto idata = pImage_data[0..dst_size];
  } else {
    auto idata = new ubyte[](dst_size);
    auto pImage_data = idata.ptr;
  }

  // Gives the pixel buffer back; used on every failure path.
  void release_image () {
    static if (useMalloc) {
      jpgd_free(pImage_data);
    } else {
      import core.memory : GC;
      GC.free(idata.ptr);
      idata = null;
    }
  }

  scope(failure) release_image();

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      release_image();
      return null;
    }

    ubyte* pDst = pImage_data+cast(size_t)y*dst_bpl;

    if ((req_comps == 1 && decoder.num_components == 1) || (req_comps == 4 && decoder.num_components == 3)) {
      // The decoder output already has the requested layout.
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // Grayscale source: replicate the luma value into every color channel.
      if (req_comps == 3) {
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst += 3;
        }
      } else {
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst[3] = 255;
          pDst += 4;
        }
      }
    } else if (decoder.num_components == 3) {
      // Color source: the decoder delivers 4 bytes per pixel.
      if (req_comps == 1) {
        // Weighted sum of R, G and B in 16.16 fixed point.
        immutable int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < image_width; ++x) {
          int r = pScan_line[x*4+0];
          int g = pScan_line[x*4+1];
          int b = pScan_line[x*4+2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // Drop the fourth byte of every pixel.
        for (int x = 0; x < image_width; ++x) {
          pDst[0] = pScan_line[x*4+0];
          pDst[1] = pScan_line[x*4+1];
          pDst[2] = pScan_line[x*4+2];
          pDst += 3;
        }
      }
    }
  }

  return idata;
}
3158 
3159 
// ////////////////////////////////////////////////////////////////////////// //
/// Decompress JPEG image from disk file.
///
/// Params:
///   useMalloc = forwarded to `decompress_jpeg_image_from_stream`
///   filename = path of the file to load (does not have to be zero-terminated)
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components stored in the image
///   req_comps = required color components (3 for RGB or 4 for RGBA), or
///               leave it as is to use image value
///
/// Returns: whatever `decompress_jpeg_image_from_stream` returns for the file contents.
///
/// Throws: `Exception` if `filename` is empty or the file cannot be opened.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a zero-terminated string: short names are copied to a
  // stack buffer, long ones to a temporary heap buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // Check the raw pointer: a non-empty slice of a null pointer does not
    // compare `is null`, so a slice check would miss an allocation failure.
    auto tptr = cast(char*)malloc(filename.length+1);
    if (tptr !is null) {
      scope(exit) free(tptr);
      // The name has to be copied and terminated here too.
      tptr[0..filename.length] = filename[];
      tptr[filename.length] = 0;
      m_pFile = fopen(tptr, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      // Once EOF was seen, keep reporting it without touching the file.
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      // A short read is either a read error or the end of the file.
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3209 
3210 
// ////////////////////////////////////////////////////////////////////////// //
/// Decompress JPEG image from memory buffer.
///
/// Params:
///   useMalloc = forwarded to `decompress_jpeg_image_from_stream`
///   buf = the bytes of the JPEG image
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components stored in the image
///   req_comps = required color components (3 for RGB or 4 for RGBA), or
///               leave it as is to use image value
///
/// Returns: whatever `decompress_jpeg_image_from_stream` returns for the buffer contents.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Number of bytes of `buf` already handed to the decoder.
  size_t consumed;

  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* dst, int wanted, bool *eof) {
      import core.stdc.string : memcpy;

      if (consumed >= buf.length) {
        *eof = true;
        return 0;
      }

      // Never hand out more than what is left in the buffer.
      if (buf.length-consumed < wanted) wanted = cast(int)(buf.length-consumed);

      auto src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(dst, src, wanted);
      consumed += wanted;
      return wanted;
    },
    width, height, actual_comps, req_comps);
}
3230 
3231 
// ////////////////////////////////////////////////////////////////////////// //
// if we have access "iv.vfs", add some handy API
// (`true` exactly when `import iv.vfs;` compiles)
enum JpegHasIVVFS = __traits(compiles, { import iv.vfs; });

static if (JpegHasIVVFS) {
import iv.vfs;

// ////////////////////////////////////////////////////////////////////////// //
/// Decompress JPEG image from an already opened `VFile`.
///
/// Params:
///   useMalloc = forwarded to `decompress_jpeg_image_from_stream`
///   fl = the file to read the image from
///   width = receives the image width
///   height = receives the image height
///   actual_comps = receives the number of components stored in the image
///   req_comps = required color components (3 for RGB or 4 for RGBA), or
///               leave it as is to use image value
///
/// Returns: whatever `decompress_jpeg_image_from_stream` returns for the file contents.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* dst, int wanted, bool *eof) {
      // A closed file is reported as a read error.
      if (!fl.isOpen) return -1;

      if (fl.eof) {
        *eof = true;
        return 0;
      }

      auto got = fl.rawRead(dst[0..wanted]);

      // Tell the decoder right away when this read reached the end of the file.
      if (fl.eof) *eof = true;

      return cast(int)got.length;
    },
    width, height, actual_comps, req_comps);
}
// vfs API
}
3258 
3259 
// ////////////////////////////////////////////////////////////////////////// //
// if we have access "arsd.color", add some handy API
// (`true` exactly when `import arsd.color;` compiles)
enum JpegHasArsd = __traits(compiles, { import arsd.color; });



/// Description of the most recent JPEG loading failure, as recorded by
/// `readJpegFromStream`.
public struct LastJpegError {
	/// Step that failed: 1 = creating the decoder, 2 = `begin_decoding`, 3 = decoding a scan line.
	int stage;
	/// Status code returned by the failing step.
	int code;
	/// Extra information; stage 2 stores the decoder's `m_error_code` here, the other stages leave it 0.
	int details;
}

/// Error information for the last failed `readJpegFromStream` call.
public LastJpegError lastJpegError;
3273 
3274 
3275 static if (JpegHasArsd) {
3276 import arsd.color;
3277 
3278 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses a JPEG image supplied by the read callback `rfn` into a 4-bytes-per-pixel image.
///
/// Returns: the decoded image, or null when `rfn` is null or decoding fails.
/// On a decoding failure `lastJpegError` records the failing stage (1 = decoder creation,
/// 2 = begin_decoding / bad dimensions, 3 = scan line decoding) and the decoder's code.
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  enum req_comps = 4; // bytes per destination pixel

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) { lastJpegError = LastJpegError(1, decoder.error_code); return null; }
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}

  auto err = decoder.begin_decoding();
  if (err != JPGD_SUCCESS || image_width < 1 || image_height < 1) {
    lastJpegError = LastJpegError(2, err, decoder.m_error_code);
    return null;
  }

  // Row size and row offsets are computed in size_t: with int arithmetic `y*dst_bpl`
  // overflows as soon as the decoded image needs more than 2 GiB.
  immutable size_t dst_bpl = cast(size_t)image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  scope(failure) { img.clearInternal(); img = null; }
  ubyte* pImage_data = img.imageData.bytes.ptr;

  foreach (immutable y; 0..image_height) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    err = decoder.decode(cast(void**)&pScan_line, &scan_line_len);
    if (err != JPGD_SUCCESS) {
      lastJpegError = LastJpegError(3, err);
      img.clearInternal();
      img = null;
      return null;
    }

    ubyte* pDst = pImage_data+cast(size_t)y*dst_bpl;

    if (decoder.num_components == 3) {
      // colour scan lines are copied verbatim, dst_bpl (width*4) bytes per row
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // grayscale: replicate luma into R, G and B and make the pixel opaque
      foreach (immutable x; 0..image_width) {
        immutable ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
    // any other component count leaves the row untouched, as before
  }

  return img;
}
3368 
3369 
3370 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from disk file.
/// Throws Exception if `filename` is empty or the file cannot be opened.
/// Returns null if decoding failed.
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;
  import core.stdc.stdlib : malloc, free;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  // fopen needs a zero-terminated copy of the name: short names use a stack buffer,
  // long names a temporary heap buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    // check the raw pointer: a slice of a null pointer with non-zero length is not `is null`
    auto tfn = cast(char*)malloc(filename.length+1);
    if (tfn !is null) {
      scope(exit) free(tfn);
      // the name must actually be copied and terminated before fopen reads it
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3419 
3420 /++
3421 	History:
3422 		Added January 22, 2021 (release version 9.2)
3423 +/
3424 public void writeJpeg(const(char)[] filename, TrueColorImage img, JpegParams params = JpegParams.init) {
3425 	if(!compress_image_to_jpeg_file(filename, img.width, img.height, 4, img.imageData.bytes, params))
3426 		throw new Exception("jpeg write failed"); // FIXME: check errno?
3427 }
3428 
3429 /++
3430   	Encodes an image as jpeg in memory.
3431 
3432 	History:
3433 		Added January 22, 2021 (release version 9.2)
3434 +/
3435 public ubyte[] encodeJpeg(TrueColorImage img, JpegParams params = JpegParams.init) {
3436   	ubyte[] data;
3437 	encodeJpeg((const scope ubyte[] i) {
3438 		data ~= i;
3439 		return true;
3440 	}, img, params);
3441 
3442 	return data;
3443 }
3444 
/// ditto
public void encodeJpeg(scope bool delegate(const scope ubyte[]) dg, TrueColorImage img, JpegParams params = JpegParams.init) {
	// `dg` receives the encoded stream piece by piece; the image is passed as 4 channels per pixel
	immutable bool encoded = compress_image_to_jpeg_stream(dg, img.width, img.height, 4, img.imageData.bytes, params);
	if(!encoded) throw new Exception("encode");
}
3452 
3453 
3454 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
/// Returns whatever readJpegFromStream returns for the bytes in `buf`.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  size_t consumed; // number of bytes of `buf` already handed to the decoder
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (consumed >= buf.length) { *pEOF_flag = true; return 0; }
      // never hand out more than what is left in the buffer
      immutable size_t remaining = buf.length-consumed;
      if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
      memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+consumed, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    }
  );
}
3472 // done with arsd API
3473 }
3474 
3475 
3476 static if (JpegHasIVVFS) {
/// decompress JPEG image from an opened VFile; returns whatever readJpegFromStream returns.
public MemoryImage readJpeg (VFile fl) {
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (!fl.isOpen) return -1; // a closed file is reported as a read error
      if (!fl.eof) {
        auto got = fl.rawRead(pBuf[0..max_bytes_to_read]);
        if (fl.eof) *pEOF_flag = true;
        return cast(int)got.length;
      }
      // already at the end: nothing more to deliver
      *pEOF_flag = true;
      return 0;
    }
  );
}
3491 
/// Probes an opened VFile with detect_jpeg_image_from_stream, which fills `width`, `height`
/// and `actual_comps`; returns that function's result.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (!fl.isOpen) return -1; // a closed file is reported as a read error
      if (!fl.eof) {
        auto got = fl.rawRead(pBuf[0..max_bytes_to_read]);
        if (fl.eof) *pEOF_flag = true;
        return cast(int)got.length;
      }
      // already at the end: nothing more to deliver
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps);
}
3506 // vfs API
3507 }
3508 
3509 
3510 // ////////////////////////////////////////////////////////////////////////// //
3511 version(jpegd_test) {
3512 import arsd.color;
3513 import arsd.png;
3514 
// Test driver: detects and decodes the JPEG named on the command line (default "image.jpg"),
// once from disk and once from memory, and writes the results as z00.png / z01.png.
void main (string[] args) {
  import std.stdio;
  import std.exception : enforce;
  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // Do the calls outside of `assert`: asserts (and their side effects) are removed by -release.
    immutable bool detected = detect_jpeg_image_from_file(fname, width, height, comps);
    enforce(detected, "cannot detect JPEG in file '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    enforce(img !is null, "cannot decode JPEG file '"~fname~"'");
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      file.length = cast(size_t)fl.size; // do not squeeze the size through int
      fl.rawRead(file[]);
    }
    immutable bool detected = detect_jpeg_image_from_memory(file[], width, height, comps);
    enforce(detected, "cannot detect JPEG in memory copy of '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    enforce(img !is null, "cannot decode JPEG from memory copy of '"~fname~"'");
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3539 }
3540 
3541 // jpge.cpp - C++ class for JPEG compression.
3542 // Public domain, Rich Geldreich <richgel99@gmail.com>
3543 // Alex Evans: Added RGBA support, linear memory allocator.
3544 // v1.01, Dec. 18, 2010 - Initial release
3545 // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
3546 // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
3547 //                        Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
3548 // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to null in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
3549 //                       Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
3550 //                       Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
3551 // D translation by Ketmar // Invisible Vector
3552 //
3553 // This is free and unencumbered software released into the public domain.
3554 //
3555 // Anyone is free to copy, modify, publish, use, compile, sell, or
3556 // distribute this software, either in source code form or as a compiled
3557 // binary, for any purpose, commercial or non-commercial, and by any
3558 // means.
3559 //
3560 // In jurisdictions that recognize copyright laws, the author or authors
3561 // of this software dedicate any and all copyright interest in the
3562 // software to the public domain. We make this dedication for the benefit
3563 // of the public at large and to the detriment of our heirs and
3564 // successors. We intend this dedication to be an overt act of
3565 // relinquishment in perpetuity of all present and future rights to this
3566 // software under copyright law.
3567 //
3568 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3569 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3570 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
3571 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
3572 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
3573 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
3574 // OTHER DEALINGS IN THE SOFTWARE.
3575 //
3576 // For more information, please refer to <http://unlicense.org/>
3577 /**
3578  * Writes a JPEG image to a file or stream.
3579  * num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
3580  * note that alpha will not be stored in jpeg file.
3581  */
3582 
3583 public:
3584 // ////////////////////////////////////////////////////////////////////////// //
// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
enum JpegSubsampling {
  Y_ONLY = 0, /// Y (grayscale) only
  H1V1 = 1,   /// YCbCr, no subsampling
  H2V1 = 2,   /// YCbCr, H2V1 subsampling
  H2V2 = 3,   /// YCbCr, H2V2 subsampling
}
3587 
/// JPEG compression parameters structure.
public struct JpegParams {
  /// Quality: 1-100, higher is better. Typical values are around 50-95.
  int quality = 85;

  /// subsampling:
  /// 0 = Y (grayscale) only
  /// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
  /// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
  /// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
  JpegSubsampling subsampling = JpegSubsampling.H2V2;

  /// Disables CbCr discrimination - only intended for testing.
  /// If true, the Y quantization table is also used for the CbCr channels.
  bool noChromaDiscrimFlag = false;

  /// NOTE(review): presumably enables the encoder's two-pass mode (optimized Huffman tables) - confirm against jpeg_encoder.
  bool twoPass = true;

  /// Returns true when `quality` is within 1..100 and `subsampling` is one of the JpegSubsampling values.
  bool check () const pure nothrow @safe @nogc {
    immutable bool qualityOk = (quality >= 1 && quality <= 100);
    // the unsigned comparison also rejects negative values
    immutable bool subsamplingOk = (cast(uint)subsampling <= cast(uint)JpegSubsampling.H2V2);
    return qualityOk && subsamplingOk;
  }
}
3614 
3615 
3616 // ////////////////////////////////////////////////////////////////////////// //
/// Writes JPEG image to a stream (through the `wfn` callback) using default JpegParams.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  auto defaults = JpegParams();
  return compress_image_to_jpeg_stream(wfn, width, height, num_channels, pImage_data, defaults);
}
3621 
/// Writes JPEG image to a stream: the encoded bytes are handed to the `wfn` callback.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
/// Returns false if the encoder rejects the parameters, if `pImage_data` holds fewer than
/// width*height*num_channels bytes, or if encoding any scan line fails.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data, in JpegParams comp_params) {
  jpeg_encoder dst_image;
  if (!dst_image.setup(wfn, width, height, num_channels, comp_params)) return false;
  // release the encoder on every exit path, not only on success
  scope(exit) dst_image.deinit();

  // Refuse buffers that are too short for the declared geometry instead of reading past their end.
  if (width > 0 && height > 0 && num_channels > 0) {
    immutable ulong required = cast(ulong)width*cast(ulong)height*cast(ulong)num_channels;
    if (pImage_data.length < required) return false;
  }

  // row offsets are computed in size_t so that large images do not overflow int
  immutable size_t pitch = cast(size_t)width*cast(size_t)num_channels;
  for (uint pass_index = 0; pass_index < dst_image.total_passes(); pass_index++) {
    for (int i = 0; i < height; i++) {
      const(ubyte)* pBuf = pImage_data.ptr+cast(size_t)i*pitch;
      if (!dst_image.process_scanline(pBuf)) return false;
    }
    // a null scan line tells the encoder that this pass is complete
    if (!dst_image.process_scanline(null)) return false;
  }
  return true;
}
3639 
3640 
/// Writes JPEG image to file using default JpegParams.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_file (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  auto defaults = JpegParams();
  return compress_image_to_jpeg_file(fname, width, height, num_channels, pImage_data, defaults);
}
3645 
/// Writes JPEG image to the file `fname` (opened with mode "wb").
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
/// Returns false if the file cannot be opened, encoding fails, a write comes up short, or closing the file fails.
bool compress_image_to_jpeg_file() (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data, const scope auto ref JpegParams comp_params) {
  import std.internal.cstring;
  import core.stdc.stdio : FILE, fopen, fclose, fwrite;
  FILE* fl = fopen(fname.tempCString, "wb");
  if (fl is null) return false;
  // safety net: closes the file on every path that has not already closed it
  scope(exit) if (fl !is null) fclose(fl);
  immutable bool encoded = compress_image_to_jpeg_stream(
    delegate bool (scope const(ubyte)[] buf) {
      // a short write counts as a failure
      return (fwrite(buf.ptr, 1, buf.length, fl) == buf.length);
    }, width, height, num_channels, pImage_data, comp_params);
  if (!encoded) return false;
  // on success close explicitly so that a failing fclose is reported to the caller
  immutable bool closedCleanly = (fclose(fl) == 0);
  fl = null;
  return closedCleanly;
}
3666 
3667 
3668 // ////////////////////////////////////////////////////////////////////////// //
3669 private:
3670 nothrow @trusted @nogc {
// Returns `a` when a < b, otherwise `b`.
auto JPGE_MIN(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a < b) return a;
  return b;
}
// Returns `a` when a > b, otherwise `b`.
auto JPGE_MAX(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a > b) return a;
  return b;
}
3673 
// Allocates nSize bytes with the C runtime's malloc; the result may be null.
void *jpge_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* block = malloc(nSize);
  return block;
}
// Releases memory obtained from jpge_malloc; a null pointer is ignored.
void jpge_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
3676 
3677 
// Various JPEG enums and tables.
// *_CODES are the sizes of the symbol-value tables below; MAX_HUFF_SYMBOLS and MAX_HUFF_CODESIZE
// size the work arrays used while building optimized Huffman tables.
enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }

// Zig-zag scan order for an 8x8 block (a permutation of 0..63).
static immutable ubyte[64] s_zag = [ 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
// Standard luminance and chrominance quantization tables (64 entries each).
static immutable short[64] s_std_lum_quant = [ 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 ];
static immutable short[64] s_std_croma_quant = [ 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 ];
// Standard Huffman tables. In each *_bits array entry i (1..16) is the number of codes of
// length i and entry 0 is unused; the matching *_val array lists the symbols in code order.
static immutable ubyte[17] s_dc_lum_bits = [ 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 ];
static immutable ubyte[DC_LUM_CODES] s_dc_lum_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_lum_bits = [ 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d ];
static immutable ubyte[AC_LUM_CODES] s_ac_lum_val = [
  0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
  0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
  0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
  0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
  0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
static immutable ubyte[17] s_dc_chroma_bits = [ 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 ];
static immutable ubyte[DC_CHROMA_CODES] s_dc_chroma_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_chroma_bits = [ 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 ];
static immutable ubyte[AC_CHROMA_CODES] s_ac_chroma_val = [
  0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
  0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
  0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
  0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
  0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
3706 
3707 // Low-level helper functions.
3708 //template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
3709 
// RGB -> YCbCr conversion weights, scaled by 65536 (results are shifted right by 16 after summing).
enum YR = 19595, YG = 38470, YB = 7471;          // luma (Y) weights, int
enum CB_R = -11059, CB_G = -21709, CB_B = 32768; // Cb weights, int
enum CR_R = 32768, CR_G = -27439, CR_B = -5329;  // Cr weights, int
// Saturates i to the 0..255 range: negative values become 0, values above 255 become 255.
ubyte clamp() (int i) {
  pragma(inline, true);
  if (i < 0) return 0;
  if (i > 255) return 255;
  return cast(ubyte)i;
}
3713 
// Converts num_pixels packed RGB pixels (3 bytes each) at pSrc into packed YCbCr (3 bytes each) at pDst.
void RGB_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    pDst[0] = cast(ubyte)((red*YR+green*YG+blue*YB+32768)>>16);
    // chroma is centred on 128 and saturated to a byte
    pDst[1] = clamp(128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16));
    pDst[2] = clamp(128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16));
    pDst += 3;
    pSrc += 3;
    --num_pixels;
  }
}
3722 
// Converts num_pixels packed RGB pixels (3 bytes each) at pSrc into one luma byte per pixel at pDst.
void RGB_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    *pDst = cast(ubyte)((pSrc[0]*YR+pSrc[1]*YG+pSrc[2]*YB+32768)>>16);
    ++pDst;
    pSrc += 3;
    --num_pixels;
  }
}
3728 
// Converts num_pixels packed RGBA pixels (4 bytes each, alpha ignored) at pSrc into packed YCbCr (3 bytes each) at pDst.
void RGBA_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    pDst[0] = cast(ubyte)((red*YR+green*YG+blue*YB+32768)>>16);
    // chroma is centred on 128 and saturated to a byte
    pDst[1] = clamp(128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16));
    pDst[2] = clamp(128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16));
    pDst += 3;
    pSrc += 4;
    --num_pixels;
  }
}
3737 
// Converts num_pixels packed RGBA pixels (4 bytes each, alpha ignored) at pSrc into one luma byte per pixel at pDst.
void RGBA_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    *pDst = cast(ubyte)((pSrc[0]*YR+pSrc[1]*YG+pSrc[2]*YB+32768)>>16);
    ++pDst;
    pSrc += 4;
    --num_pixels;
  }
}
3743 
// Expands num_pixels luma bytes at pSrc into packed YCbCr (3 bytes each) at pDst, with neutral chroma (128).
void Y_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    pDst[0] = *pSrc;
    pDst[1] = 128;
    pDst[2] = 128;
    pDst += 3;
    ++pSrc;
    --num_pixels;
  }
}
3747 
// Forward DCT - DCT derived from jfdctint.
enum { ROW_BITS = 2 }
// Divides x by 2^n with rounding: adds half of the divisor, then shifts right by n.
int DCT_DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = (cast(int)1)<<(n-1);
  return (x+half)>>n;
}
3752 //#define DCT_MUL(var, c) (cast(short)(var)*cast(int)(c))
3753 
//#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7)
// Source text of a 1-D 8-point forward DCT, meant to be pasted with `mixin(DCT1D)`.
// It reads the int variables s0..s7 of the enclosing scope and overwrites them with the results.
// s0 and s4 are plain sums/differences; the other outputs are products with integer constants
// and still need descaling by the caller.
// The operands of each multiplication are truncated to `short` first (the old DCT_MUL macro).
enum DCT1D = q{{
  int t0 = s0+s7, t7 = s0-s7, t1 = s1+s6, t6 = s1-s6, t2 = s2+s5, t5 = s2-s5, t3 = s3+s4, t4 = s3-s4;
  int t10 = t0+t3, t13 = t0-t3, t11 = t1+t2, t12 = t1-t2;
  int u1 = (cast(short)(t12+t13)*cast(int)(4433));
  s2 = u1+(cast(short)(t13)*cast(int)(6270));
  s6 = u1+(cast(short)(t12)*cast(int)(-15137));
  u1 = t4+t7;
  int u2 = t5+t6, u3 = t4+t6, u4 = t5+t7;
  int z5 = (cast(short)(u3+u4)*cast(int)(9633));
  t4 = (cast(short)(t4)*cast(int)(2446)); t5 = (cast(short)(t5)*cast(int)(16819));
  t6 = (cast(short)(t6)*cast(int)(25172)); t7 = (cast(short)(t7)*cast(int)(12299));
  u1 = (cast(short)(u1)*cast(int)(-7373)); u2 = (cast(short)(u2)*cast(int)(-20995));
  u3 = (cast(short)(u3)*cast(int)(-16069)); u4 = (cast(short)(u4)*cast(int)(-3196));
  u3 += z5; u4 += z5;
  s0 = t10+t11; s1 = t7+u1+u4; s3 = t6+u2+u3; s4 = t10-t11; s5 = t5+u2+u4; s7 = t4+u1+u3;
}};
3771 
// In-place 2-D forward DCT of the 8x8 block of ints at p (row-major, 64 entries):
// a 1-D pass over every row followed by a 1-D pass over every column.
void DCT2D (int* p) {
  // Row pass: results keep ROW_BITS extra bits of precision.
  foreach (immutable row; 0..8) {
    int* q = p+row*8;
    int s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
    mixin(DCT1D);
    q[0] = s0<<ROW_BITS;
    q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS);
    q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS);
    q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
    q[4] = s4<<ROW_BITS;
    q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS);
    q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS);
    q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
  }
  // Column pass: removes the extra precision bits plus 3 more bits.
  foreach (immutable col; 0..8) {
    int* q = p+col;
    int s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
    mixin(DCT1D);
    q[0*8] = DCT_DESCALE(s0, ROW_BITS+3);
    q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3);
    q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3);
    q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
    q[4*8] = DCT_DESCALE(s4, ROW_BITS+3);
    q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3);
    q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3);
    q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
  }
}
3790 
// Work record used while building Huffman tables: a sort key plus the index of the symbol it belongs to.
struct sym_freq {
  uint m_key;
  uint m_sym_index;
}
3792 
// Radix sorts sym_freq[] array by 32-bit key m_key (ascending, one byte per pass).
// pSyms0 holds the input, pSyms1 is scratch space of the same size; both get overwritten.
// Returns ptr to sorted values, which is either pSyms0 or pSyms1.
sym_freq* radix_sort_syms (uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) {
  enum uint PassCount = 4;
  uint[256*PassCount] histogram; // zero-initialized; one 256-bin histogram per key byte
  foreach (immutable idx; 0..num_syms) {
    immutable uint key = pSyms0[idx].m_key;
    foreach (immutable uint pass; 0..PassCount) ++histogram[pass*256+((key>>(pass*8))&0xFF)];
  }
  sym_freq* src = pSyms0;
  sym_freq* dst = pSyms1;
  // skip the top passes whose key byte is zero for every symbol
  uint passesNeeded = PassCount;
  while (passesNeeded > 1 && num_syms == histogram[(passesNeeded-1)*256]) --passesNeeded;
  uint[256] bucketStart;
  foreach (immutable uint pass; 0..passesNeeded) {
    immutable uint shift = pass*8;
    const(uint)* counts = &histogram[pass<<8];
    // prefix sums turn bin counts into the first output slot of each bin
    uint running = 0;
    foreach (immutable bin; 0..256) { bucketStart[bin] = running; running += counts[bin]; }
    foreach (immutable idx; 0..num_syms) {
      immutable uint bin = (src[idx].m_key>>shift)&0xFF;
      dst[bucketStart[bin]++] = src[idx];
    }
    sym_freq* swapTmp = src; src = dst; dst = swapTmp;
  }
  return src;
}
3818 
// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
// Works in place on the m_key fields of A[0..n]: on entry they hold symbol frequencies,
// on return they hold Huffman code lengths. m_sym_index is not touched.
// NOTE(review): the algorithm expects A to be sorted by ascending m_key (the caller in this file
// radix-sorts first) - confirm before calling it from anywhere else.
void calculate_minimum_redundancy (sym_freq* A, int n) {
  int root, leaf, next, avbl, used, dpth;
  if (n == 0) return;
  if (n == 1) { A[0].m_key = 1; return; }
  // Phase 1: combine the two smallest remaining weights (leaf or internal node) repeatedly;
  // consumed internal nodes get their parent's index stored in m_key.
  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
  for (next=1; next < n-1; next++)
  {
    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
  }
  // Phase 2: turn parent indices into depths of the internal nodes (the root has depth 0).
  A[n-2].m_key = 0;
  for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
  // Phase 3: walk the depths level by level and hand out leaf depths, filling A from the back.
  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
  while (avbl>0)
  {
    while (root >= 0 && cast(int)A[root].m_key == dpth) { used++; root--; }
    while (avbl>used) { A[next--].m_key = dpth; avbl--; }
    avbl = 2*used; dpth++; used = 0;
  }
}
3840 
// Limits canonical Huffman code table's max code size to max_code_size.
// pNum_codes[len] is the number of codes of length len (indices 1..MAX_HUFF_CODESIZE); it is adjusted in place.
// Nothing is done when code_list_len is 1 or less.
void huffman_enforce_max_code_size (int* pNum_codes, int code_list_len, int max_code_size) {
  if (code_list_len <= 1) return;
  // fold every over-long code into the longest permitted length
  foreach (immutable len; max_code_size+1..MAX_HUFF_CODESIZE+1) pNum_codes[max_code_size] += pNum_codes[len];
  // code space used, in units of one code of length max_code_size
  uint spaceUsed = 0;
  for (int len = max_code_size; len > 0; --len) spaceUsed += (cast(uint)pNum_codes[len])<<(max_code_size-len);
  immutable ulong spaceAvailable = 1UL<<max_code_size;
  while (spaceUsed != spaceAvailable) {
    // drop one longest code, then lengthen the longest shorter code by one bit (making two codes)
    --pNum_codes[max_code_size];
    for (int len = max_code_size-1; len > 0; --len) {
      if (pNum_codes[len] == 0) continue;
      --pNum_codes[len];
      pNum_codes[len+1] += 2;
      break;
    }
    --spaceUsed;
  }
}
3855 }
3856 
3857 
3858 // ////////////////////////////////////////////////////////////////////////// //
3859 // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
3860 struct jpeg_encoder {
3861 public:
3862   alias WriteFunc = bool delegate (scope const(ubyte)[] buf);
3863 
3864 nothrow /*@trusted @nogc*/:
3865 private:
  alias sample_array_t = int;

  WriteFunc m_pStream; // output callback; every emitted byte goes through it
  JpegParams m_params;
  ubyte m_num_components; // written into the frame header; 3 selects the second (chroma) set of tables
  ubyte[3] m_comp_h_samp; // per-component horizontal sampling factor (high nibble of the frame header byte)
  ubyte[3] m_comp_v_samp; // per-component vertical sampling factor (low nibble of the frame header byte)
  int m_image_x, m_image_y, m_image_bpp, m_image_bpl; // m_image_x / m_image_y are written into the frame header
  int m_image_x_mcu, m_image_y_mcu;
  int m_image_bpl_xlt, m_image_bpl_mcu;
  int m_mcus_per_row;
  int m_mcu_x, m_mcu_y;
  ubyte*[16] m_mcu_lines;
  ubyte m_mcu_y_ofs;
  sample_array_t[64] m_sample_array;
  short[64] m_coefficient_array;
  int[64][2] m_quantization_tables; // [0] is used by component 0, [1] by the other components
  // Huffman data, indexed by table number: 0 and 1 are the DC tables, 2 and 3 the AC tables
  // (0/2 are emitted as table id 0, 1/3 as table id 1).
  uint[256][4] m_huff_codes;
  ubyte[256][4] m_huff_code_sizes;
  ubyte[17][4] m_huff_bits; // entry i (1..16) = number of codes of length i
  ubyte[256][4] m_huff_val; // symbols ordered by code size
  uint[256][4] m_huff_count; // symbol frequencies used to build optimized tables
  int[3] m_last_dc_val;
  enum JPGE_OUT_BUF_SIZE = 2048;
  ubyte[JPGE_OUT_BUF_SIZE] m_out_buf;
  ubyte* m_pOut_buf;
  uint m_out_buf_left;
  uint m_bit_buffer;
  uint m_bits_in;
  ubyte m_pass_num;
  bool m_all_stream_writes_succeeded = true; // sticky: becomes false once a write fails and then stays false
3897 
3898 private:
3899   // Generates an optimized offman table.
3900   void optimize_huffman_table (int table_num, int table_len) {
3901     sym_freq[MAX_HUFF_SYMBOLS] syms0;
3902     sym_freq[MAX_HUFF_SYMBOLS] syms1;
3903     syms0[0].m_key = 1; syms0[0].m_sym_index = 0;  // dummy symbol, assures that no valid code contains all 1's
3904     int num_used_syms = 1;
3905     const uint *pSym_count = &m_huff_count[table_num][0];
3906     for (int i = 0; i < table_len; i++) {
3907       if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i+1; }
3908     }
3909     sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0.ptr, syms1.ptr);
3910     calculate_minimum_redundancy(pSyms, num_used_syms);
3911 
3912     // Count the # of symbols of each code size.
3913     int[1+MAX_HUFF_CODESIZE] num_codes;
3914     //clear_obj(num_codes);
3915     for (int i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
3916 
3917     enum JPGE_CODE_SIZE_LIMIT = 16u; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
3918     huffman_enforce_max_code_size(num_codes.ptr, num_used_syms, JPGE_CODE_SIZE_LIMIT);
3919 
3920     // Compute m_huff_bits array, which contains the # of symbols per code size.
3921     //clear_obj(m_huff_bits[table_num]);
3922     m_huff_bits[table_num][] = 0;
3923     for (int i = 1; i <= cast(int)JPGE_CODE_SIZE_LIMIT; i++) m_huff_bits[table_num][i] = cast(ubyte)(num_codes[i]);
3924 
3925     // Remove the dummy symbol added above, which must be in largest bucket.
3926     for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) {
3927       if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
3928     }
3929 
3930     // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
3931     for (int i = num_used_syms-1; i >= 1; i--) m_huff_val[table_num][num_used_syms-1-i] = cast(ubyte)(pSyms[i].m_sym_index-1);
3932   }
3933 
3934   bool put_obj(T) (T v) {
3935     try {
3936       return (m_pStream !is null && m_pStream((&v)[0..1]));
3937     } catch (Exception) {}
3938     return false;
3939   }
3940 
3941   bool put_buf() (const(void)* v, uint len) {
3942     try {
3943       return (m_pStream !is null && m_pStream((cast(ubyte*)v)[0..len]));
3944     } catch (Exception) {}
3945     return false;
3946   }
3947 
3948   // JPEG marker generation.
3949   void emit_byte (ubyte i) {
3950     m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_obj(i);
3951   }
3952 
3953   void emit_word(uint i) {
3954     emit_byte(cast(ubyte)(i>>8));
3955     emit_byte(cast(ubyte)(i&0xFF));
3956   }
3957 
3958   void emit_marker (int marker) {
3959     emit_byte(cast(ubyte)(0xFF));
3960     emit_byte(cast(ubyte)(marker));
3961   }
3962 
3963   // Emit JFIF marker
3964   void emit_jfif_app0 () {
3965     emit_marker(M_APP0);
3966     emit_word(2+4+1+2+1+2+2+1+1);
3967     emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
3968     emit_byte(0);
3969     emit_byte(1); /* Major version */
3970     emit_byte(1); /* Minor version */
3971     emit_byte(0); /* Density unit */
3972     emit_word(1);
3973     emit_word(1);
3974     emit_byte(0); /* No thumbnail image */
3975     emit_byte(0);
3976   }
3977 
3978   // Emit quantization tables
3979   void emit_dqt () {
3980     for (int i = 0; i < (m_num_components == 3 ? 2 : 1); i++) {
3981       emit_marker(M_DQT);
3982       emit_word(64+1+2);
3983       emit_byte(cast(ubyte)(i));
3984       for (int j = 0; j < 64; j++) emit_byte(cast(ubyte)(m_quantization_tables[i][j]));
3985     }
3986   }
3987 
3988   // Emit start of frame marker
3989   void emit_sof () {
3990     emit_marker(M_SOF0); /* baseline */
3991     emit_word(3*m_num_components+2+5+1);
3992     emit_byte(8); /* precision */
3993     emit_word(m_image_y);
3994     emit_word(m_image_x);
3995     emit_byte(m_num_components);
3996     for (int i = 0; i < m_num_components; i++) {
3997       emit_byte(cast(ubyte)(i+1)); /* component ID */
3998       emit_byte(cast(ubyte)((m_comp_h_samp[i]<<4)+m_comp_v_samp[i])); /* h and v sampling */
3999       emit_byte(i > 0); /* quant. table num */
4000     }
4001   }
4002 
4003   // Emit Huffman table.
4004   void emit_dht (ubyte* bits, ubyte* val, int index, bool ac_flag) {
4005     emit_marker(M_DHT);
4006     int length = 0;
4007     for (int i = 1; i <= 16; i++) length += bits[i];
4008     emit_word(length+2+1+16);
4009     emit_byte(cast(ubyte)(index+(ac_flag<<4)));
4010     for (int i = 1; i <= 16; i++) emit_byte(bits[i]);
4011     for (int i = 0; i < length; i++) emit_byte(val[i]);
4012   }
4013 
4014   // Emit all Huffman tables.
4015   void emit_dhts () {
4016     emit_dht(m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr, 0, false);
4017     emit_dht(m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr, 0, true);
4018     if (m_num_components == 3) {
4019       emit_dht(m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr, 1, false);
4020       emit_dht(m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr, 1, true);
4021     }
4022   }
4023 
4024   // emit start of scan
4025   void emit_sos () {
4026     emit_marker(M_SOS);
4027     emit_word(2*m_num_components+2+1+3);
4028     emit_byte(m_num_components);
4029     for (int i = 0; i < m_num_components; i++) {
4030       emit_byte(cast(ubyte)(i+1));
4031       if (i == 0)
4032         emit_byte((0<<4)+0);
4033       else
4034         emit_byte((1<<4)+1);
4035     }
4036     emit_byte(0); /* spectral selection */
4037     emit_byte(63);
4038     emit_byte(0);
4039   }
4040 
  // Emit all markers at beginning of image file, in this fixed order:
  // SOI, JFIF APP0, quantization tables, frame header, Huffman tables,
  // scan header. Write failures are latched by emit_byte rather than
  // reported from here.
  void emit_markers () {
    emit_marker(M_SOI);
    emit_jfif_app0();
    emit_dqt();
    emit_sof();
    emit_dhts();
    emit_sos();
  }
4050 
4051   // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
4052   void compute_huffman_table (uint* codes, ubyte* code_sizes, ubyte* bits, ubyte* val) {
4053     import core.stdc.string : memset;
4054 
4055     int i, l, last_p, si;
4056     ubyte[257] huff_size;
4057     uint[257] huff_code;
4058     uint code;
4059 
4060     int p = 0;
4061     for (l = 1; l <= 16; l++)
4062       for (i = 1; i <= bits[l]; i++)
4063         huff_size[p++] = cast(ubyte)l;
4064 
4065     huff_size[p] = 0; last_p = p; // write sentinel
4066 
4067     code = 0; si = huff_size[0]; p = 0;
4068 
4069     while (huff_size[p])
4070     {
4071       while (huff_size[p] == si)
4072         huff_code[p++] = code++;
4073       code <<= 1;
4074       si++;
4075     }
4076 
4077     memset(codes, 0, codes[0].sizeof*256);
4078     memset(code_sizes, 0, code_sizes[0].sizeof*256);
4079     for (p = 0; p < last_p; p++)
4080     {
4081       codes[val[p]]      = huff_code[p];
4082       code_sizes[val[p]] = huff_size[p];
4083     }
4084   }
4085 
4086   // Quantization table generation.
4087   void compute_quant_table (int* pDst, const(short)* pSrc) {
4088     int q;
4089     if (m_params.quality < 50)
4090       q = 5000/m_params.quality;
4091     else
4092       q = 200-m_params.quality*2;
4093     for (int i = 0; i < 64; i++) {
4094       int j = *pSrc++; j = (j*q+50L)/100L;
4095       *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
4096     }
4097   }
4098 
4099   // Higher-level methods.
4100   void first_pass_init () {
4101     import core.stdc.string : memset;
4102     m_bit_buffer = 0; m_bits_in = 0;
4103     memset(m_last_dc_val.ptr, 0, 3*m_last_dc_val[0].sizeof);
4104     m_mcu_y_ofs = 0;
4105     m_pass_num = 1;
4106   }
4107 
4108   bool second_pass_init () {
4109     compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr);
4110     compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr);
4111     if (m_num_components > 1)
4112     {
4113       compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr);
4114       compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr);
4115     }
4116     first_pass_init();
4117     emit_markers();
4118     m_pass_num = 2;
4119     return true;
4120   }
4121 
  // Prepares the encoder for an image of p_x_res by p_y_res pixels whose
  // source scanlines have src_channels bytes per pixel.
  // Sets up sampling factors and MCU size from m_params.subsampling, derives
  // the padded dimensions, allocates the MCU line buffer, builds the
  // quantization tables, and then either starts pass one (two-pass mode) or
  // loads the predefined s_* Huffman tables and goes straight to pass two.
  // Returns false if the line buffer cannot be allocated or second_pass_init
  // fails; otherwise returns m_all_stream_writes_succeeded.
  bool jpg_open (int p_x_res, int p_y_res, int src_channels) {
    // Three components unless Y_ONLY is selected below.
    m_num_components = 3;
    switch (m_params.subsampling) {
      case JpegSubsampling.Y_ONLY:
        m_num_components = 1;
        m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
        m_mcu_x          = 8; m_mcu_y          = 8;
        break;
      case JpegSubsampling.H1V1:
        m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
        m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
        m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
        m_mcu_x          = 8; m_mcu_y          = 8;
        break;
      case JpegSubsampling.H2V1:
        m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
        m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
        m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
        m_mcu_x          = 16; m_mcu_y         = 8;
        break;
      case JpegSubsampling.H2V2:
        m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
        m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
        m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
        m_mcu_x          = 16; m_mcu_y         = 16;
        break;
      default: assert(0);
    }

    m_image_x        = p_x_res; m_image_y = p_y_res;
    m_image_bpp      = src_channels;
    m_image_bpl      = m_image_x*src_channels;
    // Round the dimensions up to a multiple of the MCU size; the mask trick
    // works because m_mcu_x and m_mcu_y are 8 or 16.
    m_image_x_mcu    = (m_image_x+m_mcu_x-1)&(~(m_mcu_x-1));
    m_image_y_mcu    = (m_image_y+m_mcu_y-1)&(~(m_mcu_y-1));
    m_image_bpl_xlt  = m_image_x*m_num_components;
    m_image_bpl_mcu  = m_image_x_mcu*m_num_components;
    m_mcus_per_row   = m_image_x_mcu/m_mcu_x;

    // One allocation holds m_mcu_y padded lines; m_mcu_lines[i] points at line i.
    if ((m_mcu_lines[0] = cast(ubyte*)(jpge_malloc(m_image_bpl_mcu*m_mcu_y))) is null) return false;
    for (int i = 1; i < m_mcu_y; i++)
      m_mcu_lines[i] = m_mcu_lines[i-1]+m_image_bpl_mcu;

    // Table 1 is built from the luminance source table as well when
    // noChromaDiscrimFlag is set.
    compute_quant_table(m_quantization_tables[0].ptr, s_std_lum_quant.ptr);
    compute_quant_table(m_quantization_tables[1].ptr, (m_params.noChromaDiscrimFlag ? s_std_lum_quant.ptr : s_std_croma_quant.ptr));

    // Start with an empty output buffer.
    m_out_buf_left = JPGE_OUT_BUF_SIZE;
    m_pOut_buf = m_out_buf.ptr;

    if (m_params.twoPass)
    {
      // Pass one only gathers symbol statistics, so start from zeroed counts.
      //clear_obj(m_huff_count);
      import core.stdc.string : memset;
      memset(m_huff_count.ptr, 0, m_huff_count.sizeof);
      first_pass_init();
    }
    else
    {
      // Single pass: copy the predefined bits (17 bytes each) and val tables.
      import core.stdc.string : memcpy;
      memcpy(m_huff_bits[0+0].ptr, s_dc_lum_bits.ptr, 17);    memcpy(m_huff_val[0+0].ptr, s_dc_lum_val.ptr, DC_LUM_CODES);
      memcpy(m_huff_bits[2+0].ptr, s_ac_lum_bits.ptr, 17);    memcpy(m_huff_val[2+0].ptr, s_ac_lum_val.ptr, AC_LUM_CODES);
      memcpy(m_huff_bits[0+1].ptr, s_dc_chroma_bits.ptr, 17); memcpy(m_huff_val[0+1].ptr, s_dc_chroma_val.ptr, DC_CHROMA_CODES);
      memcpy(m_huff_bits[2+1].ptr, s_ac_chroma_bits.ptr, 17); memcpy(m_huff_val[2+1].ptr, s_ac_chroma_val.ptr, AC_CHROMA_CODES);
      if (!second_pass_init()) return false;   // in effect, skip over the first pass
    }
    return m_all_stream_writes_succeeded;
  }
4188 
4189   void load_block_8_8_grey (int x) {
4190     ubyte *pSrc;
4191     sample_array_t *pDst = m_sample_array.ptr;
4192     x <<= 3;
4193     for (int i = 0; i < 8; i++, pDst += 8)
4194     {
4195       pSrc = m_mcu_lines[i]+x;
4196       pDst[0] = pSrc[0]-128; pDst[1] = pSrc[1]-128; pDst[2] = pSrc[2]-128; pDst[3] = pSrc[3]-128;
4197       pDst[4] = pSrc[4]-128; pDst[5] = pSrc[5]-128; pDst[6] = pSrc[6]-128; pDst[7] = pSrc[7]-128;
4198     }
4199   }
4200 
4201   void load_block_8_8 (int x, int y, int c) {
4202     ubyte *pSrc;
4203     sample_array_t *pDst = m_sample_array.ptr;
4204     x = (x*(8*3))+c;
4205     y <<= 3;
4206     for (int i = 0; i < 8; i++, pDst += 8)
4207     {
4208       pSrc = m_mcu_lines[y+i]+x;
4209       pDst[0] = pSrc[0*3]-128; pDst[1] = pSrc[1*3]-128; pDst[2] = pSrc[2*3]-128; pDst[3] = pSrc[3*3]-128;
4210       pDst[4] = pSrc[4*3]-128; pDst[5] = pSrc[5*3]-128; pDst[6] = pSrc[6*3]-128; pDst[7] = pSrc[7*3]-128;
4211     }
4212   }
4213 
4214   void load_block_16_8 (int x, int c) {
4215     ubyte* pSrc1;
4216     ubyte* pSrc2;
4217     sample_array_t *pDst = m_sample_array.ptr;
4218     x = (x*(16*3))+c;
4219     int a = 0, b = 2;
4220     for (int i = 0; i < 16; i += 2, pDst += 8)
4221     {
4222       pSrc1 = m_mcu_lines[i+0]+x;
4223       pSrc2 = m_mcu_lines[i+1]+x;
4224       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3]+pSrc2[ 0*3]+pSrc2[ 1*3]+a)>>2)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3]+pSrc2[ 2*3]+pSrc2[ 3*3]+b)>>2)-128;
4225       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3]+pSrc2[ 4*3]+pSrc2[ 5*3]+a)>>2)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3]+pSrc2[ 6*3]+pSrc2[ 7*3]+b)>>2)-128;
4226       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3]+pSrc2[ 8*3]+pSrc2[ 9*3]+a)>>2)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3]+pSrc2[10*3]+pSrc2[11*3]+b)>>2)-128;
4227       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3]+pSrc2[12*3]+pSrc2[13*3]+a)>>2)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3]+pSrc2[14*3]+pSrc2[15*3]+b)>>2)-128;
4228       int temp = a; a = b; b = temp;
4229     }
4230   }
4231 
4232   void load_block_16_8_8 (int x, int c) {
4233     ubyte *pSrc1;
4234     sample_array_t *pDst = m_sample_array.ptr;
4235     x = (x*(16*3))+c;
4236     for (int i = 0; i < 8; i++, pDst += 8) {
4237       pSrc1 = m_mcu_lines[i+0]+x;
4238       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3])>>1)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3])>>1)-128;
4239       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3])>>1)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3])>>1)-128;
4240       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3])>>1)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3])>>1)-128;
4241       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3])>>1)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3])>>1)-128;
4242     }
4243   }
4244 
4245   void load_quantized_coefficients (int component_num) {
4246     int *q = m_quantization_tables[component_num > 0].ptr;
4247     short *pDst = m_coefficient_array.ptr;
4248     for (int i = 0; i < 64; i++)
4249     {
4250       sample_array_t j = m_sample_array[s_zag[i]];
4251       if (j < 0)
4252       {
4253         if ((j = -j+(*q>>1)) < *q)
4254           *pDst++ = 0;
4255         else
4256           *pDst++ = cast(short)(-(j/ *q));
4257       }
4258       else
4259       {
4260         if ((j = j+(*q>>1)) < *q)
4261           *pDst++ = 0;
4262         else
4263           *pDst++ = cast(short)((j/ *q));
4264       }
4265       q++;
4266     }
4267   }
4268 
4269   void flush_output_buffer () {
4270     if (m_out_buf_left != JPGE_OUT_BUF_SIZE) m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_buf(m_out_buf.ptr, JPGE_OUT_BUF_SIZE-m_out_buf_left);
4271     m_pOut_buf = m_out_buf.ptr;
4272     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4273   }
4274 
  // Appends the low `len` bits of `bits` to the output bit stream.
  // Pending bits are kept in m_bit_buffer aligned so that the oldest pending
  // bit sits at bit 23; m_bits_in counts them. Every complete byte is moved
  // to the output buffer, which is flushed whenever it becomes full.
  void put_bits (uint bits, uint len) {
    // m_bits_in is advanced first, so the new bits land right below the
    // ones already pending.
    m_bit_buffer |= (cast(uint)bits<<(24-(m_bits_in += len)));
    while (m_bits_in >= 8) {
      ubyte c;
      // The three commented lines below show the C++ macro form that the
      // expanded code after them replaces.
      //#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
      //JPGE_PUT_BYTE(c = (ubyte)((m_bit_buffer>>16)&0xFF));
      //if (c == 0xFF) JPGE_PUT_BYTE(0);
      c = cast(ubyte)((m_bit_buffer>>16)&0xFF); // oldest complete byte
      *m_pOut_buf++ = c;
      if (--m_out_buf_left == 0) flush_output_buffer();
      if (c == 0xFF) {
        // A data byte of 0xFF is followed by a zero byte in the output.
        *m_pOut_buf++ = 0;
        if (--m_out_buf_left == 0) flush_output_buffer();
      }
      m_bit_buffer <<= 8;
      m_bits_in -= 8;
    }
  }
4293 
4294   void code_coefficients_pass_one (int component_num) {
4295     if (component_num >= 3) return; // just to shut up static analysis
4296     int i, run_len, nbits, temp1;
4297     short *src = m_coefficient_array.ptr;
4298     uint *dc_count = (component_num ? m_huff_count[0+1].ptr : m_huff_count[0+0].ptr);
4299     uint *ac_count = (component_num ? m_huff_count[2+1].ptr : m_huff_count[2+0].ptr);
4300 
4301     temp1 = src[0]-m_last_dc_val[component_num];
4302     m_last_dc_val[component_num] = src[0];
4303     if (temp1 < 0) temp1 = -temp1;
4304 
4305     nbits = 0;
4306     while (temp1)
4307     {
4308       nbits++; temp1 >>= 1;
4309     }
4310 
4311     dc_count[nbits]++;
4312     for (run_len = 0, i = 1; i < 64; i++)
4313     {
4314       if ((temp1 = m_coefficient_array[i]) == 0)
4315         run_len++;
4316       else
4317       {
4318         while (run_len >= 16)
4319         {
4320           ac_count[0xF0]++;
4321           run_len -= 16;
4322         }
4323         if (temp1 < 0) temp1 = -temp1;
4324         nbits = 1;
4325         while (temp1 >>= 1) nbits++;
4326         ac_count[(run_len<<4)+nbits]++;
4327         run_len = 0;
4328       }
4329     }
4330     if (run_len) ac_count[0]++;
4331   }
4332 
  // Output pass: writes the Huffman-coded form of the current coefficient
  // block through put_bits and updates the component's DC predictor.
  // Component 0 uses code table slots 0 and 2, every other component uses
  // slots 1 and 3 (index 0 of the local arrays is used for the DC symbol,
  // index 1 for the AC symbols).
  void code_coefficients_pass_two (int component_num) {
    int i, j, run_len, nbits, temp1, temp2;
    short *pSrc = m_coefficient_array.ptr;
    uint*[2] codes;
    ubyte*[2] code_sizes;

    if (component_num == 0)
    {
      codes[0] = m_huff_codes[0+0].ptr; codes[1] = m_huff_codes[2+0].ptr;
      code_sizes[0] = m_huff_code_sizes[0+0].ptr; code_sizes[1] = m_huff_code_sizes[2+0].ptr;
    }
    else
    {
      codes[0] = m_huff_codes[0+1].ptr; codes[1] = m_huff_codes[2+1].ptr;
      code_sizes[0] = m_huff_code_sizes[0+1].ptr; code_sizes[1] = m_huff_code_sizes[2+1].ptr;
    }

    // DC: code the difference to the previous DC value of this component.
    temp1 = temp2 = pSrc[0]-m_last_dc_val[component_num];
    m_last_dc_val[component_num] = pSrc[0];

    // temp1 becomes the magnitude; for negative values temp2 is lowered by
    // one, and its low nbits bits are what gets written after the symbol.
    if (temp1 < 0)
    {
      temp1 = -temp1; temp2--;
    }

    // nbits = number of bits needed for the magnitude (0 for a zero difference).
    nbits = 0;
    while (temp1)
    {
      nbits++; temp1 >>= 1;
    }

    put_bits(codes[0][nbits], code_sizes[0][nbits]);
    if (nbits) put_bits(temp2&((1<<nbits)-1), nbits);

    // AC: symbols are (zero run << 4) + bit length, each followed by the
    // value bits.
    for (run_len = 0, i = 1; i < 64; i++)
    {
      if ((temp1 = m_coefficient_array[i]) == 0)
        run_len++;
      else
      {
        // Each full run of 16 zeros is written as symbol 0xF0.
        while (run_len >= 16)
        {
          put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
          run_len -= 16;
        }
        if ((temp2 = temp1) < 0)
        {
          temp1 = -temp1;
          temp2--;
        }
        nbits = 1;
        while (temp1 >>= 1)
          nbits++;
        j = (run_len<<4)+nbits;
        put_bits(codes[1][j], code_sizes[1][j]);
        put_bits(temp2&((1<<nbits)-1), nbits);
        run_len = 0;
      }
    }
    // Trailing zeros are written as symbol 0.
    if (run_len)
      put_bits(codes[1][0], code_sizes[1][0]);
  }
4395 
4396   void code_block (int component_num) {
4397     DCT2D(m_sample_array.ptr);
4398     load_quantized_coefficients(component_num);
4399     if (m_pass_num == 1)
4400       code_coefficients_pass_one(component_num);
4401     else
4402       code_coefficients_pass_two(component_num);
4403   }
4404 
4405   void process_mcu_row () {
4406     if (m_num_components == 1)
4407     {
4408       for (int i = 0; i < m_mcus_per_row; i++)
4409       {
4410         load_block_8_8_grey(i); code_block(0);
4411       }
4412     }
4413     else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
4414     {
4415       for (int i = 0; i < m_mcus_per_row; i++)
4416       {
4417         load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
4418       }
4419     }
4420     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
4421     {
4422       for (int i = 0; i < m_mcus_per_row; i++)
4423       {
4424         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4425         load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
4426       }
4427     }
4428     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
4429     {
4430       for (int i = 0; i < m_mcus_per_row; i++)
4431       {
4432         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4433         load_block_8_8(i*2+0, 1, 0); code_block(0); load_block_8_8(i*2+1, 1, 0); code_block(0);
4434         load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
4435       }
4436     }
4437   }
4438 
4439   bool terminate_pass_one () {
4440     optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
4441     if (m_num_components > 1)
4442     {
4443       optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
4444     }
4445     return second_pass_init();
4446   }
4447 
4448   bool terminate_pass_two () {
4449     put_bits(0x7F, 7);
4450     flush_output_buffer();
4451     emit_marker(M_EOI);
4452     m_pass_num++; // purposely bump up m_pass_num, for debugging
4453     return true;
4454   }
4455 
4456   bool process_end_of_image () {
4457     if (m_mcu_y_ofs)
4458     {
4459       if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
4460       {
4461         for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) {
4462           import core.stdc.string : memcpy;
4463           memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs-1], m_image_bpl_mcu);
4464         }
4465       }
4466       process_mcu_row();
4467     }
4468 
4469     if (m_pass_num == 1)
4470       return terminate_pass_one();
4471     else
4472       return terminate_pass_two();
4473   }
4474 
4475   void load_mcu (const(void)* pSrc) {
4476     import core.stdc.string : memcpy;
4477     const(ubyte)* Psrc = cast(const(ubyte)*)(pSrc);
4478 
4479     ubyte* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
4480 
4481     if (m_num_components == 1)
4482     {
4483       if (m_image_bpp == 4)
4484         RGBA_to_Y(pDst, Psrc, m_image_x);
4485       else if (m_image_bpp == 3)
4486         RGB_to_Y(pDst, Psrc, m_image_x);
4487       else
4488         memcpy(pDst, Psrc, m_image_x);
4489     }
4490     else
4491     {
4492       if (m_image_bpp == 4)
4493         RGBA_to_YCC(pDst, Psrc, m_image_x);
4494       else if (m_image_bpp == 3)
4495         RGB_to_YCC(pDst, Psrc, m_image_x);
4496       else
4497         Y_to_YCC(pDst, Psrc, m_image_x);
4498     }
4499 
4500     // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
4501     if (m_num_components == 1) {
4502       import core.stdc.string : memset;
4503       memset(m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt, pDst[m_image_bpl_xlt-1], m_image_x_mcu-m_image_x);
4504     } else
4505     {
4506       const ubyte y = pDst[m_image_bpl_xlt-3+0], cb = pDst[m_image_bpl_xlt-3+1], cr = pDst[m_image_bpl_xlt-3+2];
4507       ubyte *q = m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt;
4508       for (int i = m_image_x; i < m_image_x_mcu; i++)
4509       {
4510         *q++ = y; *q++ = cb; *q++ = cr;
4511       }
4512     }
4513 
4514     if (++m_mcu_y_ofs == m_mcu_y)
4515     {
4516       process_mcu_row();
4517       m_mcu_y_ofs = 0;
4518     }
4519   }
4520 
4521   void clear() {
4522     m_mcu_lines[0] = null;
4523     m_pass_num = 0;
4524     m_all_stream_writes_succeeded = true;
4525   }
4526 
4527 
4528 public:
4529   //this () { clear(); }
  // Destructor: releases the line buffer and resets the state via deinit().
  ~this () { deinit(); }
4531 
  // Postblit is disabled, so values of this type cannot be copied.
  @disable this (this); // no copies
4533 
4534   // Initializes the compressor.
4535   // pStream: The stream object to use for writing compressed data.
4536   // comp_params - Compression parameters structure, defined above.
4537   // width, height  - Image dimensions.
  // src_channels - May be 1, 3, or 4. 1 indicates grayscale, 3 indicates RGB, 4 indicates RGBA source data.
4539   // Returns false on out of memory or if a stream write fails.
4540   bool setup() (WriteFunc pStream, int width, int height, int src_channels, const scope auto ref JpegParams comp_params) {
4541     deinit();
4542     if ((pStream is null || width < 1 || height < 1) || (src_channels != 1 && src_channels != 3 && src_channels != 4) || !comp_params.check()) return false;
4543     m_pStream = pStream;
4544     m_params = comp_params;
4545     return jpg_open(width, height, src_channels);
4546   }
4547 
4548   bool setup() (WriteFunc pStream, int width, int height, int src_channels) { return setup(pStream, width, height, src_channels, JpegParams()); }
4549 
  // Returns a reference to the stored compression parameters (m_params).
  @property ref inout(JpegParams) params () return inout pure nothrow @safe @nogc { pragma(inline, true); return m_params; }
4551 
  // Deinitializes the compressor, freeing any allocated memory. May be called at any time.
  // The line buffer is one allocation anchored at m_mcu_lines[0], so freeing
  // that pointer releases all lines; clear() then resets that pointer, the
  // pass number and the write-success flag.
  void deinit () {
    jpge_free(m_mcu_lines[0]);
    clear();
  }
4557 
4558   @property uint total_passes () const pure nothrow @safe @nogc { pragma(inline, true); return (m_params.twoPass ? 2 : 1); }
  // Current value of m_pass_num: 0 after clear(), 1 or 2 while a pass is
  // running, and one higher than 2 once terminate_pass_two has run.
  @property uint cur_pass () const pure nothrow @safe @nogc { pragma(inline, true); return m_pass_num; }
4560 
4561   // Call this method with each source scanline.
  // width*src_channels bytes per scanline are expected (Y, RGB or RGBA format, matching the src_channels passed to setup).
4563   // You must call with null after all scanlines are processed to finish compression.
4564   // Returns false on out of memory or if a stream write fails.
4565   bool process_scanline (const(void)* pScanline) {
4566     if (m_pass_num < 1 || m_pass_num > 2) return false;
4567     if (m_all_stream_writes_succeeded) {
4568       if (pScanline is null) {
4569         if (!process_end_of_image()) return false;
4570       } else {
4571         load_mcu(pScanline);
4572       }
4573     }
4574     return m_all_stream_writes_succeeded;
4575   }
4576 }