1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  *
40  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
 * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
 * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
43  */
44 module arsd.jpeg;
45 
// Define this version identifier to enable freq. domain chroma upsampling on images using H2V2 subsampling
// (leave it undefined for the faster nearest neighbor sampling).
// This is slower, but results in higher quality on images with highly saturated colors.
48 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
49 
/// Input stream interface.
///
/// This delegate is called when the internal input buffer is empty. It will be called
/// in a loop until `*pEOF_flag` is set to true or the internal buffer is full.
///
/// Params:
///   pBuf = input buffer to fill
///   max_bytes_to_read = maximum number of bytes that can be written to `pBuf`
///   pEOF_flag = set `*pEOF_flag` to true if at end of stream (no more bytes remaining)
///
/// Returns: -1 on error, otherwise the number of bytes actually written to the buffer (which may be 0).
alias JpegStreamReadFunc = int delegate (
  void* pBuf,
  int max_bytes_to_read,
  bool* pEOF_flag
);
59 
60 
61 // ////////////////////////////////////////////////////////////////////////// //
62 private:
// Allocates nSize bytes with the C runtime allocator and returns whatever malloc() returned.
void *jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
// Releases memory through the C runtime free(); a null pointer is ignored.
void jpgd_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
65 
// Success/failure error codes.
alias jpgd_status = int;
enum /*jpgd_status*/ {
  // Generic results.
  JPGD_SUCCESS = 0,
  JPGD_FAILED = -1,
  JPGD_DONE = 1,

  // Specific failure reasons: values count up from -256 in declaration order.
  JPGD_BAD_DHT_COUNTS = -256,
  JPGD_BAD_DHT_INDEX,
  JPGD_BAD_DHT_MARKER,
  JPGD_BAD_DQT_MARKER,
  JPGD_BAD_DQT_TABLE,
  JPGD_BAD_PRECISION,
  JPGD_BAD_HEIGHT,
  JPGD_BAD_WIDTH,
  JPGD_TOO_MANY_COMPONENTS,
  JPGD_BAD_SOF_LENGTH,
  JPGD_BAD_VARIABLE_MARKER,
  JPGD_BAD_DRI_LENGTH,
  JPGD_BAD_SOS_LENGTH,
  JPGD_BAD_SOS_COMP_ID,
  JPGD_W_EXTRA_BYTES_BEFORE_MARKER,
  JPGD_NO_ARITHMITIC_SUPPORT,
  JPGD_UNEXPECTED_MARKER,
  JPGD_NOT_JPEG,
  JPGD_UNSUPPORTED_MARKER,
  JPGD_BAD_DQT_LENGTH,
  JPGD_TOO_MANY_BLOCKS,
  JPGD_UNDEFINED_QUANT_TABLE,
  JPGD_UNDEFINED_HUFF_TABLE,
  JPGD_NOT_SINGLE_SCAN,
  JPGD_UNSUPPORTED_COLORSPACE,
  JPGD_UNSUPPORTED_SAMP_FACTORS,
  JPGD_DECODE_ERROR,
  JPGD_BAD_RESTART_MARKER,
  JPGD_ASSERTION_ERROR,
  JPGD_BAD_SOS_SPECTRAL,
  JPGD_BAD_SOS_SUCCESSIVE,
  JPGD_STREAM_READ,
  JPGD_NOTENOUGHMEM,
}
79 
// Fixed decoder limits and buffer sizes.
enum {
  JPGD_IN_BUF_SIZE = 8192,
  JPGD_MAX_BLOCKS_PER_MCU = 10,
  JPGD_MAX_HUFF_TABLES = 8,
  JPGD_MAX_QUANT_TABLES = 4,
  JPGD_MAX_COMPONENTS = 4,
  JPGD_MAX_COMPS_IN_SCAN = 4,
  JPGD_MAX_BLOCKS_PER_ROW = 8192,
  JPGD_MAX_HEIGHT = 16384,
  JPGD_MAX_WIDTH = 16384,
}
84 
// DCT coefficients are stored in this sequence.
// Entry k is the position inside the 8x8 block (row*8 + column) of the k-th stored coefficient.
static immutable int[64] g_ZAG = [
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63,
];
87 
alias JPEG_MARKER = int;
enum /*JPEG_MARKER*/ {
  // Start-of-frame markers.
  M_SOF0  = 0xC0,
  M_SOF1  = 0xC1,
  M_SOF2  = 0xC2,
  M_SOF3  = 0xC3,
  M_SOF5  = 0xC5,
  M_SOF6  = 0xC6,
  M_SOF7  = 0xC7,
  M_JPG   = 0xC8,
  M_SOF9  = 0xC9,
  M_SOF10 = 0xCA,
  M_SOF11 = 0xCB,
  M_SOF13 = 0xCD,
  M_SOF14 = 0xCE,
  M_SOF15 = 0xCF,

  // Table definitions.
  M_DHT   = 0xC4,
  M_DAC   = 0xCC,

  // Restart markers.
  M_RST0  = 0xD0,
  M_RST1  = 0xD1,
  M_RST2  = 0xD2,
  M_RST3  = 0xD3,
  M_RST4  = 0xD4,
  M_RST5  = 0xD5,
  M_RST6  = 0xD6,
  M_RST7  = 0xD7,

  // Image / scan structure.
  M_SOI   = 0xD8,
  M_EOI   = 0xD9,
  M_SOS   = 0xDA,
  M_DQT   = 0xDB,
  M_DNL   = 0xDC,
  M_DRI   = 0xDD,
  M_DHP   = 0xDE,
  M_EXP   = 0xDF,

  // Application, extension and comment segments.
  M_APP0  = 0xE0,
  M_APP15 = 0xEF,
  M_JPG0  = 0xF0,
  M_JPG13 = 0xFD,
  M_COM   = 0xFE,

  // Miscellaneous.
  M_TEM   = 0x01,
  M_ERROR = 0x100,
  RST0    = 0xD0,
}
96 
alias JPEG_SUBSAMPLING = int;
// Scan layouts handled by the decoder; the values are consecutive starting at 0.
enum /*JPEG_SUBSAMPLING*/ {
  JPGD_GRAYSCALE = 0,
  JPGD_YH1V1 = 1,
  JPGD_YH2V1 = 2,
  JPGD_YH1V2 = 3,
  JPGD_YH2V2 = 4,
}
99 
// Fixed-point parameters of the integer IDCT.
enum int CONST_BITS = 13;
enum int PASS1_BITS = 2;
enum int SCALEDONE = 1;

// Multiplier constants; the name spells out the real value each one stands for.
enum int FIX_0_298631336 = 2446;  /* FIX(0.298631336) */
enum int FIX_0_390180644 = 3196;  /* FIX(0.390180644) */
enum int FIX_0_541196100 = 4433;  /* FIX(0.541196100) */
enum int FIX_0_765366865 = 6270;  /* FIX(0.765366865) */
enum int FIX_0_899976223 = 7373;  /* FIX(0.899976223) */
enum int FIX_1_175875602 = 9633;  /* FIX(1.175875602) */
enum int FIX_1_501321110 = 12299; /* FIX(1.501321110) */
enum int FIX_1_847759065 = 15137; /* FIX(1.847759065) */
enum int FIX_1_961570560 = 16069; /* FIX(1.961570560) */
enum int FIX_2_053119869 = 16819; /* FIX(2.053119869) */
enum int FIX_2_562915447 = 20995; /* FIX(2.562915447) */
enum int FIX_3_072711026 = 25172; /* FIX(3.072711026) */
116 
// Rounding right shift: adds half of 2^n to x, then shifts right by n bits.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  return (x + (1 << (n - 1))) >> n;
}
// Rounding right shift by n bits that also adds 128 to the shifted result
// (the 128 is pre-scaled by 2^n before the shift).
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  return (x + (128 << n) + (1 << (n - 1))) >> n;
}
// Saturates i to the range 0..255: negative values become 0, values above 255 become 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  return cast(ubyte)(cast(uint)i <= 255 ? i : (i < 0 ? 0 : 0xFF));
}
120 
121 
// First IDCT pass: a 1D IDCT over one row of 8 coefficients.
// NONZERO_COLS is the number of leading coefficients that are actually read; the
// remaining ones are replaced by a literal 0 at compile time, so every instantiation
// is its own fast path.
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  // Transforms the row at pSrc and stores 8 intermediate values at pTemp.
  // With NONZERO_COLS == 0 nothing is written.
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // empty row: pTemp is left untouched
    } else static if (NONZERO_COLS == 1) {
      // only the first coefficient is present: every output equals its scaled value
      immutable int dc = pSrc[0] << PASS1_BITS;
      foreach (immutable k; 0 .. 8) pTemp[k] = dc;
    } else {
      // COL!x is the source text of "coefficient x": a real read, or 0 when x is outside the non-zero range.
      template COL(int x) {
        static if (x < NONZERO_COLS) enum COL = "cast(int)pSrc["~x.stringof~"]";
        else enum COL = "0";
      }

      enum int SHIFT = CONST_BITS-PASS1_BITS;

      // even part (coefficients 0, 2, 4, 6)
      immutable int c2 = mixin(COL!2);
      immutable int c6 = mixin(COL!6);
      immutable int rot = (c2 + c6)*FIX_0_541196100;
      immutable int e2 = rot + c6*(-FIX_1_847759065);
      immutable int e3 = rot + c2*FIX_0_765366865;

      immutable int e0 = (mixin(COL!0) + mixin(COL!4)) << CONST_BITS;
      immutable int e1 = (mixin(COL!0) - mixin(COL!4)) << CONST_BITS;

      immutable int ev0 = e0 + e3;
      immutable int ev3 = e0 - e3;
      immutable int ev1 = e1 + e2;
      immutable int ev2 = e1 - e2;

      // odd part (coefficients 7, 5, 3, 1)
      immutable int c7 = mixin(COL!7);
      immutable int c5 = mixin(COL!5);
      immutable int c3 = mixin(COL!3);
      immutable int c1 = mixin(COL!1);

      immutable int s71 = c7 + c1;
      immutable int s53 = c5 + c3;
      immutable int s73 = c7 + c3;
      immutable int s51 = c5 + c1;
      immutable int common = (s73 + s51)*FIX_1_175875602;

      immutable int k71 = s71*(-FIX_0_899976223);
      immutable int k53 = s53*(-FIX_2_562915447);
      immutable int k73 = s73*(-FIX_1_961570560) + common;
      immutable int k51 = s51*(-FIX_0_390180644) + common;

      immutable int odd7 = c7*FIX_0_298631336 + k71 + k73;
      immutable int odd5 = c5*FIX_2_053119869 + k53 + k51;
      immutable int odd3 = c3*FIX_3_072711026 + k53 + k73;
      immutable int odd1 = c1*FIX_1_501321110 + k71 + k51;

      // butterfly: outputs k and 7-k share one even term and one odd term
      pTemp[0] = DESCALE(ev0 + odd1, SHIFT);
      pTemp[7] = DESCALE(ev0 - odd1, SHIFT);
      pTemp[1] = DESCALE(ev1 + odd3, SHIFT);
      pTemp[6] = DESCALE(ev1 - odd3, SHIFT);
      pTemp[2] = DESCALE(ev2 + odd5, SHIFT);
      pTemp[5] = DESCALE(ev2 - odd5, SHIFT);
      pTemp[3] = DESCALE(ev3 + odd7, SHIFT);
      pTemp[4] = DESCALE(ev3 - odd7, SHIFT);
    }
  }
}
182 
183 
// Second IDCT pass: a 1D IDCT down one column of the intermediate 8x8 buffer.
// NONZERO_ROWS (at least 1) is the number of leading rows that are actually read; the
// remaining ones are replaced by a literal 0 at compile time.
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  // Reads the column starting at pTemp (stride 8) and stores 8 clamped bytes
  // at pDst_ptr (stride 8).
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // only the first row is present: the whole column gets the same pixel value
      immutable ubyte px = CLAMP(DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3));
      foreach (immutable r; 0 .. 8) pDst_ptr[r*8] = px;
    } else {
      // ROW!x is the source text of "row x of this column": a real read, or 0 when x is outside the non-zero range.
      template ROW(int x) {
        static if (x < NONZERO_ROWS) enum ROW = "pTemp["~(x*8).stringof~"]";
        else enum ROW = "0";
      }

      enum int SHIFT = CONST_BITS+PASS1_BITS+3;

      // even part (rows 0, 2, 4, 6)
      immutable int r2 = mixin(ROW!2);
      immutable int r6 = mixin(ROW!6);
      immutable int rot = (r2 + r6)*FIX_0_541196100;
      immutable int e2 = rot + r6*(-FIX_1_847759065);
      immutable int e3 = rot + r2*FIX_0_765366865;

      immutable int e0 = (mixin(ROW!0) + mixin(ROW!4)) << CONST_BITS;
      immutable int e1 = (mixin(ROW!0) - mixin(ROW!4)) << CONST_BITS;

      immutable int ev0 = e0 + e3;
      immutable int ev3 = e0 - e3;
      immutable int ev1 = e1 + e2;
      immutable int ev2 = e1 - e2;

      // odd part (rows 7, 5, 3, 1)
      immutable int r7 = mixin(ROW!7);
      immutable int r5 = mixin(ROW!5);
      immutable int r3 = mixin(ROW!3);
      immutable int r1 = mixin(ROW!1);

      immutable int s71 = r7 + r1;
      immutable int s53 = r5 + r3;
      immutable int s73 = r7 + r3;
      immutable int s51 = r5 + r1;
      immutable int common = (s73 + s51)*FIX_1_175875602;

      immutable int k71 = s71*(-FIX_0_899976223);
      immutable int k53 = s53*(-FIX_2_562915447);
      immutable int k73 = s73*(-FIX_1_961570560) + common;
      immutable int k51 = s51*(-FIX_0_390180644) + common;

      immutable int odd7 = r7*FIX_0_298631336 + k71 + k73;
      immutable int odd5 = r5*FIX_2_053119869 + k53 + k51;
      immutable int odd3 = r3*FIX_3_072711026 + k53 + k73;
      immutable int odd1 = r1*FIX_1_501321110 + k71 + k51;

      // butterfly: outputs k and 7-k share one even term and one odd term
      pDst_ptr[8*0] = CLAMP(DESCALE_ZEROSHIFT(ev0 + odd1, SHIFT));
      pDst_ptr[8*7] = CLAMP(DESCALE_ZEROSHIFT(ev0 - odd1, SHIFT));
      pDst_ptr[8*1] = CLAMP(DESCALE_ZEROSHIFT(ev1 + odd3, SHIFT));
      pDst_ptr[8*6] = CLAMP(DESCALE_ZEROSHIFT(ev1 - odd3, SHIFT));
      pDst_ptr[8*2] = CLAMP(DESCALE_ZEROSHIFT(ev2 + odd5, SHIFT));
      pDst_ptr[8*5] = CLAMP(DESCALE_ZEROSHIFT(ev2 - odd5, SHIFT));
      pDst_ptr[8*3] = CLAMP(DESCALE_ZEROSHIFT(ev3 + odd7, SHIFT));
      pDst_ptr[8*4] = CLAMP(DESCALE_ZEROSHIFT(ev3 - odd7, SHIFT));
    }
  }
}
260 
261 
// Row fast-path selector used by idct(): 64 groups of 8 entries.
// Group (block_max_zag - 1) holds, for each of the 8 rows, the Row!(N) instantiation to run.
static immutable ubyte[512] s_idct_row_table = [
  // block_max_zag 1..8
  1,0,0,0,0,0,0,0,  2,0,0,0,0,0,0,0,  2,1,0,0,0,0,0,0,  2,1,1,0,0,0,0,0,
  2,2,1,0,0,0,0,0,  3,2,1,0,0,0,0,0,  4,2,1,0,0,0,0,0,  4,3,1,0,0,0,0,0,
  // block_max_zag 9..16
  4,3,2,0,0,0,0,0,  4,3,2,1,0,0,0,0,  4,3,2,1,1,0,0,0,  4,3,2,2,1,0,0,0,
  4,3,3,2,1,0,0,0,  4,4,3,2,1,0,0,0,  5,4,3,2,1,0,0,0,  6,4,3,2,1,0,0,0,
  // block_max_zag 17..24
  6,5,3,2,1,0,0,0,  6,5,4,2,1,0,0,0,  6,5,4,3,1,0,0,0,  6,5,4,3,2,0,0,0,
  6,5,4,3,2,1,0,0,  6,5,4,3,2,1,1,0,  6,5,4,3,2,2,1,0,  6,5,4,3,3,2,1,0,
  // block_max_zag 25..32
  6,5,4,4,3,2,1,0,  6,5,5,4,3,2,1,0,  6,6,5,4,3,2,1,0,  7,6,5,4,3,2,1,0,
  8,6,5,4,3,2,1,0,  8,7,5,4,3,2,1,0,  8,7,6,4,3,2,1,0,  8,7,6,5,3,2,1,0,
  // block_max_zag 33..40
  8,7,6,5,4,2,1,0,  8,7,6,5,4,3,1,0,  8,7,6,5,4,3,2,0,  8,7,6,5,4,3,2,1,
  8,7,6,5,4,3,2,2,  8,7,6,5,4,3,3,2,  8,7,6,5,4,4,3,2,  8,7,6,5,5,4,3,2,
  // block_max_zag 41..48
  8,7,6,6,5,4,3,2,  8,7,7,6,5,4,3,2,  8,8,7,6,5,4,3,2,  8,8,8,6,5,4,3,2,
  8,8,8,7,5,4,3,2,  8,8,8,7,6,4,3,2,  8,8,8,7,6,5,3,2,  8,8,8,7,6,5,4,2,
  // block_max_zag 49..56
  8,8,8,7,6,5,4,3,  8,8,8,7,6,5,4,4,  8,8,8,7,6,5,5,4,  8,8,8,7,6,6,5,4,
  8,8,8,7,7,6,5,4,  8,8,8,8,7,6,5,4,  8,8,8,8,8,6,5,4,  8,8,8,8,8,7,5,4,
  // block_max_zag 57..64
  8,8,8,8,8,7,6,4,  8,8,8,8,8,7,6,5,  8,8,8,8,8,7,6,6,  8,8,8,8,8,7,7,6,
  8,8,8,8,8,8,7,6,  8,8,8,8,8,8,8,6,  8,8,8,8,8,8,8,7,  8,8,8,8,8,8,8,8,
];
272 
// Column fast-path selector used by idct(): entry (block_max_zag - 1) is the Col!(N) instantiation to run.
static immutable ubyte[64] s_idct_col_table = [
  1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
];
274 
// Full 8x8 inverse DCT.
//   pSrc_ptr      - 64 source coefficients, 8 per row
//   pDst_ptr      - receives 64 bytes, 8 per row
//   block_max_zag - must be in 1..64; selects the fast paths through s_idct_row_table and s_idct_col_table
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  if (block_max_zag <= 1) {
    // Only the first coefficient is used: the whole block is one flat value.
    int fill = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    fill |= fill << 8;
    fill |= fill << 16; // the same byte replicated into all four bytes of the int

    foreach (immutable line; 0 .. 8) {
      int* pWords = cast(int*)(pDst_ptr + line*8);
      pWords[0] = fill;
      pWords[1] = fill;
    }
    return;
  }

  // Zero-initialised, so rows skipped by Row!(0) stay 0.
  int[64] workspace;

  // Pass 1: rows of the source block into the workspace.
  immutable int rowTabBase = (block_max_zag - 1) * 8;
  foreach (immutable r; 0 .. 8) {
    const(jpeg_decoder.jpgd_block_t)* pSrcRow = pSrc_ptr + r*8;
    int* pTempRow = workspace.ptr + r*8;

    switch (s_idct_row_table.ptr[rowTabBase + r]) {
      case 0: Row!(0).idct(pTempRow, pSrcRow); break;
      case 1: Row!(1).idct(pTempRow, pSrcRow); break;
      case 2: Row!(2).idct(pTempRow, pSrcRow); break;
      case 3: Row!(3).idct(pTempRow, pSrcRow); break;
      case 4: Row!(4).idct(pTempRow, pSrcRow); break;
      case 5: Row!(5).idct(pTempRow, pSrcRow); break;
      case 6: Row!(6).idct(pTempRow, pSrcRow); break;
      case 7: Row!(7).idct(pTempRow, pSrcRow); break;
      case 8: Row!(8).idct(pTempRow, pSrcRow); break;
      default: assert(0);
    }
  }

  // Pass 2: columns of the workspace into the destination; all columns use the same specialisation.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable c; 0 .. 8) {
    const(int)* pTempCol = workspace.ptr + c;
    ubyte* pDstCol = pDst_ptr + c;

    switch (nonzero_rows) {
      case 1: Col!(1).idct(pDstCol, pTempCol); break;
      case 2: Col!(2).idct(pDstCol, pTempCol); break;
      case 3: Col!(3).idct(pDstCol, pTempCol); break;
      case 4: Col!(4).idct(pDstCol, pTempCol); break;
      case 5: Col!(5).idct(pDstCol, pTempCol); break;
      case 6: Col!(6).idct(pDstCol, pTempCol); break;
      case 7: Col!(7).idct(pDstCol, pTempCol); break;
      case 8: Col!(8).idct(pDstCol, pTempCol); break;
      default: assert(0);
    }
  }
}
344 
// Inverse DCT that reads only the top-left 4x4 coefficients of the source block
// (4 rows through Row!(4), then all 8 columns through Col!(4)) and writes a full 8x8 byte block.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] workspace;

  // Pass 1: only the first four rows are transformed.
  foreach (immutable r; 0 .. 4) {
    Row!(4).idct(workspace.ptr + r*8, pSrc_ptr + r*8);
  }

  // Pass 2: every column, reading only the four rows produced above.
  foreach (immutable c; 0 .. 8) {
    Col!(4).idct(pDst_ptr + c, workspace.ptr + c);
  }
}
365 
366 
367 // ////////////////////////////////////////////////////////////////////////// //
368 struct jpeg_decoder {
369 private import core.stdc.string : memcpy, memset;
370 private:
371   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
372   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
373 
374   alias jpgd_quant_t = short;
375   alias jpgd_block_t = short;
376   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
377 
378   static struct huff_tables {
379     bool ac_table;
380     uint[256] look_up;
381     uint[256] look_up2;
382     ubyte[256] code_size;
383     uint[512] tree;
384   }
385 
386   static struct coeff_buf {
387     ubyte* pData;
388     int block_num_x, block_num_y;
389     int block_len_x, block_len_y;
390     int block_size;
391   }
392 
393   static struct mem_block {
394     mem_block* m_pNext;
395     size_t m_used_count;
396     size_t m_size;
397     char[1] m_data;
398   }
399 
400   mem_block* m_pMem_blocks;
401   int m_image_x_size;
402   int m_image_y_size;
403   JpegStreamReadFunc readfn;
404   int m_progressive_flag;
405   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
406   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
407   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
408   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
409   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
410   int m_comps_in_frame;                         // # of components in frame
411   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
412   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
413   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
414   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
415   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
416   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
417   int m_comps_in_scan;                          // # of components in scan
418   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
419   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
420   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
421   int m_spectral_start;                         // spectral selection start
422   int m_spectral_end;                           // spectral selection end
423   int m_successive_low;                         // successive approximation low
424   int m_successive_high;                        // successive approximation high
425   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
426   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
427   int m_blocks_per_mcu;
428   int m_max_blocks_per_row;
429   int m_mcus_per_row, m_mcus_per_col;
430   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
431   int m_total_lines_left;                       // total # lines left in image
432   int m_mcu_lines_left;                         // total # lines left in this MCU
433   int m_real_dest_bytes_per_scan_line;
434   int m_dest_bytes_per_scan_line;               // rounded up
435   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
436   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
437   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
438   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
439   int m_eob_run;
440   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
441   ubyte* m_pIn_buf_ofs;
442   int m_in_buf_left;
443   int m_tem_flag;
444   bool m_eof_flag;
445   ubyte[128] m_in_buf_pad_start;
446   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
447   ubyte[128] m_in_buf_pad_end;
448   int m_bits_left;
449   uint m_bit_buf;
450   int m_restart_interval;
451   int m_restarts_left;
452   int m_next_restart_num;
453   int m_max_mcus_per_row;
454   int m_max_blocks_per_mcu;
455   int m_expanded_blocks_per_mcu;
456   int m_expanded_blocks_per_row;
457   int m_expanded_blocks_per_component;
458   bool m_freq_domain_chroma_upsample;
459   int m_max_mcus_per_col;
460   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
461   jpgd_block_t* m_pMCU_coefficients;
462   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
463   ubyte* m_pSample_buf;
464   int[256] m_crr;
465   int[256] m_cbb;
466   int[256] m_crg;
467   int[256] m_cbg;
468   ubyte* m_pScan_line_0;
469   ubyte* m_pScan_line_1;
470   jpgd_status m_error_code;
471   bool m_ready_flag;
472   int m_total_bytes_read;
473 
474 public:
475   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
476   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
477   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
478 
479   ~this () { free_all_blocks(); }
480 
481   @disable this (this); // no copies
482 
483   // Call this method after constructing the object to begin decompression.
484   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
485   int begin_decoding () {
486     if (m_ready_flag) return JPGD_SUCCESS;
487     if (m_error_code) return JPGD_FAILED;
488     try {
489       decode_start();
490       m_ready_flag = true;
491       return JPGD_SUCCESS;
492     } catch (Exception e) {
493       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
494       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
495     }
496     return JPGD_FAILED;
497   }
498 
499   // Returns the next scan line.
500   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
501   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
502   // Returns JPGD_SUCCESS if a scan line has been returned.
503   // Returns JPGD_DONE if all scan lines have been returned.
504   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
505   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
506     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
507     if (m_total_lines_left == 0) return JPGD_DONE;
508     try {
509       if (m_mcu_lines_left == 0) {
510         if (m_progressive_flag) load_next_row(); else decode_next_row();
511         // Find the EOI marker if that was the last row.
512         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
513         m_mcu_lines_left = m_max_mcu_y_size;
514       }
515       if (m_freq_domain_chroma_upsample) {
516         expanded_convert();
517         *pScan_line = m_pScan_line_0;
518       } else {
519         switch (m_scan_type) {
520           case JPGD_YH2V2:
521             if ((m_mcu_lines_left & 1) == 0) {
522               H2V2Convert();
523               *pScan_line = m_pScan_line_0;
524             } else {
525               *pScan_line = m_pScan_line_1;
526             }
527             break;
528           case JPGD_YH2V1:
529             H2V1Convert();
530             *pScan_line = m_pScan_line_0;
531             break;
532           case JPGD_YH1V2:
533             if ((m_mcu_lines_left & 1) == 0) {
534               H1V2Convert();
535               *pScan_line = m_pScan_line_0;
536             } else {
537               *pScan_line = m_pScan_line_1;
538             }
539             break;
540           case JPGD_YH1V1:
541             H1V1Convert();
542             *pScan_line = m_pScan_line_0;
543             break;
544           case JPGD_GRAYSCALE:
545             gray_convert();
546             *pScan_line = m_pScan_line_0;
547             break;
548           default:
549         }
550       }
551       *pScan_line_len = m_real_dest_bytes_per_scan_line;
552       --m_mcu_lines_left;
553       --m_total_lines_left;
554       return JPGD_SUCCESS;
555     } catch (Exception) {}
556     return JPGD_FAILED;
557   }
558 
559   @property const pure nothrow @safe @nogc {
560     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
561 
562     int width () { pragma(inline, true); return m_image_x_size; }
563     int height () { pragma(inline, true); return m_image_y_size; }
564 
565     int num_components () { pragma(inline, true); return m_comps_in_frame; }
566 
567     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
568     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
569 
570     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
571     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
572   }
573 
574 private:
575   // Retrieve one character from the input stream.
576   uint get_char () {
577     // Any bytes remaining in buffer?
578     if (!m_in_buf_left) {
579       // Try to get more bytes.
580       prep_in_buffer();
581       // Still nothing to get?
582       if (!m_in_buf_left) {
583         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
584         int t = m_tem_flag;
585         m_tem_flag ^= 1;
586         return (t ? 0xD9 : 0xFF);
587       }
588     }
589     uint c = *m_pIn_buf_ofs++;
590     --m_in_buf_left;
591     return c;
592   }
593 
594   // Same as previous method, except can indicate if the character is a pad character or not.
595   uint get_char (bool* pPadding_flag) {
596     if (!m_in_buf_left) {
597       prep_in_buffer();
598       if (!m_in_buf_left) {
599         *pPadding_flag = true;
600         int t = m_tem_flag;
601         m_tem_flag ^= 1;
602         return (t ? 0xD9 : 0xFF);
603       }
604     }
605     *pPadding_flag = false;
606     uint c = *m_pIn_buf_ofs++;
607     --m_in_buf_left;
608     return c;
609   }
610 
611   // Inserts a previously retrieved character back into the input buffer.
612   void stuff_char (ubyte q) {
613     *(--m_pIn_buf_ofs) = q;
614     m_in_buf_left++;
615   }
616 
617   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
618   ubyte get_octet () {
619     bool padding_flag;
620     int c = get_char(&padding_flag);
621     if (c == 0xFF) {
622       if (padding_flag) return 0xFF;
623       c = get_char(&padding_flag);
624       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
625       if (c == 0x00) return 0xFF;
626       stuff_char(cast(ubyte)(c));
627       stuff_char(0xFF);
628       return 0xFF;
629     }
630     return cast(ubyte)(c);
631   }
632 
633   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
634   uint get_bits (int num_bits) {
635     if (!num_bits) return 0;
636     uint i = m_bit_buf >> (32 - num_bits);
637     if ((m_bits_left -= num_bits) <= 0) {
638       m_bit_buf <<= (num_bits += m_bits_left);
639       uint c1 = get_char();
640       uint c2 = get_char();
641       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
642       m_bit_buf <<= -m_bits_left;
643       m_bits_left += 16;
644       assert(m_bits_left >= 0);
645     } else {
646       m_bit_buf <<= num_bits;
647     }
648     return i;
649   }
650 
651   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
652   uint get_bits_no_markers (int num_bits) {
653     if (!num_bits) return 0;
654     uint i = m_bit_buf >> (32 - num_bits);
655     if ((m_bits_left -= num_bits) <= 0) {
656       m_bit_buf <<= (num_bits += m_bits_left);
657       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
658         uint c1 = get_octet();
659         uint c2 = get_octet();
660         m_bit_buf |= (c1 << 8) | c2;
661       } else {
662         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
663         m_in_buf_left -= 2;
664         m_pIn_buf_ofs += 2;
665       }
666       m_bit_buf <<= -m_bits_left;
667       m_bits_left += 16;
668       assert(m_bits_left >= 0);
669     } else {
670       m_bit_buf <<= num_bits;
671     }
672     return i;
673   }
674 
675   // Decodes a Huffman encoded symbol.
676   int huff_decode (huff_tables *pH) {
677     int symbol;
678     // Check first 8-bits: do we have a complete symbol?
679     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
680       // Decode more bits, use a tree traversal to find symbol.
681       int ofs = 23;
682       do {
683         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
684         --ofs;
685       } while (symbol < 0);
686       get_bits_no_markers(8 + (23 - ofs));
687     } else {
688       get_bits_no_markers(pH.code_size.ptr[symbol]);
689     }
690     return symbol;
691   }
692 
693   // Decodes a Huffman encoded symbol.
  int huff_decode (huff_tables *pH, ref int extra_bits) {
    // Variant that, besides returning the symbol, also fetches the extra bits
    // that follow the code and stores them in `extra_bits`.
    // The low nibble of the symbol is used as the number of extra bits.
    int symbol;
    // Check first 8-bits: do we have a complete symbol?
    if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
      // Use a tree traversal to find symbol.
      int ofs = 23;
      do {
        symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
        --ofs;
      } while (symbol < 0);
      // Consume the code (8 lookup bits + one bit per tree step), then the extra bits.
      get_bits_no_markers(8 + (23 - ofs));
      extra_bits = get_bits_no_markers(symbol & 0xF);
    } else {
      // look_up2 entry layout as used below:
      //   bits 0..7   symbol
      //   bits 8..12  number of bits to consume
      //   bit  15     set when the extra bits are already stored in bits 16 and up
      // The assert checks that the stored bit count equals the code size, plus the
      // extra-bit count when bit 15 is set.
      assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
      if (symbol & 0x8000) {
        // Code and extra bits both fit in the 8-bit lookup: take the pre-decoded value.
        get_bits_no_markers((symbol >> 8) & 31);
        extra_bits = symbol >> 16;
      } else {
        int code_size = (symbol >> 8) & 31;
        int num_extra_bits = symbol & 0xF;
        int bits = code_size + num_extra_bits;
        // NOTE(review): presumably this tests whether the bit buffer holds enough valid
        // bits to fetch code and extra bits with a single call - confirm against get_bits_no_markers.
        if (bits <= (m_bits_left + 16)) {
          // One fetch; the mask keeps only the low `num_extra_bits` bits of the result.
          extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
        } else {
          // Two fetches: skip the code, then read the extra bits.
          get_bits_no_markers(code_size);
          extra_bits = get_bits_no_markers(num_extra_bits);
        }
      }
      symbol &= 0xFF;
    }
    return symbol;
  }
726 
727   // Tables and macro used to fully decode the DPCM differences.
728   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
729   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
730   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
731   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
732   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
733   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
734 
735   // Clamps a value between 0-255.
736   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
  // `clamp` forwards to CLAMP, which is defined outside this section; presumably it
  // performs the same 0..255 clamp as the commented-out member version above - verify.
  alias clamp = CLAMP;
738 
739   static struct DCT_Upsample {
740   static:
741     static struct Matrix44 {
742     pure nothrow @trusted @nogc:
743       alias Element_Type = int;
744       enum { NUM_ROWS = 4, NUM_COLS = 4 }
745 
746       Element_Type[NUM_COLS][NUM_ROWS] v;
747 
748       this() (in auto ref Matrix44 m) {
749         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
750       }
751 
752       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
753       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
754 
755       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
756 
757       ref Matrix44 opOpAssign(string op:"+") (in auto ref Matrix44 a) {
758         foreach (int r; 0..NUM_ROWS) {
759           at(r, 0) += a.at(r, 0);
760           at(r, 1) += a.at(r, 1);
761           at(r, 2) += a.at(r, 2);
762           at(r, 3) += a.at(r, 3);
763         }
764         return this;
765       }
766 
767       ref Matrix44 opOpAssign(string op:"-") (in auto ref Matrix44 a) {
768         foreach (int r; 0..NUM_ROWS) {
769           at(r, 0) -= a.at(r, 0);
770           at(r, 1) -= a.at(r, 1);
771           at(r, 2) -= a.at(r, 2);
772           at(r, 3) -= a.at(r, 3);
773         }
774         return this;
775       }
776 
777       Matrix44 opBinary(string op:"+") (in auto ref Matrix44 b) const {
778         alias a = this;
779         Matrix44 ret;
780         foreach (int r; 0..NUM_ROWS) {
781           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
782           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
783           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
784           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
785         }
786         return ret;
787       }
788 
789       Matrix44 opBinary(string op:"-") (in auto ref Matrix44 b) const {
790         alias a = this;
791         Matrix44 ret;
792         foreach (int r; 0..NUM_ROWS) {
793           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
794           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
795           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
796           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
797         }
798         return ret;
799       }
800 
801       static void add_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
802         foreach (int r; 0..4) {
803           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
804           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
805           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
806           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
807         }
808       }
809 
810       static void sub_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
811         foreach (int r; 0..4) {
812           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
813           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
814           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
815           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
816         }
817       }
818     }
819 
820     enum FRACT_BITS = 10;
821     enum SCALE = 1 << FRACT_BITS;
822 
823     alias Temp_Type = int;
824     //TODO: convert defines to mixins
825     //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
826     //#define F(i) ((int)((i) * SCALE + .5f))
827     // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
828     //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
829 
830     static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
831     enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
832 
833     // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      // Fills the 4x4 matrices P and Q from the coefficient block at pSrc.
      // Coefficients are read as pSrc[c + r*8]; any coefficient with c >= NUM_COLS or
      // r >= NUM_ROWS is replaced by the literal 0 at compile time.
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for a string mixin: either "0" or the pSrc access,
        // so out-of-range terms vanish from the expressions below.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        // X00r / X02r are plain copies of columns 0 and 4; X01r / X03r are fixed-point
        // weighted sums of the odd columns 1, 3, 5, 7.
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // P: columns 0 and 2 copy intermediates directly; columns 1 and 3 are weighted
        // sums of the odd-indexed intermediates of the same row.
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Q: columns 1 and 3 copy intermediates directly; columns 0 and 2 are weighted sums.
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }
913 
    // NUM_ROWS/NUM_COLS bound which source coefficients are read; anything at or beyond
    // those bounds is treated as zero.
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      // Fills the 4x4 matrices R and S from the coefficient block at pSrc.
      // Coefficients are read as pSrc[c + r*8]; any coefficient with c >= NUM_COLS or
      // r >= NUM_ROWS is replaced by the literal 0 at compile time.
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for a string mixin: either "0" or the pSrc access.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        // X10r / X12r are fixed-point weighted sums of the odd columns 1, 3, 5, 7;
        // X11r / X13r are plain copies of columns 2 and 6.
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // R: columns 0 and 2 copy intermediates directly; columns 1 and 3 are weighted
        // sums of the odd-indexed intermediates of the same row.
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // S: columns 1 and 3 copy intermediates directly; columns 0 and 2 are weighted sums.
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
993   } // end namespace DCT_Upsample
994 
995   // Unconditionally frees all allocated m_blocks.
996   void free_all_blocks () {
997     //m_pStream = null;
998     readfn = null;
999     for (mem_block *b = m_pMem_blocks; b; ) {
1000       mem_block* n = b.m_pNext;
1001       jpgd_free(b);
1002       b = n;
1003     }
1004     m_pMem_blocks = null;
1005   }
1006 
  // This method handles all errors. It never returns normally: after recording the
  // status and freeing all blocks it always throws an Exception.
  /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
    // `status` is stored in m_error_code; `line` (by default the line of the call site)
    // is passed on as the line of the thrown exception.
    m_error_code = status;
    // Release every allocated block (and the reader callback) before unwinding.
    free_all_blocks();
    // The C++ original used longjmp here; this translation throws instead.
    //longjmp(m_jmp_state, status);
    throw new Exception("jpeg decoding error", __FILE__, line);
  }
1015 
1016   void* alloc (size_t nSize, bool zero=false) {
1017     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1018     char *rv = null;
1019     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1020     {
1021       if ((b.m_used_count + nSize) <= b.m_size)
1022       {
1023         rv = b.m_data.ptr + b.m_used_count;
1024         b.m_used_count += nSize;
1025         break;
1026       }
1027     }
1028     if (!rv)
1029     {
1030       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1031       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1032       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1033       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1034       b.m_used_count = nSize;
1035       b.m_size = capacity;
1036       rv = b.m_data.ptr;
1037     }
1038     if (zero) memset(rv, 0, nSize);
1039     return rv;
1040   }
1041 
1042   void word_clear (void *p, ushort c, uint n) {
1043     ubyte *pD = cast(ubyte*)p;
1044     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1045     while (n)
1046     {
1047       pD[0] = l; pD[1] = h; pD += 2;
1048       n--;
1049     }
1050   }
1051 
1052   // Refill the input buffer.
  // This method will sit in a loop until (A) the buffer is full or (B)
  // the stream's read() method reports an end of file condition.
1055   void prep_in_buffer () {
1056     m_in_buf_left = 0;
1057     m_pIn_buf_ofs = m_in_buf.ptr;
1058 
1059     if (m_eof_flag)
1060       return;
1061 
1062     do
1063     {
1064       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1065       if (bytes_read == -1)
1066         stop_decoding(JPGD_STREAM_READ);
1067 
1068       m_in_buf_left += bytes_read;
1069     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1070 
1071     m_total_bytes_read += m_in_buf_left;
1072 
1073     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1074     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1075     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1076   }
1077 
1078   // Read a Huffman code table.
1079   void read_dht_marker () {
1080     int i, index, count;
1081     ubyte[17] huff_num;
1082     ubyte[256] huff_val;
1083 
1084     uint num_left = get_bits(16);
1085 
1086     if (num_left < 2)
1087       stop_decoding(JPGD_BAD_DHT_MARKER);
1088 
1089     num_left -= 2;
1090 
1091     while (num_left)
1092     {
1093       index = get_bits(8);
1094 
1095       huff_num.ptr[0] = 0;
1096 
1097       count = 0;
1098 
1099       for (i = 1; i <= 16; i++)
1100       {
1101         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1102         count += huff_num.ptr[i];
1103       }
1104 
1105       if (count > 255)
1106         stop_decoding(JPGD_BAD_DHT_COUNTS);
1107 
1108       for (i = 0; i < count; i++)
1109         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1110 
1111       i = 1 + 16 + count;
1112 
1113       if (num_left < cast(uint)i)
1114         stop_decoding(JPGD_BAD_DHT_MARKER);
1115 
1116       num_left -= i;
1117 
1118       if ((index & 0x10) > 0x10)
1119         stop_decoding(JPGD_BAD_DHT_INDEX);
1120 
1121       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1122 
1123       if (index >= JPGD_MAX_HUFF_TABLES)
1124         stop_decoding(JPGD_BAD_DHT_INDEX);
1125 
1126       if (!m_huff_num.ptr[index])
1127         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1128 
1129       if (!m_huff_val.ptr[index])
1130         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1131 
1132       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1133       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1134       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1135     }
1136   }
1137 
1138   // Read a quantization table.
1139   void read_dqt_marker () {
1140     int n, i, prec;
1141     uint num_left;
1142     uint temp;
1143 
1144     num_left = get_bits(16);
1145 
1146     if (num_left < 2)
1147       stop_decoding(JPGD_BAD_DQT_MARKER);
1148 
1149     num_left -= 2;
1150 
1151     while (num_left)
1152     {
1153       n = get_bits(8);
1154       prec = n >> 4;
1155       n &= 0x0F;
1156 
1157       if (n >= JPGD_MAX_QUANT_TABLES)
1158         stop_decoding(JPGD_BAD_DQT_TABLE);
1159 
1160       if (!m_quant.ptr[n])
1161         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1162 
1163       // read quantization entries, in zag order
1164       for (i = 0; i < 64; i++)
1165       {
1166         temp = get_bits(8);
1167 
1168         if (prec)
1169           temp = (temp << 8) + get_bits(8);
1170 
1171         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1172       }
1173 
1174       i = 64 + 1;
1175 
1176       if (prec)
1177         i += 64;
1178 
1179       if (num_left < cast(uint)i)
1180         stop_decoding(JPGD_BAD_DQT_LENGTH);
1181 
1182       num_left -= i;
1183     }
1184   }
1185 
1186   // Read the start of frame (SOF) marker.
1187   void read_sof_marker () {
1188     int i;
1189     uint num_left;
1190 
1191     num_left = get_bits(16);
1192 
1193     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1194       stop_decoding(JPGD_BAD_PRECISION);
1195 
1196     m_image_y_size = get_bits(16);
1197 
1198     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1199       stop_decoding(JPGD_BAD_HEIGHT);
1200 
1201     m_image_x_size = get_bits(16);
1202 
1203     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1204       stop_decoding(JPGD_BAD_WIDTH);
1205 
1206     m_comps_in_frame = get_bits(8);
1207 
1208     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1209       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1210 
1211     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1212       stop_decoding(JPGD_BAD_SOF_LENGTH);
1213 
1214     for (i = 0; i < m_comps_in_frame; i++)
1215     {
1216       m_comp_ident.ptr[i]  = get_bits(8);
1217       m_comp_h_samp.ptr[i] = get_bits(4);
1218       m_comp_v_samp.ptr[i] = get_bits(4);
1219       m_comp_quant.ptr[i]  = get_bits(8);
1220     }
1221   }
1222 
1223   // Used to skip unrecognized markers.
1224   void skip_variable_marker () {
1225     uint num_left;
1226 
1227     num_left = get_bits(16);
1228 
1229     if (num_left < 2)
1230       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1231 
1232     num_left -= 2;
1233 
1234     while (num_left)
1235     {
1236       get_bits(8);
1237       num_left--;
1238     }
1239   }
1240 
1241   // Read a define restart interval (DRI) marker.
1242   void read_dri_marker () {
1243     if (get_bits(16) != 4)
1244       stop_decoding(JPGD_BAD_DRI_LENGTH);
1245 
1246     m_restart_interval = get_bits(16);
1247   }
1248 
1249   // Read a start of scan (SOS) marker.
1250   void read_sos_marker () {
1251     uint num_left;
1252     int i, ci, n, c, cc;
1253 
1254     num_left = get_bits(16);
1255 
1256     n = get_bits(8);
1257 
1258     m_comps_in_scan = n;
1259 
1260     num_left -= 3;
1261 
1262     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1263       stop_decoding(JPGD_BAD_SOS_LENGTH);
1264 
1265     for (i = 0; i < n; i++)
1266     {
1267       cc = get_bits(8);
1268       c = get_bits(8);
1269       num_left -= 2;
1270 
1271       for (ci = 0; ci < m_comps_in_frame; ci++)
1272         if (cc == m_comp_ident.ptr[ci])
1273           break;
1274 
1275       if (ci >= m_comps_in_frame)
1276         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1277 
1278       m_comp_list.ptr[i]    = ci;
1279       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1280       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1281     }
1282 
1283     m_spectral_start  = get_bits(8);
1284     m_spectral_end    = get_bits(8);
1285     m_successive_high = get_bits(4);
1286     m_successive_low  = get_bits(4);
1287 
1288     if (!m_progressive_flag)
1289     {
1290       m_spectral_start = 0;
1291       m_spectral_end = 63;
1292     }
1293 
1294     num_left -= 3;
1295 
1296     /* read past whatever is num_left */
1297     while (num_left)
1298     {
1299       get_bits(8);
1300       num_left--;
1301     }
1302   }
1303 
1304   // Finds the next marker.
1305   int next_marker () {
1306     uint c, bytes;
1307 
1308     bytes = 0;
1309 
1310     do
1311     {
1312       do
1313       {
1314         bytes++;
1315         c = get_bits(8);
1316       } while (c != 0xFF);
1317 
1318       do
1319       {
1320         c = get_bits(8);
1321       } while (c == 0xFF);
1322 
1323     } while (c == 0);
1324 
1325     // If bytes > 0 here, there where extra bytes before the marker (not good).
1326 
1327     return c;
1328   }
1329 
1330   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1331   // encountered.
1332   int process_markers () {
1333     int c;
1334 
1335     for ( ; ; ) {
1336       c = next_marker();
1337 
1338       switch (c)
1339       {
1340         case M_SOF0:
1341         case M_SOF1:
1342         case M_SOF2:
1343         case M_SOF3:
1344         case M_SOF5:
1345         case M_SOF6:
1346         case M_SOF7:
1347         //case M_JPG:
1348         case M_SOF9:
1349         case M_SOF10:
1350         case M_SOF11:
1351         case M_SOF13:
1352         case M_SOF14:
1353         case M_SOF15:
1354         case M_SOI:
1355         case M_EOI:
1356         case M_SOS:
1357           return c;
1358         case M_DHT:
1359           read_dht_marker();
1360           break;
1361         // No arithmitic support - dumb patents!
1362         case M_DAC:
1363           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1364           break;
1365         case M_DQT:
1366           read_dqt_marker();
1367           break;
1368         case M_DRI:
1369           read_dri_marker();
1370           break;
1371         //case M_APP0:  /* no need to read the JFIF marker */
1372 
1373         case M_JPG:
1374         case M_RST0:    /* no parameters */
1375         case M_RST1:
1376         case M_RST2:
1377         case M_RST3:
1378         case M_RST4:
1379         case M_RST5:
1380         case M_RST6:
1381         case M_RST7:
1382         case M_TEM:
1383           stop_decoding(JPGD_UNEXPECTED_MARKER);
1384           break;
1385         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1386           skip_variable_marker();
1387           break;
1388       }
1389     }
1390   }
1391 
1392   // Finds the start of image (SOI) marker.
  // This code is rather defensive: it only checks roughly the first 4096 bytes to
  // avoid false positives.
1395   void locate_soi_marker () {
1396     uint lastchar, thischar;
1397     uint bytesleft;
1398 
1399     lastchar = get_bits(8);
1400 
1401     thischar = get_bits(8);
1402 
1403     /* ok if it's a normal JPEG file without a special header */
1404 
1405     if ((lastchar == 0xFF) && (thischar == M_SOI))
1406       return;
1407 
1408     bytesleft = 4096; //512;
1409 
1410     for ( ; ; )
1411     {
1412       if (--bytesleft == 0)
1413         stop_decoding(JPGD_NOT_JPEG);
1414 
1415       lastchar = thischar;
1416 
1417       thischar = get_bits(8);
1418 
1419       if (lastchar == 0xFF)
1420       {
1421         if (thischar == M_SOI)
1422           break;
1423         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1424           stop_decoding(JPGD_NOT_JPEG);
1425       }
1426     }
1427 
1428     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1429     thischar = (m_bit_buf >> 24) & 0xFF;
1430 
1431     if (thischar != 0xFF)
1432       stop_decoding(JPGD_NOT_JPEG);
1433   }
1434 
1435   // Find a start of frame (SOF) marker.
1436   void locate_sof_marker () {
1437     locate_soi_marker();
1438 
1439     int c = process_markers();
1440 
1441     switch (c)
1442     {
1443       case M_SOF2:
1444         m_progressive_flag = true;
1445         goto case;
1446       case M_SOF0:  /* baseline DCT */
1447       case M_SOF1:  /* extended sequential DCT */
1448         read_sof_marker();
1449         break;
1450       case M_SOF9:  /* Arithmitic coding */
1451         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1452         break;
1453       default:
1454         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1455         break;
1456     }
1457   }
1458 
1459   // Find a start of scan (SOS) marker.
1460   int locate_sos_marker () {
1461     int c;
1462 
1463     c = process_markers();
1464 
1465     if (c == M_EOI)
1466       return false;
1467     else if (c != M_SOS)
1468       stop_decoding(JPGD_UNEXPECTED_MARKER);
1469 
1470     read_sos_marker();
1471 
1472     return true;
1473   }
1474 
1475   // Reset everything to default/uninitialized state.
1476   void initit (JpegStreamReadFunc rfn) {
1477     m_pMem_blocks = null;
1478     m_error_code = JPGD_SUCCESS;
1479     m_ready_flag = false;
1480     m_image_x_size = m_image_y_size = 0;
1481     readfn = rfn;
1482     m_progressive_flag = false;
1483 
1484     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1485     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1486     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1487     memset(m_quant.ptr, 0, m_quant.sizeof);
1488 
1489     m_scan_type = 0;
1490     m_comps_in_frame = 0;
1491 
1492     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1493     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1494     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1495     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1496     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1497     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1498 
1499     m_comps_in_scan = 0;
1500     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1501     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1502     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1503 
1504     m_spectral_start = 0;
1505     m_spectral_end = 0;
1506     m_successive_low = 0;
1507     m_successive_high = 0;
1508     m_max_mcu_x_size = 0;
1509     m_max_mcu_y_size = 0;
1510     m_blocks_per_mcu = 0;
1511     m_max_blocks_per_row = 0;
1512     m_mcus_per_row = 0;
1513     m_mcus_per_col = 0;
1514     m_expanded_blocks_per_component = 0;
1515     m_expanded_blocks_per_mcu = 0;
1516     m_expanded_blocks_per_row = 0;
1517     m_freq_domain_chroma_upsample = false;
1518 
1519     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1520 
1521     m_total_lines_left = 0;
1522     m_mcu_lines_left = 0;
1523     m_real_dest_bytes_per_scan_line = 0;
1524     m_dest_bytes_per_scan_line = 0;
1525     m_dest_bytes_per_pixel = 0;
1526 
1527     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1528 
1529     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1530     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1531     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1532 
1533     m_eob_run = 0;
1534 
1535     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1536 
1537     m_pIn_buf_ofs = m_in_buf.ptr;
1538     m_in_buf_left = 0;
1539     m_eof_flag = false;
1540     m_tem_flag = 0;
1541 
1542     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1543     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1544     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1545 
1546     m_restart_interval = 0;
1547     m_restarts_left    = 0;
1548     m_next_restart_num = 0;
1549 
1550     m_max_mcus_per_row = 0;
1551     m_max_blocks_per_mcu = 0;
1552     m_max_mcus_per_col = 0;
1553 
1554     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1555     m_pMCU_coefficients = null;
1556     m_pSample_buf = null;
1557 
1558     m_total_bytes_read = 0;
1559 
1560     m_pScan_line_0 = null;
1561     m_pScan_line_1 = null;
1562 
1563     // Ready the input buffer.
1564     prep_in_buffer();
1565 
1566     // Prime the bit buffer.
1567     m_bits_left = 16;
1568     m_bit_buf = 0;
1569 
1570     get_bits(16);
1571     get_bits(16);
1572 
1573     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1574       m_mcu_block_max_zag.ptr[i] = 64;
1575   }
1576 
1577   enum SCALEBITS = 16;
1578   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1579   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1580 
1581   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1582   void create_look_ups () {
1583     for (int i = 0; i <= 255; i++)
1584     {
1585       int k = i - 128;
1586       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1587       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1588       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1589       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1590     }
1591   }
1592 
1593   // This method throws back into the stream any bytes that were read
1594   // into the bit buffer during initial marker scanning.
1595   void fix_in_buffer () {
1596     // In case any 0xFF's where pulled into the buffer during marker scanning.
1597     assert((m_bits_left & 7) == 0);
1598 
1599     if (m_bits_left == 16)
1600       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1601 
1602     if (m_bits_left >= 8)
1603       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1604 
1605     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1606     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1607 
1608     m_bits_left = 16;
1609     get_bits_no_markers(16);
1610     get_bits_no_markers(16);
1611   }
1612 
1613   void transform_mcu (int mcu_row) {
1614     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1615     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1616 
1617     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1618     {
1619       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1620       pSrc_ptr += 64;
1621       pDst_ptr += 64;
1622     }
1623   }
1624 
1625   static immutable ubyte[64] s_max_rc = [
1626     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1627     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1628     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1629     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1630   ];
1631 
1632   void transform_mcu_expand (int mcu_row) {
1633     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1634     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1635 
1636     // Y IDCT
1637     int mcu_block;
1638     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1639     {
1640       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1641       pSrc_ptr += 64;
1642       pDst_ptr += 64;
1643     }
1644 
1645     // Chroma IDCT, with upsampling
1646     jpgd_block_t[64] temp_block;
1647 
1648     for (int i = 0; i < 2; i++)
1649     {
1650       DCT_Upsample.Matrix44 P, Q, R, S;
1651 
1652       assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
1653       assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);
1654 
1655       int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
1656       if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1657       switch (s_max_rc.ptr[max_zag])
1658       {
1659       case 1*16+1:
1660         DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
1661         DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
1662         break;
1663       case 1*16+2:
1664         DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
1665         DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
1666         break;
1667       case 2*16+2:
1668         DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
1669         DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
1670         break;
1671       case 3*16+2:
1672         DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
1673         DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
1674         break;
1675       case 3*16+3:
1676         DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
1677         DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
1678         break;
1679       case 3*16+4:
1680         DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
1681         DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
1682         break;
1683       case 4*16+4:
1684         DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
1685         DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
1686         break;
1687       case 5*16+4:
1688         DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
1689         DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
1690         break;
1691       case 5*16+5:
1692         DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
1693         DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
1694         break;
1695       case 5*16+6:
1696         DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
1697         DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
1698         break;
1699       case 6*16+6:
1700         DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
1701         DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
1702         break;
1703       case 7*16+6:
1704         DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
1705         DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
1706         break;
1707       case 7*16+7:
1708         DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
1709         DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
1710         break;
1711       case 7*16+8:
1712         DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
1713         DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
1714         break;
1715       case 8*16+8:
1716         DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
1717         DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
1718         break;
1719       default:
1720         assert(false);
1721       }
1722 
1723       auto a = DCT_Upsample.Matrix44(P + Q);
1724       P -= Q;
1725       DCT_Upsample.Matrix44* b = &P;
1726       auto c = DCT_Upsample.Matrix44(R + S);
1727       R -= S;
1728       DCT_Upsample.Matrix44* d = &R;
1729 
1730       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
1731       idct_4x4(temp_block.ptr, pDst_ptr);
1732       pDst_ptr += 64;
1733 
1734       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
1735       idct_4x4(temp_block.ptr, pDst_ptr);
1736       pDst_ptr += 64;
1737 
1738       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
1739       idct_4x4(temp_block.ptr, pDst_ptr);
1740       pDst_ptr += 64;
1741 
1742       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
1743       idct_4x4(temp_block.ptr, pDst_ptr);
1744       pDst_ptr += 64;
1745 
1746       pSrc_ptr += 64;
1747     }
1748   }
1749 
1750   // Loads and dequantizes the next row of (already decoded) coefficients.
1751   // Progressive images only.
1752   void load_next_row () {
1753     int i;
1754     jpgd_block_t *p;
1755     jpgd_quant_t *q;
1756     int mcu_row, mcu_block, row_block = 0;
1757     int component_num, component_id;
1758     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1759 
1760     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1761 
1762     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1763     {
1764       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1765 
1766       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1767       {
1768         component_id = m_mcu_org.ptr[mcu_block];
1769         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1770 
1771         p = m_pMCU_coefficients + 64 * mcu_block;
1772 
1773         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1774         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1775         p[0] = pDC[0];
1776         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1777 
1778         for (i = 63; i > 0; i--)
1779           if (p[g_ZAG[i]])
1780             break;
1781 
1782         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1783 
1784         for ( ; i >= 0; i--)
1785           if (p[g_ZAG[i]])
1786             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1787 
1788         row_block++;
1789 
1790         if (m_comps_in_scan == 1)
1791           block_x_mcu.ptr[component_id]++;
1792         else
1793         {
1794           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1795           {
1796             block_x_mcu_ofs = 0;
1797 
1798             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1799             {
1800               block_y_mcu_ofs = 0;
1801 
1802               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1803             }
1804           }
1805         }
1806       }
1807 
1808       if (m_freq_domain_chroma_upsample)
1809         transform_mcu_expand(mcu_row);
1810       else
1811         transform_mcu(mcu_row);
1812     }
1813 
1814     if (m_comps_in_scan == 1)
1815       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1816     else
1817     {
1818       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1819       {
1820         component_id = m_comp_list.ptr[component_num];
1821 
1822         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1823       }
1824     }
1825   }
1826 
1827   // Restart interval processing.
1828   void process_restart () {
1829     int i;
1830     int c = 0;
1831 
1832     // Align to a byte boundry
1833     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1834     //get_bits_no_markers(m_bits_left & 7);
1835 
1836     // Let's scan a little bit to find the marker, but not _too_ far.
1837     // 1536 is a "fudge factor" that determines how much to scan.
1838     for (i = 1536; i > 0; i--)
1839       if (get_char() == 0xFF)
1840         break;
1841 
1842     if (i == 0)
1843       stop_decoding(JPGD_BAD_RESTART_MARKER);
1844 
1845     for ( ; i > 0; i--)
1846       if ((c = get_char()) != 0xFF)
1847         break;
1848 
1849     if (i == 0)
1850       stop_decoding(JPGD_BAD_RESTART_MARKER);
1851 
1852     // Is it the expected marker? If not, something bad happened.
1853     if (c != (m_next_restart_num + M_RST0))
1854       stop_decoding(JPGD_BAD_RESTART_MARKER);
1855 
1856     // Reset each component's DC prediction values.
1857     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1858 
1859     m_eob_run = 0;
1860 
1861     m_restarts_left = m_restart_interval;
1862 
1863     m_next_restart_num = (m_next_restart_num + 1) & 7;
1864 
1865     // Get the bit buffer going again...
1866 
1867     m_bits_left = 16;
1868     get_bits_no_markers(16);
1869     get_bits_no_markers(16);
1870   }
1871 
1872   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1873 
1874   // Decodes and dequantizes the next row of coefficients.
1875   void decode_next_row () {
1876     int row_block = 0;
1877 
1878     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1879     {
1880       if ((m_restart_interval) && (m_restarts_left == 0))
1881         process_restart();
1882 
1883       jpgd_block_t* p = m_pMCU_coefficients;
1884       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1885       {
1886         int component_id = m_mcu_org.ptr[mcu_block];
1887         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1888 
1889         int r, s;
1890         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
1891         s = JPGD_HUFF_EXTEND(r, s);
1892 
1893         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
1894 
1895         p[0] = cast(jpgd_block_t)(s * q[0]);
1896 
1897         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
1898 
1899         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
1900 
1901         int k;
1902         for (k = 1; k < 64; k++)
1903         {
1904           int extra_bits;
1905           s = huff_decode(pH, extra_bits);
1906 
1907           r = s >> 4;
1908           s &= 15;
1909 
1910           if (s)
1911           {
1912             if (r)
1913             {
1914               if ((k + r) > 63)
1915                 stop_decoding(JPGD_DECODE_ERROR);
1916 
1917               if (k < prev_num_set)
1918               {
1919                 int n = JPGD_MIN(r, prev_num_set - k);
1920                 int kt = k;
1921                 while (n--)
1922                   p[g_ZAG[kt++]] = 0;
1923               }
1924 
1925               k += r;
1926             }
1927 
1928             s = JPGD_HUFF_EXTEND(extra_bits, s);
1929 
1930             assert(k < 64);
1931 
1932             p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
1933           }
1934           else
1935           {
1936             if (r == 15)
1937             {
1938               if ((k + 16) > 64)
1939                 stop_decoding(JPGD_DECODE_ERROR);
1940 
1941               if (k < prev_num_set)
1942               {
1943                 int n = JPGD_MIN(16, prev_num_set - k);
1944                 int kt = k;
1945                 while (n--)
1946                 {
1947                   assert(kt <= 63);
1948                   p[g_ZAG[kt++]] = 0;
1949                 }
1950               }
1951 
1952               k += 16 - 1; // - 1 because the loop counter is k
1953               assert(p[g_ZAG[k]] == 0);
1954             }
1955             else
1956               break;
1957           }
1958         }
1959 
1960         if (k < prev_num_set)
1961         {
1962           int kt = k;
1963           while (kt < prev_num_set)
1964             p[g_ZAG[kt++]] = 0;
1965         }
1966 
1967         m_mcu_block_max_zag.ptr[mcu_block] = k;
1968 
1969         row_block++;
1970       }
1971 
1972       if (m_freq_domain_chroma_upsample)
1973         transform_mcu_expand(mcu_row);
1974       else
1975         transform_mcu(mcu_row);
1976 
1977       m_restarts_left--;
1978     }
1979   }
1980 
1981   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1982   void H1V1Convert () {
1983     int row = m_max_mcu_y_size - m_mcu_lines_left;
1984     ubyte *d = m_pScan_line_0;
1985     ubyte *s = m_pSample_buf + row * 8;
1986 
1987     for (int i = m_max_mcus_per_row; i > 0; i--)
1988     {
1989       for (int j = 0; j < 8; j++)
1990       {
1991         int y = s[j];
1992         int cb = s[64+j];
1993         int cr = s[128+j];
1994 
1995         d[0] = clamp(y + m_crr.ptr[cr]);
1996         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
1997         d[2] = clamp(y + m_cbb.ptr[cb]);
1998         d[3] = 255;
1999 
2000         d += 4;
2001       }
2002 
2003       s += 64*3;
2004     }
2005   }
2006 
2007   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2008   void H2V1Convert () {
2009     int row = m_max_mcu_y_size - m_mcu_lines_left;
2010     ubyte *d0 = m_pScan_line_0;
2011     ubyte *y = m_pSample_buf + row * 8;
2012     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2013 
2014     for (int i = m_max_mcus_per_row; i > 0; i--)
2015     {
2016       for (int l = 0; l < 2; l++)
2017       {
2018         for (int j = 0; j < 4; j++)
2019         {
2020           int cb = c[0];
2021           int cr = c[64];
2022 
2023           int rc = m_crr.ptr[cr];
2024           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2025           int bc = m_cbb.ptr[cb];
2026 
2027           int yy = y[j<<1];
2028           d0[0] = clamp(yy+rc);
2029           d0[1] = clamp(yy+gc);
2030           d0[2] = clamp(yy+bc);
2031           d0[3] = 255;
2032 
2033           yy = y[(j<<1)+1];
2034           d0[4] = clamp(yy+rc);
2035           d0[5] = clamp(yy+gc);
2036           d0[6] = clamp(yy+bc);
2037           d0[7] = 255;
2038 
2039           d0 += 8;
2040 
2041           c++;
2042         }
2043         y += 64;
2044       }
2045 
2046       y += 64*4 - 64*2;
2047       c += 64*4 - 8;
2048     }
2049   }
2050 
2051   // YCbCr H1V2 (1x2:1:1, 4 m_blocks per MCU) to RGB
2052   void H1V2Convert () {
2053     int row = m_max_mcu_y_size - m_mcu_lines_left;
2054     ubyte *d0 = m_pScan_line_0;
2055     ubyte *d1 = m_pScan_line_1;
2056     ubyte *y;
2057     ubyte *c;
2058 
2059     if (row < 8)
2060       y = m_pSample_buf + row * 8;
2061     else
2062       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2063 
2064     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2065 
2066     for (int i = m_max_mcus_per_row; i > 0; i--)
2067     {
2068       for (int j = 0; j < 8; j++)
2069       {
2070         int cb = c[0+j];
2071         int cr = c[64+j];
2072 
2073         int rc = m_crr.ptr[cr];
2074         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2075         int bc = m_cbb.ptr[cb];
2076 
2077         int yy = y[j];
2078         d0[0] = clamp(yy+rc);
2079         d0[1] = clamp(yy+gc);
2080         d0[2] = clamp(yy+bc);
2081         d0[3] = 255;
2082 
2083         yy = y[8+j];
2084         d1[0] = clamp(yy+rc);
2085         d1[1] = clamp(yy+gc);
2086         d1[2] = clamp(yy+bc);
2087         d1[3] = 255;
2088 
2089         d0 += 4;
2090         d1 += 4;
2091       }
2092 
2093       y += 64*4;
2094       c += 64*4;
2095     }
2096   }
2097 
2098   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2099   void H2V2Convert () {
2100     int row = m_max_mcu_y_size - m_mcu_lines_left;
2101     ubyte *d0 = m_pScan_line_0;
2102     ubyte *d1 = m_pScan_line_1;
2103     ubyte *y;
2104     ubyte *c;
2105 
2106     if (row < 8)
2107       y = m_pSample_buf + row * 8;
2108     else
2109       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2110 
2111     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2112 
2113     for (int i = m_max_mcus_per_row; i > 0; i--)
2114     {
2115       for (int l = 0; l < 2; l++)
2116       {
2117         for (int j = 0; j < 8; j += 2)
2118         {
2119           int cb = c[0];
2120           int cr = c[64];
2121 
2122           int rc = m_crr.ptr[cr];
2123           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2124           int bc = m_cbb.ptr[cb];
2125 
2126           int yy = y[j];
2127           d0[0] = clamp(yy+rc);
2128           d0[1] = clamp(yy+gc);
2129           d0[2] = clamp(yy+bc);
2130           d0[3] = 255;
2131 
2132           yy = y[j+1];
2133           d0[4] = clamp(yy+rc);
2134           d0[5] = clamp(yy+gc);
2135           d0[6] = clamp(yy+bc);
2136           d0[7] = 255;
2137 
2138           yy = y[j+8];
2139           d1[0] = clamp(yy+rc);
2140           d1[1] = clamp(yy+gc);
2141           d1[2] = clamp(yy+bc);
2142           d1[3] = 255;
2143 
2144           yy = y[j+8+1];
2145           d1[4] = clamp(yy+rc);
2146           d1[5] = clamp(yy+gc);
2147           d1[6] = clamp(yy+bc);
2148           d1[7] = 255;
2149 
2150           d0 += 8;
2151           d1 += 8;
2152 
2153           c++;
2154         }
2155         y += 64;
2156       }
2157 
2158       y += 64*6 - 64*2;
2159       c += 64*6 - 8;
2160     }
2161   }
2162 
2163   // Y (1 block per MCU) to 8-bit grayscale
2164   void gray_convert () {
2165     int row = m_max_mcu_y_size - m_mcu_lines_left;
2166     ubyte *d = m_pScan_line_0;
2167     ubyte *s = m_pSample_buf + row * 8;
2168 
2169     for (int i = m_max_mcus_per_row; i > 0; i--)
2170     {
2171       *cast(uint*)d = *cast(uint*)s;
2172       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2173 
2174       s += 64;
2175       d += 8;
2176     }
2177   }
2178 
2179   void expanded_convert () {
2180     int row = m_max_mcu_y_size - m_mcu_lines_left;
2181 
2182     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2183 
2184     ubyte* d = m_pScan_line_0;
2185 
2186     for (int i = m_max_mcus_per_row; i > 0; i--)
2187     {
2188       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2189       {
2190         immutable int Y_ofs = k * 8;
2191         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2192         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2193         for (int j = 0; j < 8; j++)
2194         {
2195           int y = Py[Y_ofs + j];
2196           int cb = Py[Cb_ofs + j];
2197           int cr = Py[Cr_ofs + j];
2198 
2199           d[0] = clamp(y + m_crr.ptr[cr]);
2200           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2201           d[2] = clamp(y + m_cbb.ptr[cb]);
2202           d[3] = 255;
2203 
2204           d += 4;
2205         }
2206       }
2207 
2208       Py += 64 * m_expanded_blocks_per_mcu;
2209     }
2210   }
2211 
2212   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2213   void find_eoi () {
2214     if (!m_progressive_flag)
2215     {
2216       // Attempt to read the EOI marker.
2217       //get_bits_no_markers(m_bits_left & 7);
2218 
2219       // Prime the bit buffer
2220       m_bits_left = 16;
2221       get_bits(16);
2222       get_bits(16);
2223 
2224       // The next marker _should_ be EOI
2225       process_markers();
2226     }
2227 
2228     m_total_bytes_read -= m_in_buf_left;
2229   }
2230 
2231   // Creates the tables needed for efficient Huffman decoding.
2232   void make_huff_table (int index, huff_tables *pH) {
2233     int p, i, l, si;
2234     ubyte[257] huffsize;
2235     uint[257] huffcode;
2236     uint code;
2237     uint subtree;
2238     int code_size;
2239     int lastp;
2240     int nextfreeentry;
2241     int currententry;
2242 
2243     pH.ac_table = m_huff_ac.ptr[index] != 0;
2244 
2245     p = 0;
2246 
2247     for (l = 1; l <= 16; l++)
2248     {
2249       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2250         huffsize.ptr[p++] = cast(ubyte)(l);
2251     }
2252 
2253     huffsize.ptr[p] = 0;
2254 
2255     lastp = p;
2256 
2257     code = 0;
2258     si = huffsize.ptr[0];
2259     p = 0;
2260 
2261     while (huffsize.ptr[p])
2262     {
2263       while (huffsize.ptr[p] == si)
2264       {
2265         huffcode.ptr[p++] = code;
2266         code++;
2267       }
2268 
2269       code <<= 1;
2270       si++;
2271     }
2272 
2273     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2274     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2275     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2276     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2277 
2278     nextfreeentry = -1;
2279 
2280     p = 0;
2281 
2282     while (p < lastp)
2283     {
2284       i = m_huff_val.ptr[index][p];
2285       code = huffcode.ptr[p];
2286       code_size = huffsize.ptr[p];
2287 
2288       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2289 
2290       if (code_size <= 8)
2291       {
2292         code <<= (8 - code_size);
2293 
2294         for (l = 1 << (8 - code_size); l > 0; l--)
2295         {
2296           assert(i < 256);
2297 
2298           pH.look_up.ptr[code] = i;
2299 
2300           bool has_extrabits = false;
2301           int extra_bits = 0;
2302           int num_extra_bits = i & 15;
2303 
2304           int bits_to_fetch = code_size;
2305           if (num_extra_bits)
2306           {
2307             int total_codesize = code_size + num_extra_bits;
2308             if (total_codesize <= 8)
2309             {
2310               has_extrabits = true;
2311               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2312               assert(extra_bits <= 0x7FFF);
2313               bits_to_fetch += num_extra_bits;
2314             }
2315           }
2316 
2317           if (!has_extrabits)
2318             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2319           else
2320             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2321 
2322           code++;
2323         }
2324       }
2325       else
2326       {
2327         subtree = (code >> (code_size - 8)) & 0xFF;
2328 
2329         currententry = pH.look_up.ptr[subtree];
2330 
2331         if (currententry == 0)
2332         {
2333           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2334           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2335 
2336           nextfreeentry -= 2;
2337         }
2338 
2339         code <<= (16 - (code_size - 8));
2340 
2341         for (l = code_size; l > 9; l--)
2342         {
2343           if ((code & 0x8000) == 0)
2344             currententry--;
2345 
2346           if (pH.tree.ptr[-currententry - 1] == 0)
2347           {
2348             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2349 
2350             currententry = nextfreeentry;
2351 
2352             nextfreeentry -= 2;
2353           }
2354           else
2355             currententry = pH.tree.ptr[-currententry - 1];
2356 
2357           code <<= 1;
2358         }
2359 
2360         if ((code & 0x8000) == 0)
2361           currententry--;
2362 
2363         pH.tree.ptr[-currententry - 1] = i;
2364       }
2365 
2366       p++;
2367     }
2368   }
2369 
2370   // Verifies the quantization tables needed for this scan are available.
2371   void check_quant_tables () {
2372     for (int i = 0; i < m_comps_in_scan; i++)
2373       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2374         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2375   }
2376 
2377   // Verifies that all the Huffman tables needed for this scan are available.
2378   void check_huff_tables () {
2379     for (int i = 0; i < m_comps_in_scan; i++)
2380     {
2381       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2382         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2383 
2384       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2385         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2386     }
2387 
2388     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2389       if (m_huff_num.ptr[i])
2390       {
2391         if (!m_pHuff_tabs.ptr[i])
2392           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2393 
2394         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2395       }
2396   }
2397 
2398   // Determines the component order inside each MCU.
2399   // Also calcs how many MCU's are on each row, etc.
2400   void calc_mcu_block_order () {
2401     int component_num, component_id;
2402     int max_h_samp = 0, max_v_samp = 0;
2403 
2404     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2405     {
2406       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2407         max_h_samp = m_comp_h_samp.ptr[component_id];
2408 
2409       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2410         max_v_samp = m_comp_v_samp.ptr[component_id];
2411     }
2412 
2413     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2414     {
2415       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2416       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2417     }
2418 
2419     if (m_comps_in_scan == 1)
2420     {
2421       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2422       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2423     }
2424     else
2425     {
2426       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2427       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2428     }
2429 
2430     if (m_comps_in_scan == 1)
2431     {
2432       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2433 
2434       m_blocks_per_mcu = 1;
2435     }
2436     else
2437     {
2438       m_blocks_per_mcu = 0;
2439 
2440       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2441       {
2442         int num_blocks;
2443 
2444         component_id = m_comp_list.ptr[component_num];
2445 
2446         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2447 
2448         while (num_blocks--)
2449           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2450       }
2451     }
2452   }
2453 
2454   // Starts a new scan.
2455   int init_scan () {
2456     if (!locate_sos_marker())
2457       return false;
2458 
2459     calc_mcu_block_order();
2460 
2461     check_huff_tables();
2462 
2463     check_quant_tables();
2464 
2465     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2466 
2467     m_eob_run = 0;
2468 
2469     if (m_restart_interval)
2470     {
2471       m_restarts_left = m_restart_interval;
2472       m_next_restart_num = 0;
2473     }
2474 
2475     fix_in_buffer();
2476 
2477     return true;
2478   }
2479 
2480   // Starts a frame. Determines if the number of components or sampling factors
2481   // are supported.
  void init_frame () {
    int i;

    // Pick the scan type and MCU geometry from the number of frame components
    // and the sampling factors of component 0.
    if (m_comps_in_frame == 1)
    {
      version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}

      //if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
      //  stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

      if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
      {
        // Single component, 1x1 sampling: one block per 8x8 MCU.
        m_scan_type = JPGD_GRAYSCALE;
        m_max_blocks_per_mcu = 1;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
      {
        //k8: i added this, and i absolutely don't know what it means; but it decoded two sample images i found
        // Single component, 2x2 sampling: still treated as grayscale with an
        // 8x8 MCU, but with room for four blocks per MCU.
        m_scan_type = JPGD_GRAYSCALE;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
    }
    else if (m_comps_in_frame == 3)
    {
      // Components 1 and 2 must both be sampled 1x1; anything else is rejected.
      if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
           ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

      // The sampling factors of component 0 select the layout:
      // blocks per MCU = (h*v of component 0) + 2, MCU size = 8*h by 8*v.
      if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_YH1V1;

        m_max_blocks_per_mcu = 3;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_YH2V1;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 16;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
      {
        m_scan_type = JPGD_YH1V2;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 16;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
      {
        m_scan_type = JPGD_YH2V2;
        m_max_blocks_per_mcu = 6;
        m_max_mcu_x_size = 16;
        m_max_mcu_y_size = 16;
      }
      else
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
    }
    else
      stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);

    // Number of MCUs needed to cover the image in each direction (rounded up).
    m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
    m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;

    // These values are for the *destination* pixels: after conversion.
    if (m_scan_type == JPGD_GRAYSCALE)
      m_dest_bytes_per_pixel = 1;
    else
      m_dest_bytes_per_pixel = 4;

    // Buffer stride: the width rounded up to a multiple of 16 pixels
    // (the 0xFFF0 mask also limits the rounded width to 16 bits).
    m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;

    // Number of bytes in a scan line that hold actual image pixels.
    m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);

    // Initialize two scan line buffers.
    // The second one is only allocated for the H1V2 and H2V2 scan types.
    m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
    if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
      m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);

    m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;

    // Should never happen
    if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
      stop_decoding(JPGD_ASSERTION_ERROR);

    // Allocate the coefficient buffer, enough for one MCU
    m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);

    // Start every block of the MCU with a max-zag value of 64.
    for (i = 0; i < m_max_blocks_per_mcu; i++)
      m_mcu_block_max_zag.ptr[i] = 64;

    // "Expanded" counts give every frame component as many blocks per MCU as component 0 has.
    m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
    m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
    m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
    // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
    m_freq_domain_chroma_upsample = false;
    version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
      m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
    }

    // The sample buffer holds 64 bytes per block for one row of MCUs; the
    // expanded block count is used when frequency-domain upsampling is active.
    if (m_freq_domain_chroma_upsample)
      m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
    else
      m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);

    m_total_lines_left = m_image_y_size;

    m_mcu_lines_left = 0;

    create_look_ups();
  }
2601 
2602   // The coeff_buf series of methods originally stored the coefficients
2603   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2604   // was used to make this process more efficient. Now, we can store the entire
2605   // thing in RAM.
2606   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2607     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2608 
2609     cb.block_num_x = block_num_x;
2610     cb.block_num_y = block_num_y;
2611     cb.block_len_x = block_len_x;
2612     cb.block_len_y = block_len_y;
2613     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
2614     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2615     return cb;
2616   }
2617 
2618   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2619     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2620     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2621   }
2622 
2623   // The following methods decode the various types of m_blocks encountered
2624   // in progressively encoded images.
2625   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2626     int s, r;
2627     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2628 
2629     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2630     {
2631       r = pD.get_bits_no_markers(s);
2632       s = JPGD_HUFF_EXTEND(r, s);
2633     }
2634 
2635     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2636 
2637     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2638   }
2639 
2640   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2641     if (pD.get_bits_no_markers(1))
2642     {
2643       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2644 
2645       p[0] |= (1 << pD.m_successive_low);
2646     }
2647   }
2648 
2649   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2650     int k, s, r;
2651 
2652     if (pD.m_eob_run)
2653     {
2654       pD.m_eob_run--;
2655       return;
2656     }
2657 
2658     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2659 
2660     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2661     {
2662       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2663 
2664       r = s >> 4;
2665       s &= 15;
2666 
2667       if (s)
2668       {
2669         if ((k += r) > 63)
2670           pD.stop_decoding(JPGD_DECODE_ERROR);
2671 
2672         r = pD.get_bits_no_markers(s);
2673         s = JPGD_HUFF_EXTEND(r, s);
2674 
2675         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2676       }
2677       else
2678       {
2679         if (r == 15)
2680         {
2681           if ((k += 15) > 63)
2682             pD.stop_decoding(JPGD_DECODE_ERROR);
2683         }
2684         else
2685         {
2686           pD.m_eob_run = 1 << r;
2687 
2688           if (r)
2689             pD.m_eob_run += pD.get_bits_no_markers(r);
2690 
2691           pD.m_eob_run--;
2692 
2693           break;
2694         }
2695       }
2696     }
2697   }
2698 
  // AC refinement pass for one block: adds one more bit of precision (bit
  // `m_successive_low`) to coefficients that are already non-zero, and places
  // newly non-zero coefficients, for zigzag positions
  // m_spectral_start..m_spectral_end.
  static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
    int s, k, r;
    // p1 is +1 and m1 is -1, both scaled to the bit position being refined.
    int p1 = 1 << pD.m_successive_low;
    int m1 = (-1) << pD.m_successive_low;
    jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

    assert(pD.m_spectral_end <= 63);

    k = pD.m_spectral_start;

    // Not inside an end-of-band run: decode symbols for this block.
    if (pD.m_eob_run == 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

        // high nibble = run, low nibble = size
        r = s >> 4;
        s &= 15;

        if (s)
        {
          // A newly non-zero coefficient must have size 1.
          if (s != 1)
            pD.stop_decoding(JPGD_DECODE_ERROR);

          // The next bit selects its value: 1 -> p1, 0 -> m1.
          if (pD.get_bits_no_markers(1))
            s = p1;
          else
            s = m1;
        }
        else
        {
          // Size 0 with run != 15 starts an end-of-band run of (1 << r)
          // blocks plus `r` extra bits; run == 15 falls through to the
          // skipping loop below.
          if (r != 15)
          {
            pD.m_eob_run = 1 << r;

            if (r)
              pD.m_eob_run += pD.get_bits_no_markers(r);

            break;
          }
        }

        // Advance over coefficients: each already non-zero one consumes one
        // correction bit; each zero one uses up one unit of the run `r`.
        // The loop stops at the zero coefficient reached when `r` is
        // exhausted, or at the end of the spectral band.
        do
        {
          jpgd_block_t *this_coef = p + g_ZAG[k & 63];

          if (*this_coef != 0)
          {
            if (pD.get_bits_no_markers(1))
            {
              // Apply the correction only if this bit is not already set,
              // moving the coefficient away from zero.
              if ((*this_coef & p1) == 0)
              {
                if (*this_coef >= 0)
                  *this_coef = cast(jpgd_block_t)(*this_coef + p1);
                else
                  *this_coef = cast(jpgd_block_t)(*this_coef + m1);
              }
            }
          }
          else
          {
            if (--r < 0)
              break;
          }

          k++;

        } while (k <= pD.m_spectral_end);

        // Store the new coefficient (if any) at the position where the loop stopped.
        if ((s) && (k < 64))
        {
          p[g_ZAG[k]] = cast(jpgd_block_t)(s);
        }
      }
    }

    // Inside an end-of-band run (possibly started just above): the remaining
    // non-zero coefficients of this block still receive their correction bits,
    // then the block is counted off the run.
    if (pD.m_eob_run > 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis

        if (*this_coef != 0)
        {
          if (pD.get_bits_no_markers(1))
          {
            if ((*this_coef & p1) == 0)
            {
              if (*this_coef >= 0)
                *this_coef = cast(jpgd_block_t)(*this_coef + p1);
              else
                *this_coef = cast(jpgd_block_t)(*this_coef + m1);
            }
          }
        }
      }

      pD.m_eob_run--;
    }
  }
2799 
2800   // Decode a scan in a progressively encoded image.
2801   void decode_scan (pDecode_block_func decode_block_func) {
2802     int mcu_row, mcu_col, mcu_block;
2803     int[JPGD_MAX_COMPONENTS] block_x_mcu;
2804     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
2805 
2806     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
2807 
2808     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2809     {
2810       int component_num, component_id;
2811 
2812       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
2813 
2814       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2815       {
2816         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2817 
2818         if ((m_restart_interval) && (m_restarts_left == 0))
2819           process_restart();
2820 
2821         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2822         {
2823           component_id = m_mcu_org.ptr[mcu_block];
2824 
2825           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
2826 
2827           if (m_comps_in_scan == 1)
2828             block_x_mcu.ptr[component_id]++;
2829           else
2830           {
2831             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
2832             {
2833               block_x_mcu_ofs = 0;
2834 
2835               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
2836               {
2837                 block_y_mcu_ofs = 0;
2838                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
2839               }
2840             }
2841           }
2842         }
2843 
2844         m_restarts_left--;
2845       }
2846 
2847       if (m_comps_in_scan == 1)
2848         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
2849       else
2850       {
2851         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2852         {
2853           component_id = m_comp_list.ptr[component_num];
2854           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2855         }
2856       }
2857     }
2858   }
2859 
2860   // Decode a progressively encoded image.
2861   void init_progressive () {
2862     int i;
2863 
2864     if (m_comps_in_frame == 4)
2865       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2866 
2867     // Allocate the coefficient buffers.
2868     for (i = 0; i < m_comps_in_frame; i++)
2869     {
2870       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2871       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2872     }
2873 
2874     for ( ; ; )
2875     {
2876       int dc_only_scan, refinement_scan;
2877       pDecode_block_func decode_block_func;
2878 
2879       if (!init_scan())
2880         break;
2881 
2882       dc_only_scan = (m_spectral_start == 0);
2883       refinement_scan = (m_successive_high != 0);
2884 
2885       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2886         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2887 
2888       if (dc_only_scan)
2889       {
2890         if (m_spectral_end)
2891           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2892       }
2893       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2894         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2895 
2896       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2897         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2898 
2899       if (dc_only_scan)
2900       {
2901         if (refinement_scan)
2902           decode_block_func = &decode_block_dc_refine;
2903         else
2904           decode_block_func = &decode_block_dc_first;
2905       }
2906       else
2907       {
2908         if (refinement_scan)
2909           decode_block_func = &decode_block_ac_refine;
2910         else
2911           decode_block_func = &decode_block_ac_first;
2912       }
2913 
2914       decode_scan(decode_block_func);
2915 
2916       m_bits_left = 16;
2917       get_bits(16);
2918       get_bits(16);
2919     }
2920 
2921     m_comps_in_scan = m_comps_in_frame;
2922 
2923     for (i = 0; i < m_comps_in_frame; i++)
2924       m_comp_list.ptr[i] = i;
2925 
2926     calc_mcu_block_order();
2927   }
2928 
2929   void init_sequential () {
2930     if (!init_scan())
2931       stop_decoding(JPGD_UNEXPECTED_MARKER);
2932   }
2933 
2934   void decode_start () {
2935     init_frame();
2936 
2937     if (m_progressive_flag)
2938       init_progressive();
2939     else
2940       init_sequential();
2941   }
2942 
  // Prepares the decoder for the stream read through `rfn`: runs `initit(rfn)`
  // and then `locate_sof_marker()`.
  void decode_init (JpegStreamReadFunc rfn) {
    initit(rfn);
    locate_sof_marker();
  }
2947 }
2948 
2949 
2950 // ////////////////////////////////////////////////////////////////////////// //
2951 /// read JPEG image header, determine dimensions and number of components.
2952 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  // without a reader there is nothing to inspect
  if (rfn is null) return false;

  // constructing the decoder is all that is needed; no pixel data is decoded here
  auto decoder = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable bool header_ok = (decoder.error_code == JPGD_SUCCESS);
  if (header_ok) {
    width = decoder.width;
    height = decoder.height;
    actual_comps = decoder.num_components;
  }
  return header_ok;
}
2963 
2964 
2965 // ////////////////////////////////////////////////////////////////////////// //
2966 /// read JPEG image header, determine dimensions and number of components.
2967 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  // fopen() needs a NUL-terminated name: short names are copied to a stack
  // buffer, longer ones to a temporary malloc'ed buffer.
  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    auto raw = cast(char*)malloc(filename.length+1);
    // Test the pointer itself: a slice built from a null pointer with a
    // non-zero length does not compare identical to `null`.
    if (raw !is null) {
      scope(exit) free(raw);
      // The name must actually be copied and terminated before it is used.
      raw[0..filename.length] = filename[];
      raw[filename.length] = 0;
      m_pFile = fopen(raw, "rb");
    }
  }
  // also reached when the temporary buffer could not be allocated
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    // Reader callback: returns the number of bytes read, or -1 on error;
    // sets `*pEOF_flag` once fread() returns fewer bytes than requested.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
3014 
3015 
3016 // ////////////////////////////////////////////////////////////////////////// //
3017 /// read JPEG image header, determine dimensions and number of components.
3018 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  // number of bytes of `buf` already handed to the decoder
  size_t consumed;
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // everything was delivered by earlier calls: report end of data
      if (consumed >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never hand out more than what is left in the buffer
      immutable size_t left = buf.length-consumed;
      if (left < max_bytes_to_read) max_bytes_to_read = cast(int)left;
      auto src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(pBuf, src, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps);
}
3035 
3036 
3037 // ////////////////////////////////////////////////////////////////////////// //
3038 /// decompress JPEG image, what else?
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as -1 to use the image's own component count.
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  // a reader is mandatory, and only -1 (use the image value), 1, 3 and 4 components can be requested
  if (rfn is null) return null;
  switch (req_comps) {
    case -1: case 1: case 3: case 4: break;
    default: return null;
  }

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // bytes per output row, and size of the whole output image
  immutable int dst_bpl = image_width*req_comps;
  immutable int total_bytes = dst_bpl*image_height;

  static if (useMalloc) {
    ubyte* pImage_data = cast(ubyte*)jpgd_malloc(total_bytes);
    if (pImage_data is null) return null;
    auto idata = pImage_data[0..total_bytes];
  } else {
    auto idata = new ubyte[](total_bytes);
    auto pImage_data = idata.ptr;
  }

  // Gives the output buffer back; used when a row fails to decode and when an
  // exception leaves this function.
  void discard_output () {
    static if (useMalloc) {
      jpgd_free(pImage_data);
    } else {
      import core.memory : GC;
      GC.free(idata.ptr);
      idata = null;
    }
  }

  scope(failure) discard_output();

  foreach (immutable int y; 0..image_height) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      discard_output();
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;
    immutable int src_comps = decoder.num_components;

    if (src_comps == 1) {
      // the decoded row is read as one byte per pixel
      if (req_comps == 1) {
        memcpy(pDst, pScan_line, dst_bpl);
      } else {
        // replicate the gray value into R, G and B; a 4-component result gets an opaque alpha
        foreach (immutable int x; 0..image_width) {
          immutable ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          if (req_comps == 3) {
            pDst += 3;
          } else {
            pDst[3] = 255;
            pDst += 4;
          }
        }
      }
    } else if (src_comps == 3) {
      // the decoded row is read as four bytes per pixel
      if (req_comps == 4) {
        memcpy(pDst, pScan_line, dst_bpl);
      } else if (req_comps == 1) {
        // weighted sum of R, G and B in 16.16 fixed point
        enum int YR = 19595, YG = 38470, YB = 7471;
        foreach (immutable int x; 0..image_width) {
          immutable int r = pScan_line[x*4+0];
          immutable int g = pScan_line[x*4+1];
          immutable int b = pScan_line[x*4+2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // drop the fourth byte of every source pixel
        foreach (immutable int x; 0..image_width) {
          pDst[0] = pScan_line[x*4+0];
          pDst[1] = pScan_line[x*4+1];
          pDst[2] = pScan_line[x*4+2];
          pDst += 3;
        }
      }
    }
  }

  return idata;
}
3140 
3141 
3142 // ////////////////////////////////////////////////////////////////////////// //
3143 /// decompress JPEG image from disk file.
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as -1 to use the image's own component count.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  // fopen() needs a NUL-terminated name: short names are copied to a stack
  // buffer, longer ones to a temporary malloc'ed buffer.
  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    auto raw = cast(char*)malloc(filename.length+1);
    // Test the pointer itself: a slice built from a null pointer with a
    // non-zero length does not compare identical to `null`.
    if (raw !is null) {
      scope(exit) free(raw);
      // The name must actually be copied and terminated before it is used.
      raw[0..filename.length] = filename[];
      raw[filename.length] = 0;
      m_pFile = fopen(raw, "rb");
    }
  }
  // also reached when the temporary buffer could not be allocated
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    // Reader callback: returns the number of bytes read, or -1 on error;
    // sets `*pEOF_flag` once fread() returns fewer bytes than requested.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3191 
3192 
3193 // ////////////////////////////////////////////////////////////////////////// //
3194 /// decompress JPEG image from memory buffer.
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as -1 to use the image's own component count.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // number of bytes of `buf` already handed to the decoder
  size_t consumed;
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // everything was delivered by earlier calls: report end of data
      if (consumed >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never hand out more than what is left in the buffer
      immutable size_t left = buf.length-consumed;
      if (left < max_bytes_to_read) max_bytes_to_read = cast(int)left;
      auto src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(pBuf, src, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps, req_comps);
}
3212 
3213 
3214 // ////////////////////////////////////////////////////////////////////////// //
3215 // if we have access "iv.vfs", add some handy API
enum JpegHasIVVFS = __traits(compiles, { import iv.vfs; });
3217 
3218 static if (JpegHasIVVFS) {
3219 import iv.vfs;
3220 
3221 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from an already opened `VFile`.
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as -1 to use the image's own component count.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  return decompress_jpeg_image_from_stream!useMalloc(
    // Reader callback: -1 when the file is not open, 0 plus the EOF flag when
    // it is already at its end, otherwise the number of bytes actually read.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (!fl.isOpen) return -1;
      if (fl.eof) {
        *pEOF_flag = true;
        return 0;
      }
      immutable int got = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;
      // let the decoder know right away when this read reached the end of the file
      if (fl.eof) *pEOF_flag = true;
      return got;
    },
    width, height, actual_comps, req_comps);
}
3238 // vfs API
3239 }
3240 
3241 
3242 // ////////////////////////////////////////////////////////////////////////// //
3243 // if we have access "arsd.color", add some handy API
enum JpegHasArsd = __traits(compiles, { import arsd.color; });
3245 
3246 static if (JpegHasArsd) {
3247 import arsd.color;
3248 
3249 // ////////////////////////////////////////////////////////////////////////// //
3250 /// decompress JPEG image, what else?
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  // the result is always a 4-bytes-per-pixel image
  enum req_comps = 4;

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}

  if (decoder.begin_decoding() != JPGD_SUCCESS || image_width < 1 || image_height < 1) return null;

  // bytes per destination row
  immutable int dst_bpl = image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  // drop the partially filled image if an exception leaves this function
  scope(failure) { img.clearInternal(); img = null; }
  ubyte* pImage_data = img.imageData.bytes.ptr;

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      img.clearInternal();
      img = null;
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;

    // `req_comps` is a compile-time 4, so only two conversions can ever run;
    // the branches for 1- and 3-component output were unreachable and are gone.
    if (decoder.num_components == 3) {
      // the decoded row is copied as-is, `dst_bpl` bytes (4 per pixel)
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // one gray byte per pixel: replicate it into R, G and B, with an opaque alpha
      for (int x = 0; x < image_width; ++x) {
        ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
  }

  return img;
}
3333 
3334 
3335 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image from a disk file.
///
/// Params:
///   filename = path of the file to open (need not be NUL-terminated).
///
/// Returns:
///   the result of `readJpegFromStream` fed from the opened file.
///
/// Throws:
///   `Exception` if `filename` is empty or the file cannot be opened.
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    // short names: build the NUL-terminated copy on the stack
    char[513] buffer;
    auto tfn = buffer[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    // long names: build the NUL-terminated copy on the C heap
    import core.stdc.stdlib : malloc, free;
    auto namez = cast(char*)malloc(filename.length+1);
    // test the raw pointer: a slice with null ptr and non-zero length is not `null`
    if (namez !is null) {
      scope(exit) free(namez);
      // the name must actually be copied in before it is handed to fopen
      namez[0..filename.length] = filename[];
      namez[filename.length] = 0;
      m_pFile = fopen(namez, "rb");
    }
    // on allocation failure m_pFile stays null and the error below is raised
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // short read: either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3383 
3384 
3385 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image held entirely in a memory buffer.
///
/// Params:
///   buf = the compressed JPEG bytes.
///
/// Returns:
///   whatever `readJpegFromStream` produces when fed from `buf`.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  size_t consumed = 0; // number of bytes of `buf` already handed to the decoder

  // Read callback: copies the next chunk of `buf` into the decoder's buffer.
  int feed (void* pBuf, int max_bytes_to_read, bool* pEOF_flag) {
    import core.stdc.string : memcpy;
    if (consumed >= buf.length) {
      // nothing left to give
      *pEOF_flag = true;
      return 0;
    }
    immutable size_t remaining = buf.length-consumed;
    int chunk = max_bytes_to_read;
    if (remaining < chunk) chunk = cast(int)remaining;
    memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+consumed, chunk);
    consumed += chunk;
    return chunk;
  }

  return readJpegFromStream(&feed);
}
3403 // done with arsd API
3404 }
3405 
3406 
3407 static if (JpegHasIVVFS) {
/// Decompress a JPEG image from a `VFile`.
///
/// Returns whatever `readJpegFromStream` produces when fed from `fl`.
public MemoryImage readJpeg (VFile fl) {
  // Read callback: -1 when the file is not open, otherwise the number of bytes
  // read, raising the EOF flag whenever the file reports end-of-file.
  int pull (void* pBuf, int max_bytes_to_read, bool* pEOF_flag) {
    if (!fl.isOpen) return -1;
    int got = 0;
    if (!fl.eof) {
      got = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;
      if (!fl.eof) return got;
    }
    *pEOF_flag = true;
    return got;
  }

  return readJpegFromStream(&pull);
}
3422 
/// Probe a `VFile` for a JPEG image via `detect_jpeg_image_from_stream`.
///
/// Params:
///   fl = file to read from.
///   width = receives the value reported by the detector.
///   height = receives the value reported by the detector.
///   actual_comps = receives the value reported by the detector.
///
/// Returns: the result of `detect_jpeg_image_from_stream`.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  // Read callback: -1 when the file is not open, otherwise the number of bytes
  // read, raising the EOF flag whenever the file reports end-of-file.
  int pull (void* pBuf, int max_bytes_to_read, bool* pEOF_flag) {
    if (!fl.isOpen) return -1;
    if (fl.eof) {
      *pEOF_flag = true;
      return 0;
    }
    immutable int got = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;
    if (fl.eof) *pEOF_flag = true;
    return got;
  }

  return detect_jpeg_image_from_stream(&pull, width, height, actual_comps);
}
3437 // vfs API
3438 }
3439 
3440 
3441 // ////////////////////////////////////////////////////////////////////////// //
3442 version(jpegd_test) {
3443 import arsd.color;
3444 import arsd.png;
3445 
/// Test driver: detects and decodes a JPEG (first command-line argument, or
/// "image.jpg") once straight from disk and once from an in-memory copy,
/// writing the results to "z00.png" and "z01.png".
void main (string[] args) {
  import std.exception : enforce;
  import std.stdio;

  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // Call the detector outside of assert(): assert expressions are compiled
    // out in release builds, which would silently skip the call.
    immutable bool found = detect_jpeg_image_from_file(fname, width, height, comps);
    enforce(found, "cannot detect JPEG image in file '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    enforce(img !is null, "cannot decode JPEG image from file '"~fname~"'");
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      file.length = cast(size_t)fl.size; // no truncation to int
      fl.rawRead(file[]);
    }
    immutable bool found = detect_jpeg_image_from_memory(file[], width, height, comps);
    enforce(found, "cannot detect JPEG image in memory buffer");
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    enforce(img !is null, "cannot decode JPEG image from memory buffer");
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3470 }