arsd.jpeg source code

1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
40  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
41  * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
42  */
43 module arsd.jpeg;
44 
45 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
46 // This is slower, but results in higher quality on images with highly saturated colors.
47 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
48 
49 /// Input stream interface.
50 /// This delegate is called when the internal input buffer is empty.
51 /// Parameters:
52 ///   pBuf - input buffer
53 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
54 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
55 ///   Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
56 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
57 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
58 
59 
60 // ////////////////////////////////////////////////////////////////////////// //
61 private:
/// Allocates `nSize` bytes with the C runtime `malloc` and returns its result unchanged
/// (so the pointer is null when the C allocator fails).
void* jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
/// Releases a pointer with the C runtime `free`; a null pointer is ignored.
void jpgd_free (void* p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
64 
65 // Status codes used by the decoder; they are plain ints (see the `jpgd_status` alias).
   // JPGD_SUCCESS, JPGD_FAILED and JPGD_DONE are the values returned by jpeg_decoder.begin_decoding()
   // and jpeg_decoder.decode(). The remaining names are detailed error codes: they start at -256 and
   // each following member is one greater than the previous one, so all of them are negative.
   // jpeg_decoder.error_code returns the stored status as a jpgd_status.
66 alias jpgd_status = int;
67 enum /*jpgd_status*/ {
68   JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
69   JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
70   JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
71   JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
72   JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
73   JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
74   JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
75   JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
76   JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
77 }
78 
79 enum {
   // Compile-time limits of the decoder. Several of them size fixed arrays inside jpeg_decoder:
   // JPGD_IN_BUF_SIZE (+128) sizes the input buffer m_in_buf, JPGD_MAX_HUFF_TABLES / JPGD_MAX_QUANT_TABLES
   // size the Huffman and quantization table slots, JPGD_MAX_COMPONENTS / JPGD_MAX_COMPS_IN_SCAN size the
   // per-component arrays, and JPGD_MAX_BLOCKS_PER_MCU sizes m_mcu_org and m_mcu_block_max_zag.
80   JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
81   JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
82 }
83 
84 // DCT coefficients are stored in this sequence: a permutation of 0..63 that starts 0, 1, 8, 16, 9, 2, ...
   // NOTE(review): presumably g_ZAG[k] is the row-major index (row*8 + col) inside the 8x8 block of the
   // k-th coefficient in zig-zag order; the users of this table are outside this part of the file - confirm.
85 static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
86 
87 alias JPEG_MARKER = int;
   // Marker codes, kept as plain ints. All values except M_ERROR fit in one byte; M_ERROR (0x100) lies
   // outside the byte range. M_EOI (0xD9) is the byte get_char() returns after 0xFF when it pads an
   // exhausted stream. RST0 has the same value as M_RST0.
88 enum /*JPEG_MARKER*/ {
89   M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
90   M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
91   M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
92   M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
93   M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
94 }
95 
96 alias JPEG_SUBSAMPLING = int;
   // Scan layout identifiers (values 0..4). jpeg_decoder.decode() switches on m_scan_type using these
   // names to pick the matching colour-conversion routine (gray_convert, H1V1Convert, H2V1Convert,
   // H1V2Convert, H2V2Convert).
97 enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
98 
99 enum CONST_BITS = 13; // number of fractional bits in the FIX_* fixed-point constants below
100 enum PASS1_BITS = 2; // extra fractional bits the row pass (Row.idct) keeps for the column pass (Col.idct)
101 enum SCALEDONE = cast(int)1; // the integer 1; DESCALE()/DESCALE_ZEROSHIFT() shift it left to build the rounding term
102 
    // FIX_a_b holds the real constant a.b multiplied by (1 << CONST_BITS) and rounded to the nearest integer,
    // e.g. 0.298631336 * 8192 = 2446.39 -> 2446.
103 enum FIX_0_298631336 = cast(int)2446;  /* FIX(0.298631336) */
104 enum FIX_0_390180644 = cast(int)3196;  /* FIX(0.390180644) */
105 enum FIX_0_541196100 = cast(int)4433;  /* FIX(0.541196100) */
106 enum FIX_0_765366865 = cast(int)6270;  /* FIX(0.765366865) */
107 enum FIX_0_899976223 = cast(int)7373;  /* FIX(0.899976223) */
108 enum FIX_1_175875602 = cast(int)9633;  /* FIX(1.175875602) */
109 enum FIX_1_501321110 = cast(int)12299; /* FIX(1.501321110) */
110 enum FIX_1_847759065 = cast(int)15137; /* FIX(1.847759065) */
111 enum FIX_1_961570560 = cast(int)16069; /* FIX(1.961570560) */
112 enum FIX_2_053119869 = cast(int)16819; /* FIX(2.053119869) */
113 enum FIX_2_562915447 = cast(int)20995; /* FIX(2.562915447) */
114 enum FIX_3_072711026 = cast(int)25172; /* FIX(3.072711026) */
115 
/// Rounding right shift: adds half of the divisor (`SCALEDONE << (n-1)`) to `x`, then shifts the sum
/// right by `n` bits (arithmetic shift, so the sign is kept).
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int roundingTerm = SCALEDONE << (n - 1);
  return (x + roundingTerm) >> n;
}
/// Same rounding right shift as DESCALE(), but additionally adds 128 (pre-scaled by `n` bits) so that the
/// shifted result is offset by +128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int levelShift = 128 << n;
  immutable int roundingTerm = SCALEDONE << (n - 1);
  return (x + levelShift + roundingTerm) >> n;
}
/// Saturates `i` to the range 0..255: negative values give 0, values above 255 give 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  // A single unsigned comparison detects both "negative" and "greater than 255".
  if (cast(uint)i <= 255) return cast(ubyte)i;
  // ~i is non-negative for negative i (sign fill 0 -> result 0) and negative for i > 255 (sign fill -1 -> 0xFF).
  return cast(ubyte)(((~i) >> 31) & 0xFF);
}
119 
120 
121 // Compiler creates a fast path 1D IDCT for X non-zero columns
    // Row!(N).idct is the row pass of the 8x8 inverse DCT, specialised at compile time for a row in which
    // only the first NONZERO_COLS coefficients are read (columns >= NONZERO_COLS are replaced by the literal 0).
    // It reads one row of coefficients from pSrc[0..8] and writes 8 ints to pTemp[0..8]. Results keep
    // PASS1_BITS extra fractional bits: the general path descales by CONST_BITS-PASS1_BITS only.
122 struct Row(int NONZERO_COLS) {
123 pure nothrow @trusted @nogc:
124   static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
125     static if (NONZERO_COLS == 0) {
126       // nothing: pTemp is left untouched (idct() passes a freshly declared int[64], which D zero-initialises)
127     } else static if (NONZERO_COLS == 1) {
          // DC-only row: every output equals the DC coefficient scaled up by PASS1_BITS.
128       immutable int dcval = (pSrc[0] << PASS1_BITS);
129       pTemp[0] = dcval;
130       pTemp[1] = dcval;
131       pTemp[2] = dcval;
132       pTemp[3] = dcval;
133       pTemp[4] = dcval;
134       pTemp[5] = dcval;
135       pTemp[6] = dcval;
136       pTemp[7] = dcval;
137     } else {
138       // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
139       //#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
          // The template yields a source string that is pasted in with mixin().
140       template ACCESS_COL(int x) {
141         static if (x < NONZERO_COLS) enum ACCESS_COL = "cast(int)pSrc["~x.stringof~"]"; else enum ACCESS_COL = "0";
142       }
143 
          // Even part: built from columns 0, 2, 4 and 6.
144       immutable int z2 = mixin(ACCESS_COL!2), z3 = mixin(ACCESS_COL!6);
145 
146       immutable int z1 = (z2 + z3)*FIX_0_541196100;
147       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
148       immutable int tmp3 = z1 + z2*FIX_0_765366865;
149 
          // Columns 0 and 4 are not multiplied by a FIX_* constant, so they are shifted up by CONST_BITS
          // to share the scale of the products above.
150       immutable int tmp0 = (mixin(ACCESS_COL!0) + mixin(ACCESS_COL!4)) << CONST_BITS;
151       immutable int tmp1 = (mixin(ACCESS_COL!0) - mixin(ACCESS_COL!4)) << CONST_BITS;
152 
153       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
154 
          // Odd part: built from columns 7, 5, 3 and 1.
155       immutable int atmp0 = mixin(ACCESS_COL!7), atmp1 = mixin(ACCESS_COL!5), atmp2 = mixin(ACCESS_COL!3), atmp3 = mixin(ACCESS_COL!1);
156 
157       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
158       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
159 
160       immutable int az1 = bz1*(-FIX_0_899976223);
161       immutable int az2 = bz2*(-FIX_2_562915447);
162       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
163       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
164 
165       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
166       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
167       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
168       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
169 
          // Combine: outputs k and 7-k are the sum and difference of one even/odd pair.
170       pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
171       pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
172       pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
173       pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
174       pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
175       pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
176       pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
177       pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
178     }
179   }
180 }
181 
182 
183 // Compiler creates a fast path 1D IDCT for X non-zero rows
    // Col!(N).idct is the column pass of the 8x8 inverse DCT, specialised at compile time for a column in
    // which only the first NONZERO_ROWS entries are read (rows >= NONZERO_ROWS are replaced by the literal 0).
    // It reads one column from pTemp with a stride of 8 ints and writes 8 bytes to pDst_ptr with a stride
    // of 8 bytes. DESCALE_ZEROSHIFT adds the +128 offset and removes the remaining fractional bits; CLAMP
    // saturates each result to 0..255.
184 struct Col (int NONZERO_ROWS) {
185 pure nothrow @trusted @nogc:
186   static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
187     static assert(NONZERO_ROWS > 0);
188     static if (NONZERO_ROWS == 1) {
          // Only row 0 contributes: all 8 outputs of this column share one value.
189       int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
190       immutable ubyte dcval_clamped = cast(ubyte)CLAMP(dcval);
191       pDst_ptr[0*8] = dcval_clamped;
192       pDst_ptr[1*8] = dcval_clamped;
193       pDst_ptr[2*8] = dcval_clamped;
194       pDst_ptr[3*8] = dcval_clamped;
195       pDst_ptr[4*8] = dcval_clamped;
196       pDst_ptr[5*8] = dcval_clamped;
197       pDst_ptr[6*8] = dcval_clamped;
198       pDst_ptr[7*8] = dcval_clamped;
199     } else {
200       // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
201       //#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
          // The template yields a source string that is pasted in with mixin().
202       template ACCESS_ROW(int x) {
203         static if (x < NONZERO_ROWS) enum ACCESS_ROW = "pTemp["~(x*8).stringof~"]"; else enum ACCESS_ROW = "0";
204       }
205 
          // Even part: built from rows 0, 2, 4 and 6.
206       immutable int z2 = mixin(ACCESS_ROW!2);
207       immutable int z3 = mixin(ACCESS_ROW!6);
208 
209       immutable int z1 = (z2 + z3)*FIX_0_541196100;
210       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
211       immutable int tmp3 = z1 + z2*FIX_0_765366865;
212 
213       immutable int tmp0 = (mixin(ACCESS_ROW!0) + mixin(ACCESS_ROW!4)) << CONST_BITS;
214       immutable int tmp1 = (mixin(ACCESS_ROW!0) - mixin(ACCESS_ROW!4)) << CONST_BITS;
215 
216       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
217 
          // Odd part: built from rows 7, 5, 3 and 1.
218       immutable int atmp0 = mixin(ACCESS_ROW!7), atmp1 = mixin(ACCESS_ROW!5), atmp2 = mixin(ACCESS_ROW!3), atmp3 = mixin(ACCESS_ROW!1);
219 
220       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
221       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
222 
223       immutable int az1 = bz1*(-FIX_0_899976223);
224       immutable int az2 = bz2*(-FIX_2_562915447);
225       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
226       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
227 
228       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
229       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
230       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
231       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
232 
          // Combine: outputs k and 7-k are the sum and difference of one even/odd pair. The shift removes
          // CONST_BITS (from the FIX_* products), PASS1_BITS (kept by the row pass) and 3 further bits.
233       int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
234       pDst_ptr[8*0] = cast(ubyte)CLAMP(i);
235 
236       i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
237       pDst_ptr[8*7] = cast(ubyte)CLAMP(i);
238 
239       i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
240       pDst_ptr[8*1] = cast(ubyte)CLAMP(i);
241 
242       i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
243       pDst_ptr[8*6] = cast(ubyte)CLAMP(i);
244 
245       i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
246       pDst_ptr[8*2] = cast(ubyte)CLAMP(i);
247 
248       i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
249       pDst_ptr[8*5] = cast(ubyte)CLAMP(i);
250 
251       i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
252       pDst_ptr[8*3] = cast(ubyte)CLAMP(i);
253 
254       i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
255       pDst_ptr[8*4] = cast(ubyte)CLAMP(i);
256     }
257   }
258 }
259 
260 
261 static immutable ubyte[512] s_idct_row_table = [
    // 64 groups of 8 bytes. idct() selects the group at offset (block_max_zag - 1) * 8; byte r of that
    // group is the NONZERO_COLS value used to pick the Row!(N) specialisation for row r (0 = row skipped).
262   1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
263   4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
264   6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
265   6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
266   8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
267   8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
268   8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
269   8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
270 ];
271 
    // Indexed by (block_max_zag - 1) in idct(): gives the NONZERO_ROWS value (1..8) used to pick the
    // Col!(N) specialisation that is applied to all 8 columns of the block.
272 static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
273 
// 8x8 inverse DCT of one coefficient block.
//   pSrc_ptr      - 64 coefficients, 8 per row
//   pDst_ptr      - receives 64 output bytes, 8 per row
//   block_max_zag - value in 1..64 that selects, through s_idct_row_table / s_idct_col_table, how many
//                   columns of each row and how many rows of each column are actually processed
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  // Only the first coefficient is present: the whole block is one flat value.
  if (block_max_zag <= 1) {
    immutable int level = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    // Replicate the byte into all four bytes of an int so each row is written with two 32-bit stores.
    int fill = level | (level << 8);
    fill |= fill << 16;
    foreach (immutable row; 0..8) {
      int* out32 = cast(int*)(pDst_ptr + row*8);
      out32[0] = fill;
      out32[1] = fill;
    }
    return;
  }

  int[64] temp;

  // Row pass: each row uses the specialisation matching its count of possibly non-zero columns.
  const(ubyte)* colCounts = s_idct_row_table.ptr + (block_max_zag - 1)*8;
  foreach (immutable row; 0..8) {
    const(jpeg_decoder.jpgd_block_t)* rowSrc = pSrc_ptr + row*8;
    int* rowDst = temp.ptr + row*8;
    switch (colCounts[row]) {
      case 0: Row!(0).idct(rowDst, rowSrc); break;
      case 1: Row!(1).idct(rowDst, rowSrc); break;
      case 2: Row!(2).idct(rowDst, rowSrc); break;
      case 3: Row!(3).idct(rowDst, rowSrc); break;
      case 4: Row!(4).idct(rowDst, rowSrc); break;
      case 5: Row!(5).idct(rowDst, rowSrc); break;
      case 6: Row!(6).idct(rowDst, rowSrc); break;
      case 7: Row!(7).idct(rowDst, rowSrc); break;
      case 8: Row!(8).idct(rowDst, rowSrc); break;
      default: assert(0);
    }
  }

  // Column pass: the same specialisation is applied to every column.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable col; 0..8) {
    ubyte* colDst = pDst_ptr + col;
    const(int)* colSrc = temp.ptr + col;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(colDst, colSrc); break;
      case 2: Col!(2).idct(colDst, colSrc); break;
      case 3: Col!(3).idct(colDst, colSrc); break;
      case 4: Col!(4).idct(colDst, colSrc); break;
      case 5: Col!(5).idct(colDst, colSrc); break;
      case 6: Col!(6).idct(colDst, colSrc); break;
      case 7: Col!(7).idct(colDst, colSrc); break;
      case 8: Col!(8).idct(colDst, colSrc); break;
      default: assert(0);
    }
  }
}
343 
// Inverse DCT for a block in which only the first 4 rows and the first 4 columns of coefficients are
// read: 4 row passes with Row!(4), then 8 column passes with Col!(4). Writes 64 bytes to pDst_ptr.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] temp;

  foreach (immutable row; 0..4) {
    Row!(4).idct(temp.ptr + row*8, pSrc_ptr + row*8);
  }

  foreach (immutable col; 0..8) {
    Col!(4).idct(pDst_ptr + col, temp.ptr + col);
  }
}
364 
365 
366 // ////////////////////////////////////////////////////////////////////////// //
367 struct jpeg_decoder {
368 private import core.stdc.string : memcpy, memset;
369 private:
370   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
371   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
372 
373   alias jpgd_quant_t = short;
374   alias jpgd_block_t = short;
375   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
376 
377   static struct huff_tables {
        // Decoding tables for one Huffman table, as consumed by the two huff_decode() overloads.
378     bool ac_table; // NOTE(review): presumably true for AC tables; not read in this part of the file
379     uint[256] look_up; // indexed by the top 8 bits of the bit buffer; a value that is negative as int means "continue in `tree`", otherwise it is the symbol
380     uint[256] look_up2; // same index; non-negative entries pack: bits 0-7 symbol, bits 8-12 number of bits to consume, bit 15 set when the extra bits are stored in bits 16 and up
381     ubyte[256] code_size; // indexed by symbol: number of bits huff_decode() consumes for that symbol
382     uint[512] tree; // nodes for longer codes; indexed by the negated (current value + next bit), walked until a non-negative entry is found
383   }
384 
385   static struct coeff_buf {
        // NOTE(review): the code that fills and reads this struct is outside this part of the file. From the
        // field names it presumably describes a 2D grid of fixed-size coefficient blocks - confirm.
386     ubyte* pData; // presumably the storage holding all blocks
387     int block_num_x, block_num_y; // presumably the grid size in blocks
388     int block_len_x, block_len_y; // presumably the dimensions of one block
389     int block_size; // presumably the size of one block inside pData
390   }
391 
392   static struct mem_block {
        // Node of the singly linked list headed by jpeg_decoder.m_pMem_blocks.
        // NOTE(review): presumably the header of one chunk of the linear allocator mentioned in the file
        // header, with the payload starting at m_data - the allocator code is outside this part of the file.
393     mem_block* m_pNext; // next node in the list
394     size_t m_used_count; // presumably bytes of the payload already handed out
395     size_t m_size; // presumably the payload capacity
396     char[1] m_data; // presumably a placeholder for the variable-size payload
397   }
398 
399   mem_block* m_pMem_blocks;
400   int m_image_x_size;
401   int m_image_y_size;
402   JpegStreamReadFunc readfn;
403   int m_progressive_flag;
404   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
405   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
406   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
407   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
408   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
409   int m_comps_in_frame;                         // # of components in frame
410   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
411   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
412   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
413   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
414   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
415   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
416   int m_comps_in_scan;                          // # of components in scan
417   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
418   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
419   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
420   int m_spectral_start;                         // spectral selection start
421   int m_spectral_end;                           // spectral selection end
422   int m_successive_low;                         // successive approximation low
423   int m_successive_high;                        // successive approximation high
424   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
425   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
426   int m_blocks_per_mcu;
427   int m_max_blocks_per_row;
428   int m_mcus_per_row, m_mcus_per_col;
429   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
430   int m_total_lines_left;                       // total # lines left in image
431   int m_mcu_lines_left;                         // total # lines left in this MCU
432   int m_real_dest_bytes_per_scan_line;
433   int m_dest_bytes_per_scan_line;               // rounded up
434   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
435   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
436   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
437   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
438   int m_eob_run;
439   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
440   ubyte* m_pIn_buf_ofs;
441   int m_in_buf_left;
442   int m_tem_flag;
443   bool m_eof_flag;
444   ubyte[128] m_in_buf_pad_start;
445   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
446   ubyte[128] m_in_buf_pad_end;
447   int m_bits_left;
448   uint m_bit_buf;
449   int m_restart_interval;
450   int m_restarts_left;
451   int m_next_restart_num;
452   int m_max_mcus_per_row;
453   int m_max_blocks_per_mcu;
454   int m_expanded_blocks_per_mcu;
455   int m_expanded_blocks_per_row;
456   int m_expanded_blocks_per_component;
457   bool m_freq_domain_chroma_upsample;
458   int m_max_mcus_per_col;
459   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
460   jpgd_block_t* m_pMCU_coefficients;
461   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
462   ubyte* m_pSample_buf;
463   int[256] m_crr;
464   int[256] m_cbb;
465   int[256] m_crg;
466   int[256] m_cbg;
467   ubyte* m_pScan_line_0;
468   ubyte* m_pScan_line_1;
469   jpgd_status m_error_code;
470   bool m_ready_flag;
471   int m_total_bytes_read;
472 
473 public:
474   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
475   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
476   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
477 
478   ~this () { free_all_blocks(); }
479 
480   @disable this (this); // no copies
481 
482   // Call this method after constructing the object to begin decompression.
483   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
484   int begin_decoding () {
485     if (m_ready_flag) return JPGD_SUCCESS;
486     if (m_error_code) return JPGD_FAILED;
487     try {
488       decode_start();
489       m_ready_flag = true;
490       return JPGD_SUCCESS;
491     } catch (Exception e) {
492       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
493       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
494     }
495     return JPGD_FAILED;
496   }
497 
498   // Returns the next scan line.
499   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
500   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
501   // Returns JPGD_SUCCESS if a scan line has been returned.
502   // Returns JPGD_DONE if all scan lines have been returned.
503   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
504   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
505     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
506     if (m_total_lines_left == 0) return JPGD_DONE;
507     try {
508       if (m_mcu_lines_left == 0) {
509         if (m_progressive_flag) load_next_row(); else decode_next_row();
510         // Find the EOI marker if that was the last row.
511         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
512         m_mcu_lines_left = m_max_mcu_y_size;
513       }
514       if (m_freq_domain_chroma_upsample) {
515         expanded_convert();
516         *pScan_line = m_pScan_line_0;
517       } else {
518         switch (m_scan_type) {
519           case JPGD_YH2V2:
520             if ((m_mcu_lines_left & 1) == 0) {
521               H2V2Convert();
522               *pScan_line = m_pScan_line_0;
523             } else {
524               *pScan_line = m_pScan_line_1;
525             }
526             break;
527           case JPGD_YH2V1:
528             H2V1Convert();
529             *pScan_line = m_pScan_line_0;
530             break;
531           case JPGD_YH1V2:
532             if ((m_mcu_lines_left & 1) == 0) {
533               H1V2Convert();
534               *pScan_line = m_pScan_line_0;
535             } else {
536               *pScan_line = m_pScan_line_1;
537             }
538             break;
539           case JPGD_YH1V1:
540             H1V1Convert();
541             *pScan_line = m_pScan_line_0;
542             break;
543           case JPGD_GRAYSCALE:
544             gray_convert();
545             *pScan_line = m_pScan_line_0;
546             break;
547           default:
548         }
549       }
550       *pScan_line_len = m_real_dest_bytes_per_scan_line;
551       --m_mcu_lines_left;
552       --m_total_lines_left;
553       return JPGD_SUCCESS;
554     } catch (Exception) {}
555     return JPGD_FAILED;
556   }
557 
558   @property const pure nothrow @safe @nogc {
559     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
560 
561     int width () { pragma(inline, true); return m_image_x_size; }
562     int height () { pragma(inline, true); return m_image_y_size; }
563 
564     int num_components () { pragma(inline, true); return m_comps_in_frame; }
565 
566     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
567     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
568 
569     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
570     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
571   }
572 
573 private:
574   // Retrieve one character from the input stream.
575   uint get_char () {
576     // Any bytes remaining in buffer?
577     if (!m_in_buf_left) {
578       // Try to get more bytes.
579       prep_in_buffer();
580       // Still nothing to get?
581       if (!m_in_buf_left) {
582         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
583         int t = m_tem_flag;
584         m_tem_flag ^= 1;
585         return (t ? 0xD9 : 0xFF);
586       }
587     }
588     uint c = *m_pIn_buf_ofs++;
589     --m_in_buf_left;
590     return c;
591   }
592 
593   // Same as previous method, except can indicate if the character is a pad character or not.
594   uint get_char (bool* pPadding_flag) {
595     if (!m_in_buf_left) {
596       prep_in_buffer();
597       if (!m_in_buf_left) {
598         *pPadding_flag = true;
599         int t = m_tem_flag;
600         m_tem_flag ^= 1;
601         return (t ? 0xD9 : 0xFF);
602       }
603     }
604     *pPadding_flag = false;
605     uint c = *m_pIn_buf_ofs++;
606     --m_in_buf_left;
607     return c;
608   }
609 
610   // Inserts a previously retrieved character back into the input buffer.
611   void stuff_char (ubyte q) {
612     *(--m_pIn_buf_ofs) = q;
613     m_in_buf_left++;
614   }
615 
616   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
617   ubyte get_octet () {
618     bool padding_flag;
619     int c = get_char(&padding_flag);
620     if (c == 0xFF) {
621       if (padding_flag) return 0xFF;
622       c = get_char(&padding_flag);
623       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
624       if (c == 0x00) return 0xFF;
625       stuff_char(cast(ubyte)(c));
626       stuff_char(0xFF);
627       return 0xFF;
628     }
629     return cast(ubyte)(c);
630   }
631 
632   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
      // Returns the top `num_bits` bits of the 32-bit bit buffer m_bit_buf (0 when num_bits is 0) and removes
      // them from the buffer. When that uses up the bits counted by m_bits_left, two more bytes are read
      // with get_char() and placed in the low 16 bits of the buffer.
      // NOTE(review): the refill adds 16 bits at a time, so num_bits is presumably expected to be at most 16 - confirm.
633   uint get_bits (int num_bits) {
634     if (!num_bits) return 0;
635     uint i = m_bit_buf >> (32 - num_bits);
636     if ((m_bits_left -= num_bits) <= 0) {
          // num_bits + (new m_bits_left) equals the old m_bits_left: shift the buffer up by that amount first.
637       m_bit_buf <<= (num_bits += m_bits_left);
638       uint c1 = get_char();
639       uint c2 = get_char();
          // Load the two new bytes into the low 16 bits.
640       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
          // -m_bits_left is the number of requested bits that were still missing; shift those out as well.
641       m_bit_buf <<= -m_bits_left;
642       m_bits_left += 16;
643       assert(m_bits_left >= 0);
644     } else {
645       m_bit_buf <<= num_bits;
646     }
647     return i;
648   }
649 
650   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
      // Same buffer bookkeeping as get_bits(); only the refill differs: the two new bytes come from
      // get_octet() whenever fewer than 2 bytes are buffered or one of the next two bytes is 0xFF, and are
      // copied straight from the input buffer otherwise.
651   uint get_bits_no_markers (int num_bits) {
652     if (!num_bits) return 0;
653     uint i = m_bit_buf >> (32 - num_bits);
654     if ((m_bits_left -= num_bits) <= 0) {
          // num_bits + (new m_bits_left) equals the old m_bits_left: shift the buffer up by that amount first.
655       m_bit_buf <<= (num_bits += m_bits_left);
656       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
            // Slow path: get_octet() handles 0xFF sequences and end-of-stream padding.
657         uint c1 = get_octet();
658         uint c2 = get_octet();
659         m_bit_buf |= (c1 << 8) | c2;
660       } else {
            // Fast path: neither byte is 0xFF, so both can be taken directly from the input buffer.
661         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
662         m_in_buf_left -= 2;
663         m_pIn_buf_ofs += 2;
664       }
          // -m_bits_left is the number of requested bits that were still missing; shift those out as well.
665       m_bit_buf <<= -m_bits_left;
666       m_bits_left += 16;
667       assert(m_bits_left >= 0);
668     } else {
669       m_bit_buf <<= num_bits;
670     }
671     return i;
672   }
673 
674   // Decodes a Huffman encoded symbol.
675   int huff_decode (huff_tables *pH) {
676     int symbol;
677     // Check first 8-bits: do we have a complete symbol?
678     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
679       // Decode more bits, use a tree traversal to find symbol.
680       int ofs = 23;
681       do {
682         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
683         --ofs;
684       } while (symbol < 0);
685       get_bits_no_markers(8 + (23 - ofs));
686     } else {
687       get_bits_no_markers(pH.code_size.ptr[symbol]);
688     }
689     return symbol;
690   }
691 
692   // Decodes a Huffman encoded symbol.
693   int huff_decode (huff_tables *pH, ref int extra_bits) {
694     int symbol;
695     // Check first 8-bits: do we have a complete symbol?
696     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
697       // Use a tree traversal to find symbol.
698       int ofs = 23;
699       do {
700         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
701         --ofs;
702       } while (symbol < 0);
703       get_bits_no_markers(8 + (23 - ofs));
704       extra_bits = get_bits_no_markers(symbol & 0xF);
705     } else {
706       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
707       if (symbol & 0x8000) {
708         get_bits_no_markers((symbol >> 8) & 31);
709         extra_bits = symbol >> 16;
710       } else {
711         int code_size = (symbol >> 8) & 31;
712         int num_extra_bits = symbol & 0xF;
713         int bits = code_size + num_extra_bits;
714         if (bits <= (m_bits_left + 16)) {
715           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
716         } else {
717           get_bits_no_markers(code_size);
718           extra_bits = get_bits_no_markers(num_extra_bits);
719         }
720       }
721       symbol &= 0xFF;
722     }
723     return symbol;
724   }
725 
726   // Tables and macro used to fully decode the DPCM differences.
727   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
728   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
729   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
730   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
731   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
732   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
733 
734   // Clamps a value between 0-255.
735   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
      // The inline implementation above is disabled and kept for reference only.
      // `clamp` is simply a second name for CLAMP, which is declared outside this section
      // (presumably with the same 0..255 saturating behaviour -- verify against its definition).
736   alias clamp = CLAMP;
737 
738   static struct DCT_Upsample {
      // Fixed-point helpers that derive four 4x4 matrices (P, Q, R, S) from an 8x8 block of
      // coefficients addressed as pSrc[col + row*8]. The file header says H2V2 chroma is upsampled
      // in the frequency domain; this struct looks like the arithmetic for that step, but the
      // callers are outside this section -- confirm there.
739   static:
        // Plain 4x4 matrix of ints stored as v[row][col].
740     static struct Matrix44 {
741     pure nothrow @trusted @nogc:
742       alias Element_Type = int;
743       enum { NUM_ROWS = 4, NUM_COLS = 4 }
744 
745       Element_Type[NUM_COLS][NUM_ROWS] v;
746 
          // Builds a matrix by copying every row of `m`.
747       this() (in auto ref Matrix44 m) {
748         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
749       }
750 
751       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
752       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
753 
          // Element accessor for row `r`, column `c`. Goes through .ptr, so there is no bounds
          // check: callers must pass 0..3 for both indices.
754       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
755 
          // In-place element-wise addition: this += a.
756       ref Matrix44 opOpAssign(string op:"+") (in auto ref Matrix44 a) {
757         foreach (int r; 0..NUM_ROWS) {
758           at(r, 0) += a.at(r, 0);
759           at(r, 1) += a.at(r, 1);
760           at(r, 2) += a.at(r, 2);
761           at(r, 3) += a.at(r, 3);
762         }
763         return this;
764       }
765 
          // In-place element-wise subtraction: this -= a.
766       ref Matrix44 opOpAssign(string op:"-") (in auto ref Matrix44 a) {
767         foreach (int r; 0..NUM_ROWS) {
768           at(r, 0) -= a.at(r, 0);
769           at(r, 1) -= a.at(r, 1);
770           at(r, 2) -= a.at(r, 2);
771           at(r, 3) -= a.at(r, 3);
772         }
773         return this;
774       }
775 
          // Element-wise sum of this and `b`, returned as a new matrix.
776       Matrix44 opBinary(string op:"+") (in auto ref Matrix44 b) const {
777         alias a = this;
778         Matrix44 ret;
779         foreach (int r; 0..NUM_ROWS) {
780           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
781           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
782           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
783           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
784         }
785         return ret;
786       }
787 
          // Element-wise difference of this and `b`, returned as a new matrix.
788       Matrix44 opBinary(string op:"-") (in auto ref Matrix44 b) const {
789         alias a = this;
790         Matrix44 ret;
791         foreach (int r; 0..NUM_ROWS) {
792           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
793           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
794           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
795           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
796         }
797         return ret;
798       }
799 
          // Writes a + b into pDst, narrowing each sum to jpgd_block_t.
          // Element (r, c) of the sum lands at pDst[c*8 + r], i.e. the result is stored
          // with rows and columns swapped into a buffer whose row stride is 8.
800       static void add_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
801         foreach (int r; 0..4) {
802           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
803           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
804           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
805           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
806         }
807       }
808 
          // Same as add_and_store, but stores a - b.
809       static void sub_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
810         foreach (int r; 0..4) {
811           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
812           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
813           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
814           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
815         }
816       }
817     }
818 
        // Fixed-point format used below: FRACT_BITS fractional bits, so 1.0 is represented by SCALE.
819     enum FRACT_BITS = 10;
820     enum SCALE = 1 << FRACT_BITS;
821 
822     alias Temp_Type = int;
823     //TODO: convert defines to mixins
824     //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
825     //#define F(i) ((int)((i) * SCALE + .5f))
826     // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
827     //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
828 
        // D: converts a fixed-point value back to an integer by adding half of SCALE and
        //    shifting right by FRACT_BITS.
        // F: converts a float constant to fixed point; being an enum template, the conversion
        //    is evaluated at compile time.
829     static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
830     enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
831 
832     // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
833     static struct P_Q(int NUM_ROWS, int NUM_COLS) {
          // Fills P and Q from the source block.
          // Step 1 builds 32 intermediates X0jk (j = 0..3, k = source row 0..7):
          //   j = 0 and j = 2 copy source columns 0 and 4,
          //   j = 1 and j = 3 are weighted sums of source columns 1, 3, 5 and 7.
          // Step 2 combines each row j: P takes entries 0 and 4 directly and two weighted sums of
          // entries 1, 3, 5, 7; Q takes entries 2 and 6 directly and two other weighted sums of
          // the same odd entries.
834       static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
835         //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
            // AT!(c, r) is a compile-time string that is mixed in below: the literal "0" when the
            // coefficient lies outside the NUM_COLS x NUM_ROWS non-zero region, otherwise the
            // expression "pSrc[c+r*8]". Out-of-range terms are therefore constants, never loads.
836         template AT(int c, int r) {
837           static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
838         }
839         // 4x8 = 4x8 times 8x8, matrix 0 is constant
840         immutable Temp_Type X000 = mixin(AT!(0, 0));
841         immutable Temp_Type X001 = mixin(AT!(0, 1));
842         immutable Temp_Type X002 = mixin(AT!(0, 2));
843         immutable Temp_Type X003 = mixin(AT!(0, 3));
844         immutable Temp_Type X004 = mixin(AT!(0, 4));
845         immutable Temp_Type X005 = mixin(AT!(0, 5));
846         immutable Temp_Type X006 = mixin(AT!(0, 6));
847         immutable Temp_Type X007 = mixin(AT!(0, 7));
848         immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
849         immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
850         immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
851         immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
852         immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
853         immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
854         immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
855         immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
856         immutable Temp_Type X020 = mixin(AT!(4, 0));
857         immutable Temp_Type X021 = mixin(AT!(4, 1));
858         immutable Temp_Type X022 = mixin(AT!(4, 2));
859         immutable Temp_Type X023 = mixin(AT!(4, 3));
860         immutable Temp_Type X024 = mixin(AT!(4, 4));
861         immutable Temp_Type X025 = mixin(AT!(4, 5));
862         immutable Temp_Type X026 = mixin(AT!(4, 6));
863         immutable Temp_Type X027 = mixin(AT!(4, 7));
864         immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
865         immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
866         immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
867         immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
868         immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
869         immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
870         immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
871         immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));
872 
873         // 4x4 = 4x8 times 8x4, matrix 1 is constant
874         P.at(0, 0) = X000;
875         P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
876         P.at(0, 2) = X004;
877         P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
878         P.at(1, 0) = X010;
879         P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
880         P.at(1, 2) = X014;
881         P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
882         P.at(2, 0) = X020;
883         P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
884         P.at(2, 2) = X024;
885         P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
886         P.at(3, 0) = X030;
887         P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
888         P.at(3, 2) = X034;
889         P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
890         // 40 muls 24 adds
891 
892         // 4x4 = 4x8 times 8x4, matrix 1 is constant
893         Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
894         Q.at(0, 1) = X002;
895         Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
896         Q.at(0, 3) = X006;
897         Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
898         Q.at(1, 1) = X012;
899         Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
900         Q.at(1, 3) = X016;
901         Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
902         Q.at(2, 1) = X022;
903         Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
904         Q.at(2, 3) = X026;
905         Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
906         Q.at(3, 1) = X032;
907         Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
908         Q.at(3, 3) = X036;
909         // 40 muls 24 adds
910       }
911     }
912 
        // Counterpart of P_Q with the same template parameters (number of non-zero rows/cols).
913     static struct R_S(int NUM_ROWS, int NUM_COLS) {
          // Fills R and S from the source block.
          // Step 1 builds 32 intermediates X1jk (j = 0..3, k = source row 0..7):
          //   j = 1 and j = 3 copy source columns 2 and 6,
          //   j = 0 and j = 2 are weighted sums of source columns 1, 3, 5 and 7.
          // Step 2 mirrors P_Q.calc: R is assembled like P and S like Q, from the X1jk values.
914       static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
915         //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
            // Same compile-time accessor as in P_Q.calc: "0" outside the non-zero region,
            // otherwise the text of a pSrc[c+r*8] load.
916         template AT(int c, int r) {
917           static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
918         }
919         // 4x8 = 4x8 times 8x8, matrix 0 is constant
920         immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
921         immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
922         immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
923         immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
924         immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
925         immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
926         immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
927         immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
928         immutable Temp_Type X110 = mixin(AT!(2, 0));
929         immutable Temp_Type X111 = mixin(AT!(2, 1));
930         immutable Temp_Type X112 = mixin(AT!(2, 2));
931         immutable Temp_Type X113 = mixin(AT!(2, 3));
932         immutable Temp_Type X114 = mixin(AT!(2, 4));
933         immutable Temp_Type X115 = mixin(AT!(2, 5));
934         immutable Temp_Type X116 = mixin(AT!(2, 6));
935         immutable Temp_Type X117 = mixin(AT!(2, 7));
936         immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
937         immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
938         immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
939         immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
940         immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
941         immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
942         immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
943         immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
944         immutable Temp_Type X130 = mixin(AT!(6, 0));
945         immutable Temp_Type X131 = mixin(AT!(6, 1));
946         immutable Temp_Type X132 = mixin(AT!(6, 2));
947         immutable Temp_Type X133 = mixin(AT!(6, 3));
948         immutable Temp_Type X134 = mixin(AT!(6, 4));
949         immutable Temp_Type X135 = mixin(AT!(6, 5));
950         immutable Temp_Type X136 = mixin(AT!(6, 6));
951         immutable Temp_Type X137 = mixin(AT!(6, 7));
952         // 80 muls 48 adds
953 
954         // 4x4 = 4x8 times 8x4, matrix 1 is constant
955         R.at(0, 0) = X100;
956         R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
957         R.at(0, 2) = X104;
958         R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
959         R.at(1, 0) = X110;
960         R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
961         R.at(1, 2) = X114;
962         R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
963         R.at(2, 0) = X120;
964         R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
965         R.at(2, 2) = X124;
966         R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
967         R.at(3, 0) = X130;
968         R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
969         R.at(3, 2) = X134;
970         R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
971         // 40 muls 24 adds
972         // 4x4 = 4x8 times 8x4, matrix 1 is constant
973         S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
974         S.at(0, 1) = X102;
975         S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
976         S.at(0, 3) = X106;
977         S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
978         S.at(1, 1) = X112;
979         S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
980         S.at(1, 3) = X116;
981         S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
982         S.at(2, 1) = X122;
983         S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
984         S.at(2, 3) = X126;
985         S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
986         S.at(3, 1) = X132;
987         S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
988         S.at(3, 3) = X136;
989         // 40 muls 24 adds
990       }
991     }
992   } // end namespace DCT_Upsample
993 
994   // Unconditionally frees all allocated m_blocks.
995   void free_all_blocks () {
996     //m_pStream = null;
997     readfn = null;
998     for (mem_block *b = m_pMem_blocks; b; ) {
999       mem_block* n = b.m_pNext;
1000       jpgd_free(b);
1001       b = n;
1002     }
1003     m_pMem_blocks = null;
1004   }
1005 
1006   // This method handles all errors. It will never return.
1007   // It could easily be changed to use C++ exceptions.
1008   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
1009     m_error_code = status;
1010     free_all_blocks();
1011     //longjmp(m_jmp_state, status);
1012     throw new Exception("jpeg decoding error", __FILE__, line);
1013   }
1014 
1015   void* alloc (size_t nSize, bool zero=false) {
1016     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1017     char *rv = null;
1018     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1019     {
1020       if ((b.m_used_count + nSize) <= b.m_size)
1021       {
1022         rv = b.m_data.ptr + b.m_used_count;
1023         b.m_used_count += nSize;
1024         break;
1025       }
1026     }
1027     if (!rv)
1028     {
1029       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1030       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1031       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1032       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1033       b.m_used_count = nSize;
1034       b.m_size = capacity;
1035       rv = b.m_data.ptr;
1036     }
1037     if (zero) memset(rv, 0, nSize);
1038     return rv;
1039   }
1040 
1041   void word_clear (void *p, ushort c, uint n) {
1042     ubyte *pD = cast(ubyte*)p;
1043     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1044     while (n)
1045     {
1046       pD[0] = l; pD[1] = h; pD += 2;
1047       n--;
1048     }
1049   }
1050 
1051   // Refill the input buffer.
1052   // This method will sit in a loop until (A) the buffer is full or (B)
1053   // the stream's read() method reports and end of file condition.
1054   void prep_in_buffer () {
1055     m_in_buf_left = 0;
1056     m_pIn_buf_ofs = m_in_buf.ptr;
1057 
1058     if (m_eof_flag)
1059       return;
1060 
1061     do
1062     {
1063       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1064       if (bytes_read == -1)
1065         stop_decoding(JPGD_STREAM_READ);
1066 
1067       m_in_buf_left += bytes_read;
1068     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1069 
1070     m_total_bytes_read += m_in_buf_left;
1071 
1072     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1073     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1074     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1075   }
1076 
1077   // Read a Huffman code table.
1078   void read_dht_marker () {
1079     int i, index, count;
1080     ubyte[17] huff_num;
1081     ubyte[256] huff_val;
1082 
1083     uint num_left = get_bits(16);
1084 
1085     if (num_left < 2)
1086       stop_decoding(JPGD_BAD_DHT_MARKER);
1087 
1088     num_left -= 2;
1089 
1090     while (num_left)
1091     {
1092       index = get_bits(8);
1093 
1094       huff_num.ptr[0] = 0;
1095 
1096       count = 0;
1097 
1098       for (i = 1; i <= 16; i++)
1099       {
1100         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1101         count += huff_num.ptr[i];
1102       }
1103 
1104       if (count > 255)
1105         stop_decoding(JPGD_BAD_DHT_COUNTS);
1106 
1107       for (i = 0; i < count; i++)
1108         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1109 
1110       i = 1 + 16 + count;
1111 
1112       if (num_left < cast(uint)i)
1113         stop_decoding(JPGD_BAD_DHT_MARKER);
1114 
1115       num_left -= i;
1116 
1117       if ((index & 0x10) > 0x10)
1118         stop_decoding(JPGD_BAD_DHT_INDEX);
1119 
1120       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1121 
1122       if (index >= JPGD_MAX_HUFF_TABLES)
1123         stop_decoding(JPGD_BAD_DHT_INDEX);
1124 
1125       if (!m_huff_num.ptr[index])
1126         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1127 
1128       if (!m_huff_val.ptr[index])
1129         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1130 
1131       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1132       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1133       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1134     }
1135   }
1136 
1137   // Read a quantization table.
1138   void read_dqt_marker () {
1139     int n, i, prec;
1140     uint num_left;
1141     uint temp;
1142 
1143     num_left = get_bits(16);
1144 
1145     if (num_left < 2)
1146       stop_decoding(JPGD_BAD_DQT_MARKER);
1147 
1148     num_left -= 2;
1149 
1150     while (num_left)
1151     {
1152       n = get_bits(8);
1153       prec = n >> 4;
1154       n &= 0x0F;
1155 
1156       if (n >= JPGD_MAX_QUANT_TABLES)
1157         stop_decoding(JPGD_BAD_DQT_TABLE);
1158 
1159       if (!m_quant.ptr[n])
1160         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1161 
1162       // read quantization entries, in zag order
1163       for (i = 0; i < 64; i++)
1164       {
1165         temp = get_bits(8);
1166 
1167         if (prec)
1168           temp = (temp << 8) + get_bits(8);
1169 
1170         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1171       }
1172 
1173       i = 64 + 1;
1174 
1175       if (prec)
1176         i += 64;
1177 
1178       if (num_left < cast(uint)i)
1179         stop_decoding(JPGD_BAD_DQT_LENGTH);
1180 
1181       num_left -= i;
1182     }
1183   }
1184 
1185   // Read the start of frame (SOF) marker.
1186   void read_sof_marker () {
1187     int i;
1188     uint num_left;
1189 
1190     num_left = get_bits(16);
1191 
1192     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1193       stop_decoding(JPGD_BAD_PRECISION);
1194 
1195     m_image_y_size = get_bits(16);
1196 
1197     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1198       stop_decoding(JPGD_BAD_HEIGHT);
1199 
1200     m_image_x_size = get_bits(16);
1201 
1202     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1203       stop_decoding(JPGD_BAD_WIDTH);
1204 
1205     m_comps_in_frame = get_bits(8);
1206 
1207     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1208       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1209 
1210     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1211       stop_decoding(JPGD_BAD_SOF_LENGTH);
1212 
1213     for (i = 0; i < m_comps_in_frame; i++)
1214     {
1215       m_comp_ident.ptr[i]  = get_bits(8);
1216       m_comp_h_samp.ptr[i] = get_bits(4);
1217       m_comp_v_samp.ptr[i] = get_bits(4);
1218       m_comp_quant.ptr[i]  = get_bits(8);
1219     }
1220   }
1221 
1222   // Used to skip unrecognized markers.
1223   void skip_variable_marker () {
1224     uint num_left;
1225 
1226     num_left = get_bits(16);
1227 
1228     if (num_left < 2)
1229       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1230 
1231     num_left -= 2;
1232 
1233     while (num_left)
1234     {
1235       get_bits(8);
1236       num_left--;
1237     }
1238   }
1239 
1240   // Read a define restart interval (DRI) marker.
1241   void read_dri_marker () {
1242     if (get_bits(16) != 4)
1243       stop_decoding(JPGD_BAD_DRI_LENGTH);
1244 
1245     m_restart_interval = get_bits(16);
1246   }
1247 
1248   // Read a start of scan (SOS) marker.
1249   void read_sos_marker () {
1250     uint num_left;
1251     int i, ci, n, c, cc;
1252 
1253     num_left = get_bits(16);
1254 
1255     n = get_bits(8);
1256 
1257     m_comps_in_scan = n;
1258 
1259     num_left -= 3;
1260 
1261     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1262       stop_decoding(JPGD_BAD_SOS_LENGTH);
1263 
1264     for (i = 0; i < n; i++)
1265     {
1266       cc = get_bits(8);
1267       c = get_bits(8);
1268       num_left -= 2;
1269 
1270       for (ci = 0; ci < m_comps_in_frame; ci++)
1271         if (cc == m_comp_ident.ptr[ci])
1272           break;
1273 
1274       if (ci >= m_comps_in_frame)
1275         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1276 
1277       m_comp_list.ptr[i]    = ci;
1278       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1279       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1280     }
1281 
1282     m_spectral_start  = get_bits(8);
1283     m_spectral_end    = get_bits(8);
1284     m_successive_high = get_bits(4);
1285     m_successive_low  = get_bits(4);
1286 
1287     if (!m_progressive_flag)
1288     {
1289       m_spectral_start = 0;
1290       m_spectral_end = 63;
1291     }
1292 
1293     num_left -= 3;
1294 
1295     /* read past whatever is num_left */
1296     while (num_left)
1297     {
1298       get_bits(8);
1299       num_left--;
1300     }
1301   }
1302 
1303   // Finds the next marker.
1304   int next_marker () {
1305     uint c, bytes;
1306 
1307     bytes = 0;
1308 
1309     do
1310     {
1311       do
1312       {
1313         bytes++;
1314         c = get_bits(8);
1315       } while (c != 0xFF);
1316 
1317       do
1318       {
1319         c = get_bits(8);
1320       } while (c == 0xFF);
1321 
1322     } while (c == 0);
1323 
1324     // If bytes > 0 here, there where extra bytes before the marker (not good).
1325 
1326     return c;
1327   }
1328 
1329   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1330   // encountered.
1331   int process_markers () {
1332     int c;
1333 
1334     for ( ; ; ) {
1335       c = next_marker();
1336 
1337       switch (c)
1338       {
1339         case M_SOF0:
1340         case M_SOF1:
1341         case M_SOF2:
1342         case M_SOF3:
1343         case M_SOF5:
1344         case M_SOF6:
1345         case M_SOF7:
1346         //case M_JPG:
1347         case M_SOF9:
1348         case M_SOF10:
1349         case M_SOF11:
1350         case M_SOF13:
1351         case M_SOF14:
1352         case M_SOF15:
1353         case M_SOI:
1354         case M_EOI:
1355         case M_SOS:
1356           return c;
1357         case M_DHT:
1358           read_dht_marker();
1359           break;
1360         // No arithmitic support - dumb patents!
1361         case M_DAC:
1362           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1363           break;
1364         case M_DQT:
1365           read_dqt_marker();
1366           break;
1367         case M_DRI:
1368           read_dri_marker();
1369           break;
1370         //case M_APP0:  /* no need to read the JFIF marker */
1371 
1372         case M_JPG:
1373         case M_RST0:    /* no parameters */
1374         case M_RST1:
1375         case M_RST2:
1376         case M_RST3:
1377         case M_RST4:
1378         case M_RST5:
1379         case M_RST6:
1380         case M_RST7:
1381         case M_TEM:
1382           stop_decoding(JPGD_UNEXPECTED_MARKER);
1383           break;
1384         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1385           skip_variable_marker();
1386           break;
1387       }
1388     }
1389   }
1390 
1391   // Finds the start of image (SOI) marker.
1392   // This code is rather defensive: it only checks the first 512 bytes to avoid
1393   // false positives.
void locate_soi_marker () {
  // Fast path: an ordinary JPEG stream opens with 0xFF immediately followed by M_SOI.
  uint prev = get_bits(8);
  uint cur = get_bits(8);
  if (prev == 0xFF && cur == M_SOI) return;

  // Otherwise slide a two-byte window over the stream looking for 0xFF/M_SOI.
  // The budget is decremented before every read, so the search is bounded.
  uint budget = 4096; //512;
  for (;;) {
    budget -= 1;
    if (budget == 0) stop_decoding(JPGD_NOT_JPEG);

    prev = cur;
    cur = get_bits(8);

    if (prev != 0xFF) continue;
    if (cur == M_SOI) break;
    // get_bits will keep returning M_EOI if we read past the end
    if (cur == M_EOI) stop_decoding(JPGD_NOT_JPEG);
  }

  // The byte following the marker is peeked from the top of the bit buffer
  // (nothing is consumed); unless it is 0xFF it cannot start the next marker,
  // so the file is rejected.
  if (((m_bit_buf >> 24) & 0xFF) != 0xFF) stop_decoding(JPGD_NOT_JPEG);
}
1433 
1434   // Find a start of frame (SOF) marker.
void locate_sof_marker () {
  locate_soi_marker();

  // process_markers() handles the table/misc markers it meets and hands back
  // the first marker it does not consume itself.
  immutable int marker = process_markers();

  if (marker == M_SOF9) {
    // Arithmetic coding is not supported.
    stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
  } else if (marker == M_SOF0 || marker == M_SOF1 || marker == M_SOF2) {
    // SOF0: baseline DCT, SOF1: extended sequential DCT, SOF2: progressive.
    if (marker == M_SOF2) m_progressive_flag = true;
    read_sof_marker();
  } else {
    stop_decoding(JPGD_UNSUPPORTED_MARKER);
  }
}
1457 
1458   // Find a start of scan (SOS) marker.
int locate_sos_marker () {
  immutable int marker = process_markers();

  // EOI means there are no more scans; this is reported as false, not as an error.
  if (marker == M_EOI) return false;

  // Anything other than SOS at this point is a malformed stream.
  if (marker != M_SOS) stop_decoding(JPGD_UNEXPECTED_MARKER);

  read_sos_marker();
  return true;
}
1473 
1474   // Reset everything to default/uninitialized state.
void initit (JpegStreamReadFunc rfn) {
  // Resets every decoder field to its default state, installs `rfn` as the
  // stream reader, fills the input buffer and primes the bit buffer.
  //
  // Params:
  //   rfn = callback stored in `readfn`; it must be installed before
  //         prep_in_buffer() runs at the end of this function.

  // --- general state ---
  m_pMem_blocks = null;
  m_error_code = JPGD_SUCCESS;
  m_ready_flag = false;
  m_image_x_size = m_image_y_size = 0;
  readfn = rfn;
  m_progressive_flag = false;

  // --- Huffman / quantization table storage ---
  memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
  memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
  memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
  memset(m_quant.ptr, 0, m_quant.sizeof);

  // --- frame description ---
  m_scan_type = 0;
  m_comps_in_frame = 0;
  memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
  memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
  memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
  memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
  memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
  memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);

  // --- scan description ---
  m_comps_in_scan = 0;
  memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
  memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
  memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);

  m_spectral_start = 0;
  m_spectral_end = 0;
  m_successive_low = 0;
  m_successive_high = 0;

  // --- MCU geometry ---
  m_max_mcu_x_size = 0;
  m_max_mcu_y_size = 0;
  m_blocks_per_mcu = 0;
  m_max_blocks_per_row = 0;
  m_mcus_per_row = 0;
  m_mcus_per_col = 0;
  m_expanded_blocks_per_component = 0;
  m_expanded_blocks_per_mcu = 0;
  m_expanded_blocks_per_row = 0;
  m_freq_domain_chroma_upsample = false;
  memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);

  // --- output bookkeeping ---
  m_total_lines_left = 0;
  m_mcu_lines_left = 0;
  m_real_dest_bytes_per_scan_line = 0;
  m_dest_bytes_per_scan_line = 0;
  m_dest_bytes_per_pixel = 0;

  // --- decoding tables and coefficient buffers ---
  memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
  memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
  memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
  // m_block_y_mcu is cleared exactly once here (it used to be zeroed twice).
  memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
  m_eob_run = 0;

  // --- input buffer ---
  m_pIn_buf_ofs = m_in_buf.ptr;
  m_in_buf_left = 0;
  m_eof_flag = false;
  m_tem_flag = 0;
  memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
  memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
  memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);

  // --- restart interval state ---
  m_restart_interval = 0;
  m_restarts_left    = 0;
  m_next_restart_num = 0;

  m_max_mcus_per_row = 0;
  m_max_blocks_per_mcu = 0;
  m_max_mcus_per_col = 0;

  memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
  m_pMCU_coefficients = null;
  m_pSample_buf = null;

  m_total_bytes_read = 0;

  m_pScan_line_0 = null;
  m_pScan_line_1 = null;

  // Ready the input buffer.
  prep_in_buffer();

  // Prime the bit buffer.
  m_bits_left = 16;
  m_bit_buf = 0;
  get_bits(16);
  get_bits(16);

  // Every block starts out marked as having all 64 coefficients in use.
  foreach (i; 0 .. JPGD_MAX_BLOCKS_PER_MCU)
    m_mcu_block_max_zag.ptr[i] = 64;
}
1575 
// Number of fractional bits used by the fixed-point colour conversion constants.
enum SCALEBITS = 16;
// The value 0.5 expressed in SCALEBITS fixed point; added before shifting to round.
enum ONE_HALF = cast(int)1 << (SCALEBITS - 1);
// Converts a compile-time float into a SCALEBITS fixed-point integer.
template FIX(float x) {
  enum FIX = cast(int)(x * (1L << SCALEBITS) + 0.5f);
}
1579 
1580   // Create a few tables that allow us to quickly convert YCbCr to RGB.
void create_look_ups () {
  foreach (int i; 0 .. 256) {
    // Table index minus 128 gives the signed chroma offset.
    immutable int k = i - 128;

    // Red (from Cr) and blue (from Cb) terms are rounded and reduced to
    // plain integers right here.
    m_crr.ptr[i] = (FIX!(1.40200f) * k + ONE_HALF) >> SCALEBITS;
    m_cbb.ptr[i] = (FIX!(1.77200f) * k + ONE_HALF) >> SCALEBITS;

    // The two green terms stay in fixed point: the converters add them and
    // shift right by 16 afterwards, so the rounding constant is folded into
    // only one of the two tables.
    m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
    m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
  }
}
1591 
1592   // This method throws back into the stream any bytes that were read
1593   // into the bit buffer during initial marker scanning.
void fix_in_buffer () {
  // Pushes the byte found `shift` bits up in the bit buffer back into the input.
  void unread (int shift) {
    stuff_char(cast(ubyte)((m_bit_buf >> shift) & 0xFF));
  }

  // In case any 0xFF's were pulled into the buffer during marker scanning.
  // The bit buffer must be byte aligned at this point.
  assert((m_bits_left & 7) == 0);

  // The two low bytes are only returned when the bit counter says they are
  // still pending; the two high bytes are always returned.
  if (m_bits_left == 16) unread(0);
  if (m_bits_left >= 8) unread(8);
  unread(16);
  unread(24);

  // Refill the bit buffer through the reader that does not interpret markers.
  m_bits_left = 16;
  get_bits_no_markers(16);
  get_bits_no_markers(16);
}
1611 
void transform_mcu (int mcu_row) {
  // Runs the IDCT over every block of the current MCU, writing 64 samples per
  // block into the slot of the sample buffer that belongs to MCU `mcu_row`.
  jpgd_block_t* coeffs = m_pMCU_coefficients;
  ubyte* samples = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;

  for (int blk = 0; blk < m_blocks_per_mcu; ++blk) {
    immutable int ofs = blk * 64;
    idct(coeffs + ofs, samples + ofs, m_mcu_block_max_zag.ptr[blk]);
  }
}
1623 
// Indexed by (number of coefficients in use - 1). Each entry packs two counts
// as hi*16 + lo; transform_mcu_expand() switches on it to choose the matching
// DCT_Upsample.P_Q!(hi, lo) / R_S!(hi, lo) instantiation.
static immutable ubyte[64] s_max_rc = [
   17,  18,  34,  50,  50,  51,  52,  52,
   52,  68,  84,  84,  84,  84,  85,  86,
   86,  86,  86,  86, 102, 118, 118, 118,
  118, 118, 118, 119, 120, 120, 120, 120,
  120, 120, 120, 136, 136, 136, 136, 136,
  136, 136, 136, 136, 136, 136, 136, 136,
  136, 136, 136, 136, 136, 136, 136, 136,
  136, 136, 136, 136, 136, 136, 136, 136
];
1630 
void transform_mcu_expand (int mcu_row) {
  // IDCT for one MCU when chroma is upsampled in the frequency domain: the
  // first m_expanded_blocks_per_component blocks get a normal IDCT, then each
  // of the next two source blocks is expanded into four output blocks.
  jpgd_block_t* src = m_pMCU_coefficients;
  ubyte* dst = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;

  // Y IDCT
  int blk = 0;
  for (; blk < m_expanded_blocks_per_component; ++blk) {
    idct(src, dst, m_mcu_block_max_zag.ptr[blk]);
    src += 64;
    dst += 64;
  }

  // Chroma IDCT, with upsampling
  jpgd_block_t[64] scratch;

  foreach (chroma; 0 .. 2) {
    DCT_Upsample.Matrix44 P, Q, R, S;

    assert(m_mcu_block_max_zag.ptr[blk] >= 1);
    assert(m_mcu_block_max_zag.ptr[blk] <= 64);

    int zagIdx = m_mcu_block_max_zag.ptr[blk++] - 1;
    if (zagIdx <= 0) zagIdx = 0; // should never happen, only here to shut up static analysis

    // Choose the P/Q and R/S kernels specialised for how many coefficients
    // of this block are actually in use.
    switch (s_max_rc.ptr[zagIdx]) {
      case 1*16+1:
        DCT_Upsample.P_Q!(1, 1).calc(P, Q, src);
        DCT_Upsample.R_S!(1, 1).calc(R, S, src);
        break;
      case 1*16+2:
        DCT_Upsample.P_Q!(1, 2).calc(P, Q, src);
        DCT_Upsample.R_S!(1, 2).calc(R, S, src);
        break;
      case 2*16+2:
        DCT_Upsample.P_Q!(2, 2).calc(P, Q, src);
        DCT_Upsample.R_S!(2, 2).calc(R, S, src);
        break;
      case 3*16+2:
        DCT_Upsample.P_Q!(3, 2).calc(P, Q, src);
        DCT_Upsample.R_S!(3, 2).calc(R, S, src);
        break;
      case 3*16+3:
        DCT_Upsample.P_Q!(3, 3).calc(P, Q, src);
        DCT_Upsample.R_S!(3, 3).calc(R, S, src);
        break;
      case 3*16+4:
        DCT_Upsample.P_Q!(3, 4).calc(P, Q, src);
        DCT_Upsample.R_S!(3, 4).calc(R, S, src);
        break;
      case 4*16+4:
        DCT_Upsample.P_Q!(4, 4).calc(P, Q, src);
        DCT_Upsample.R_S!(4, 4).calc(R, S, src);
        break;
      case 5*16+4:
        DCT_Upsample.P_Q!(5, 4).calc(P, Q, src);
        DCT_Upsample.R_S!(5, 4).calc(R, S, src);
        break;
      case 5*16+5:
        DCT_Upsample.P_Q!(5, 5).calc(P, Q, src);
        DCT_Upsample.R_S!(5, 5).calc(R, S, src);
        break;
      case 5*16+6:
        DCT_Upsample.P_Q!(5, 6).calc(P, Q, src);
        DCT_Upsample.R_S!(5, 6).calc(R, S, src);
        break;
      case 6*16+6:
        DCT_Upsample.P_Q!(6, 6).calc(P, Q, src);
        DCT_Upsample.R_S!(6, 6).calc(R, S, src);
        break;
      case 7*16+6:
        DCT_Upsample.P_Q!(7, 6).calc(P, Q, src);
        DCT_Upsample.R_S!(7, 6).calc(R, S, src);
        break;
      case 7*16+7:
        DCT_Upsample.P_Q!(7, 7).calc(P, Q, src);
        DCT_Upsample.R_S!(7, 7).calc(R, S, src);
        break;
      case 7*16+8:
        DCT_Upsample.P_Q!(7, 8).calc(P, Q, src);
        DCT_Upsample.R_S!(7, 8).calc(R, S, src);
        break;
      case 8*16+8:
        DCT_Upsample.P_Q!(8, 8).calc(P, Q, src);
        DCT_Upsample.R_S!(8, 8).calc(R, S, src);
        break;
      default:
        assert(false);
    }

    // Form the sums, then turn P and R into the differences in place.
    auto sumPQ = DCT_Upsample.Matrix44(P + Q);
    P -= Q;
    auto sumRS = DCT_Upsample.Matrix44(R + S);
    R -= S;

    // Four output blocks per source block, emitted in this fixed order:
    // (P+Q)+(R+S), (P+Q)-(R+S), (P-Q)+(R-S), (P-Q)-(R-S).
    DCT_Upsample.Matrix44.add_and_store(scratch.ptr, sumPQ, sumRS);
    idct_4x4(scratch.ptr, dst);
    dst += 64;

    DCT_Upsample.Matrix44.sub_and_store(scratch.ptr, sumPQ, sumRS);
    idct_4x4(scratch.ptr, dst);
    dst += 64;

    DCT_Upsample.Matrix44.add_and_store(scratch.ptr, P, R);
    idct_4x4(scratch.ptr, dst);
    dst += 64;

    DCT_Upsample.Matrix44.sub_and_store(scratch.ptr, P, R);
    idct_4x4(scratch.ptr, dst);
    dst += 64;

    src += 64;
  }
}
1748 
1749   // Loads and dequantizes the next row of (already decoded) coefficients.
1750   // Progressive images only.
void load_next_row () {
  // For every MCU of the current row: copies the stored DC and AC coefficients
  // of each block into m_pMCU_coefficients, records how many coefficients are
  // in use, dequantizes the non-zero ones and runs the inverse transform.
  // Afterwards the per-component vertical block position advances to the
  // next row.

  // Horizontal block position of each component within this row.
  int[JPGD_MAX_COMPONENTS] block_x_mcu = 0;

  for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
    // Block offsets inside the current MCU (only advanced for multi-component scans).
    int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;

    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
      immutable int component_id = m_mcu_org.ptr[mcu_block];
      jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
      jpgd_block_t* p = m_pMCU_coefficients + 64 * mcu_block;

      // DC and AC coefficients live in separate buffers addressed by the
      // same block coordinates.
      immutable int bx = block_x_mcu.ptr[component_id] + block_x_mcu_ofs;
      immutable int by = m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs;
      jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], bx, by);
      jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], bx, by);
      p[0] = pDC[0];
      memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);

      // Find the last non-zero coefficient in g_ZAG order (index 0 is the floor).
      int last = 63;
      while (last > 0 && !p[g_ZAG[last]])
        --last;

      m_mcu_block_max_zag.ptr[mcu_block] = last + 1;

      // Dequantize; zero coefficients are skipped since they stay zero.
      for (int zi = last; zi >= 0; --zi) {
        immutable zag = g_ZAG[zi];
        if (p[zag])
          p[zag] = cast(jpgd_block_t)(p[zag] * q[zi]);
      }

      if (m_comps_in_scan == 1) {
        block_x_mcu.ptr[component_id]++;
      } else if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id]) {
        // Finished one row of this component's blocks inside the MCU.
        block_x_mcu_ofs = 0;

        if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id]) {
          // Finished the component inside this MCU: move on to the next MCU column.
          block_y_mcu_ofs = 0;
          block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
        }
      }
    }

    if (m_freq_domain_chroma_upsample)
      transform_mcu_expand(mcu_row);
    else
      transform_mcu(mcu_row);
  }

  // Advance the vertical block position for the next call.
  if (m_comps_in_scan == 1) {
    m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
  } else {
    for (int component_num = 0; component_num < m_comps_in_scan; component_num++) {
      immutable int component_id = m_comp_list.ptr[component_num];
      m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
    }
  }
}
1825 
1826   // Restart interval processing.
void process_restart () {
  // Consumes the restart marker expected at this point of the stream and
  // resets the entropy decoder state (DC predictors, EOB run, restart counters,
  // bit buffer). Calls stop_decoding(JPGD_BAD_RESTART_MARKER) when no marker is
  // found within the scan budget or when it is not the expected one.
  int i;
  int c = 0;

  // Align to a byte boundary
  // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
  //get_bits_no_markers(m_bits_left & 7);

  // Let's scan a little bit to find the marker, but not _too_ far.
  // 1536 is a "fudge factor" that determines how much to scan.
  for (i = 1536; i > 0; i--)
    if (get_char() == 0xFF)
      break;

  if (i == 0)
    stop_decoding(JPGD_BAD_RESTART_MARKER);

  // Skip any further 0xFF bytes; the first other byte is the marker code.
  // The same budget counter keeps running.
  for ( ; i > 0; i--)
    if ((c = get_char()) != 0xFF)
      break;

  if (i == 0)
    stop_decoding(JPGD_BAD_RESTART_MARKER);

  // Is it the expected marker? If not, something bad happened.
  if (c != (m_next_restart_num + M_RST0))
    stop_decoding(JPGD_BAD_RESTART_MARKER);

  // Reset each component's DC prediction values.
  // Uses .ptr like the other memset calls on this array; taking the address of
  // the array object itself only works while it is a static array.
  memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);

  m_eob_run = 0;

  m_restarts_left = m_restart_interval;

  // Restart markers cycle through eight consecutive codes starting at M_RST0.
  m_next_restart_num = (m_next_restart_num + 1) & 7;

  // Get the bit buffer going again...
  m_bits_left = 16;
  get_bits_no_markers(16);
  get_bits_no_markers(16);
}
1870 
// Scales a decoded AC coefficient `c` by its quantization factor `q`.
static int dequantize_ac (int c, int q) {
  pragma(inline, true);
  return c * q;
}
1872 
1873   // Decodes and dequantizes the next row of coefficients.
void decode_next_row () {
  // Huffman-decodes, dequantizes and inverse-transforms one row of MCUs.
  // Handles restart intervals and calls stop_decoding(JPGD_DECODE_ERROR) when a
  // run length would step outside the 64-coefficient block.
  for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
    if ((m_restart_interval) && (m_restarts_left == 0))
      process_restart();

    jpgd_block_t* p = m_pMCU_coefficients;
    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) {
      immutable int component_id = m_mcu_org.ptr[mcu_block];
      jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];

      // --- DC: decoded difference is added to the component's running value ---
      int r, s;
      s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
      s = JPGD_HUFF_EXTEND(r, s);
      s += m_last_dc_val.ptr[component_id];
      m_last_dc_val.ptr[component_id] = s;

      p[0] = cast(jpgd_block_t)(s * q[0]);

      // Number of coefficient slots the previous use of this block buffer
      // left set; anything below it that is skipped now must be zeroed.
      immutable int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];

      huff_tables* pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];

      // --- AC: (run, size) symbols; k walks the block in g_ZAG order ---
      int k;
      for (k = 1; k < 64; k++) {
        int extra_bits;
        s = huff_decode(pH, extra_bits);

        r = s >> 4;   // zero run length
        s &= 15;      // size of the coefficient that follows

        if (s == 0) {
          // Size 0 with a run other than 15 ends the block.
          if (r != 15)
            break;

          // Size 0 with run 15: a run of 16 zeros.
          if ((k + 16) > 64)
            stop_decoding(JPGD_DECODE_ERROR);

          if (k < prev_num_set) {
            int kt = k;
            for (int n = JPGD_MIN(16, prev_num_set - k); n != 0; --n) {
              assert(kt <= 63);
              p[g_ZAG[kt++]] = 0;
            }
          }

          k += 16 - 1; // - 1 because the loop counter is k
          assert(p[g_ZAG[k]] == 0);
          continue;
        }

        if (r) {
          if ((k + r) > 63)
            stop_decoding(JPGD_DECODE_ERROR);

          // Clear stale values covered by the zero run.
          if (k < prev_num_set) {
            int kt = k;
            for (int n = JPGD_MIN(r, prev_num_set - k); n != 0; --n)
              p[g_ZAG[kt++]] = 0;
          }

          k += r;
        }

        s = JPGD_HUFF_EXTEND(extra_bits, s);

        assert(k < 64);

        p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
      }

      // Clear whatever stale values remain beyond the last decoded position.
      for (int kt = k; kt < prev_num_set; ++kt)
        p[g_ZAG[kt]] = 0;

      m_mcu_block_max_zag.ptr[mcu_block] = k;
    }

    if (m_freq_domain_chroma_upsample)
      transform_mcu_expand(mcu_row);
    else
      transform_mcu(mcu_row);

    m_restarts_left--;
  }
}
1979 
1980   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
void H1V1Convert () {
  // Converts one sample row of every MCU into 4-byte pixels (R, G, B, 255)
  // written to m_pScan_line_0. Each MCU holds three 64-sample blocks: Y, Cb, Cr.
  immutable int row = m_max_mcu_y_size - m_mcu_lines_left;
  ubyte* dst = m_pScan_line_0;
  ubyte* src = m_pSample_buf + row * 8;

  foreach (mcu; 0 .. m_max_mcus_per_row) {
    foreach (j; 0 .. 8) {
      immutable int y = src[j];
      immutable int cb = src[64 + j];
      immutable int cr = src[128 + j];

      dst[0] = clamp(y + m_crr.ptr[cr]);
      dst[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
      dst[2] = clamp(y + m_cbb.ptr[cb]);
      dst[3] = 255;

      dst += 4;
    }

    // Step over the three blocks of this MCU.
    src += 64*3;
  }
}
2005 
2006   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
void H2V1Convert () {
  // Converts one sample row of every MCU into 4-byte pixels (R, G, B, 255)
  // written to m_pScan_line_0. Each MCU holds two Y blocks followed by Cb and
  // Cr; every chroma sample is shared by two horizontally adjacent pixels.
  immutable int row = m_max_mcu_y_size - m_mcu_lines_left;
  ubyte* dst = m_pScan_line_0;
  ubyte* luma = m_pSample_buf + row * 8;
  ubyte* chroma = m_pSample_buf + 2*64 + row * 8;

  for (int mcu = m_max_mcus_per_row; mcu > 0; --mcu) {
    // One pass per Y block of the MCU.
    foreach (half; 0 .. 2) {
      foreach (j; 0 .. 4) {
        immutable int cb = chroma[0];
        immutable int cr = chroma[64];

        immutable int rc = m_crr.ptr[cr];
        immutable int gc = (m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16;
        immutable int bc = m_cbb.ptr[cb];

        // Two neighbouring luma samples share this chroma pair.
        foreach (px; 0 .. 2) {
          immutable int yy = luma[(j << 1) + px];
          dst[px*4 + 0] = clamp(yy + rc);
          dst[px*4 + 1] = clamp(yy + gc);
          dst[px*4 + 2] = clamp(yy + bc);
          dst[px*4 + 3] = 255;
        }

        dst += 8;
        chroma++;
      }
      luma += 64;
    }

    // Skip to the same row of the next MCU (4 blocks per MCU).
    luma += 64*4 - 64*2;
    chroma += 64*4 - 8;
  }
}
2049 
2050   // YCbCr H1V2 (1x2:1:1, 4 m_blocks per MCU) to RGB
void H1V2Convert () {
  // Converts two vertically adjacent sample rows of every MCU into 4-byte
  // pixels (R, G, B, 255): the upper row goes to m_pScan_line_0, the lower to
  // m_pScan_line_1. Both rows share the same chroma row.
  immutable int row = m_max_mcu_y_size - m_mcu_lines_left;
  ubyte* top = m_pScan_line_0;
  ubyte* bottom = m_pScan_line_1;

  // Rows 0..7 come from the first Y block, later rows from the second one.
  ubyte* luma = (row < 8)
    ? m_pSample_buf + row * 8
    : m_pSample_buf + 64*1 + (row & 7) * 8;

  // Chroma has half the vertical resolution.
  ubyte* chroma = m_pSample_buf + 64*2 + (row >> 1) * 8;

  for (int mcu = m_max_mcus_per_row; mcu > 0; --mcu) {
    foreach (j; 0 .. 8) {
      immutable int cb = chroma[0 + j];
      immutable int cr = chroma[64 + j];

      immutable int rc = m_crr.ptr[cr];
      immutable int gc = (m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16;
      immutable int bc = m_cbb.ptr[cb];

      immutable int yTop = luma[j];
      top[0] = clamp(yTop + rc);
      top[1] = clamp(yTop + gc);
      top[2] = clamp(yTop + bc);
      top[3] = 255;

      immutable int yBottom = luma[8 + j];
      bottom[0] = clamp(yBottom + rc);
      bottom[1] = clamp(yBottom + gc);
      bottom[2] = clamp(yBottom + bc);
      bottom[3] = 255;

      top += 4;
      bottom += 4;
    }

    // 4 blocks per MCU.
    luma += 64*4;
    chroma += 64*4;
  }
}
2096 
2097   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
void H2V2Convert () {
  // Converts two vertically adjacent sample rows of every MCU into 4-byte
  // pixels (R, G, B, 255): the upper row goes to m_pScan_line_0, the lower to
  // m_pScan_line_1. Every chroma sample covers a 2x2 group of luma samples.
  immutable int row = m_max_mcu_y_size - m_mcu_lines_left;
  ubyte* top = m_pScan_line_0;
  ubyte* bottom = m_pScan_line_1;

  // Rows 0..7 start in the first Y block, later rows in the third one.
  ubyte* luma = (row < 8)
    ? m_pSample_buf + row * 8
    : m_pSample_buf + 64*2 + (row & 7) * 8;

  // Chroma has half the vertical resolution.
  ubyte* chroma = m_pSample_buf + 64*4 + (row >> 1) * 8;

  for (int mcu = m_max_mcus_per_row; mcu > 0; --mcu) {
    // One pass per horizontally adjacent Y block.
    foreach (half; 0 .. 2) {
      for (int j = 0; j < 8; j += 2) {
        immutable int cb = chroma[0];
        immutable int cr = chroma[64];

        immutable int rc = m_crr.ptr[cr];
        immutable int gc = (m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16;
        immutable int bc = m_cbb.ptr[cb];

        // Upper row, left and right pixel.
        int yy = luma[j];
        top[0] = clamp(yy + rc);
        top[1] = clamp(yy + gc);
        top[2] = clamp(yy + bc);
        top[3] = 255;

        yy = luma[j + 1];
        top[4] = clamp(yy + rc);
        top[5] = clamp(yy + gc);
        top[6] = clamp(yy + bc);
        top[7] = 255;

        // Lower row, left and right pixel.
        yy = luma[j + 8];
        bottom[0] = clamp(yy + rc);
        bottom[1] = clamp(yy + gc);
        bottom[2] = clamp(yy + bc);
        bottom[3] = 255;

        yy = luma[j + 8 + 1];
        bottom[4] = clamp(yy + rc);
        bottom[5] = clamp(yy + gc);
        bottom[6] = clamp(yy + bc);
        bottom[7] = 255;

        top += 8;
        bottom += 8;

        chroma++;
      }
      luma += 64;
    }

    // Skip to the same row of the next MCU (6 blocks per MCU).
    luma += 64*6 - 64*2;
    chroma += 64*6 - 8;
  }
}
2161 
2162   // Y (1 block per MCU) to 8-bit grayscale
void gray_convert () {
  // Copies one 8-sample row out of every MCU's single block into m_pScan_line_0.
  immutable int row = m_max_mcu_y_size - m_mcu_lines_left;
  ubyte* dst = m_pScan_line_0;
  ubyte* src = m_pSample_buf + row * 8;

  for (int mcu = m_max_mcus_per_row; mcu > 0; --mcu, src += 64, dst += 8) {
    // The eight bytes are moved as two 32-bit words.
    uint* out32 = cast(uint*)dst;
    uint* in32 = cast(uint*)src;
    out32[0] = in32[0];
    out32[1] = in32[1];
  }
}
2177 
void expanded_convert () {
  // Converts one sample row of every MCU into 4-byte pixels (R, G, B, 255)
  // written to m_pScan_line_0, for the layout in which Y, Cb and Cr each occupy
  // m_expanded_blocks_per_component blocks per MCU.
  immutable int row = m_max_mcu_y_size - m_mcu_lines_left;

  ubyte* mcuBase = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
  ubyte* dst = m_pScan_line_0;

  // Distance in samples between the Y, Cb and Cr planes of one MCU.
  immutable int planeStride = 64 * m_expanded_blocks_per_component;

  for (int mcu = m_max_mcus_per_row; mcu > 0; --mcu) {
    // Walk the MCU eight pixels (one block width) at a time.
    for (int k = 0; k < m_max_mcu_x_size; k += 8) {
      ubyte* yRow = mcuBase + k * 8;
      ubyte* cbRow = yRow + planeStride;
      ubyte* crRow = yRow + planeStride * 2;

      foreach (j; 0 .. 8) {
        immutable int y = yRow[j];
        immutable int cb = cbRow[j];
        immutable int cr = crRow[j];

        dst[0] = clamp(y + m_crr.ptr[cr]);
        dst[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
        dst[2] = clamp(y + m_cbb.ptr[cb]);
        dst[3] = 255;

        dst += 4;
      }
    }

    mcuBase += 64 * m_expanded_blocks_per_mcu;
  }
}
2210 
2211   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
void find_eoi () {
  // Only non-progressive streams try to read the trailing marker here.
  if (m_progressive_flag == false) {
    // Attempt to read the EOI marker.
    //get_bits_no_markers(m_bits_left & 7);

    // Prime the bit buffer
    m_bits_left = 16;
    get_bits(16);
    get_bits(16);

    // The next marker _should_ be EOI; the returned marker value is not checked.
    process_markers();
  }

  // Bytes still sitting unread in the input buffer do not count as consumed.
  m_total_bytes_read -= m_in_buf_left;
}
2229 
2230   // Creates the tables needed for efficient Huffman decoding.
void make_huff_table (int index, huff_tables *pH) {
  // Builds the decoding tables in *pH from the code-length counts
  // (m_huff_num[index]) and symbol values (m_huff_val[index]) of table `index`.
  //
  // Codes of up to 8 bits are resolved by direct lookup (look_up / look_up2);
  // longer codes use look_up for their first 8 bits, which then holds a
  // negative entry leading into the binary tree stored in pH.tree.
  ubyte[257] huffsize;
  uint[257] huffcode;

  pH.ac_table = m_huff_ac.ptr[index] != 0;

  // Step 1: list the code length of every symbol, shortest first.
  int total = 0;
  for (int len = 1; len <= 16; ++len) {
    immutable int count = m_huff_num.ptr[index][len];
    for (int n = 0; n < count; ++n)
      huffsize.ptr[total++] = cast(ubyte)len;
  }
  huffsize.ptr[total] = 0; // terminator

  // Step 2: assign consecutive code values within each length, shifting left
  // whenever the length grows.
  {
    uint next = 0;
    int len = huffsize.ptr[0];
    int n = 0;
    while (huffsize.ptr[n]) {
      while (huffsize.ptr[n] == len) {
        huffcode.ptr[n++] = next;
        next++;
      }
      next <<= 1;
      len++;
    }
  }

  // Step 3: start from empty tables.
  memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
  memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
  memset(pH.tree.ptr, 0, pH.tree.sizeof);
  memset(pH.code_size.ptr, 0, pH.code_size.sizeof);

  // Tree entries are handed out as negative numbers, two at a time (one per
  // branch); entry e lives at tree index -e - 1.
  int nextfreeentry = -1;

  // Step 4: enter every symbol into the lookup tables or the tree.
  for (int p = 0; p < total; ++p) {
    immutable int sym = m_huff_val.ptr[index][p];
    uint code = huffcode.ptr[p];
    immutable int code_size = huffsize.ptr[p];

    pH.code_size.ptr[sym] = cast(ubyte)code_size;

    if (code_size <= 8) {
      // Short code: fill every 8-bit pattern that starts with this code.
      code <<= (8 - code_size);

      for (int reps = 1 << (8 - code_size); reps > 0; --reps) {
        assert(sym < 256);

        pH.look_up.ptr[code] = sym;

        // look_up2 packs the symbol (low 8 bits) with the number of bits to
        // consume (from bit 8). When the code plus the extra bits named by
        // the symbol's low nibble still fit into the 8-bit pattern, the extra
        // bits are pre-extracted (from bit 16) and flagged with 0x8000.
        immutable int num_extra_bits = sym & 15;
        immutable int total_codesize = code_size + num_extra_bits;

        if (num_extra_bits && total_codesize <= 8) {
          int extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
          assert(extra_bits <= 0x7FFF);
          pH.look_up2.ptr[code] = sym | 0x8000 | (extra_bits << 16) | (total_codesize << 8);
        } else {
          pH.look_up2.ptr[code] = sym | (code_size << 8);
        }

        code++;
      }
    } else {
      // Long code: the first 8 bits select the subtree root.
      immutable uint subtree = (code >> (code_size - 8)) & 0xFF;

      int currententry = pH.look_up.ptr[subtree];

      if (currententry == 0) {
        currententry = nextfreeentry;
        pH.look_up.ptr[subtree] = nextfreeentry;
        pH.look_up2.ptr[subtree] = nextfreeentry;

        nextfreeentry -= 2;
      }

      // Move the remaining code bits up so that the next one sits at 0x8000.
      code <<= (16 - (code_size - 8));

      // Walk or create one tree level per remaining bit except the last.
      for (int depth = code_size; depth > 9; --depth) {
        if ((code & 0x8000) == 0)
          currententry--;

        immutable int slot = -currententry - 1;

        if (pH.tree.ptr[slot] == 0) {
          pH.tree.ptr[slot] = nextfreeentry;
          currententry = nextfreeentry;
          nextfreeentry -= 2;
        } else {
          currententry = pH.tree.ptr[slot];
        }

        code <<= 1;
      }

      // The last bit selects the leaf that stores the symbol itself.
      if ((code & 0x8000) == 0)
        currententry--;

      pH.tree.ptr[-currententry - 1] = sym;
    }
  }
}
2368 
2369   // Verifies the quantization tables needed for this scan are available.
void check_quant_tables () {
  // Every component of the scan must refer to a quantization table that is present.
  foreach (n; 0 .. m_comps_in_scan) {
    immutable int comp = m_comp_list.ptr[n];
    if (m_quant.ptr[m_comp_quant.ptr[comp]] is null)
      stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
  }
}
2375 
2376   // Verifies that all the Huffman tables needed for this scan are available.
void check_huff_tables () {
  // Part 1: each component of the scan must have the tables it is going to use.
  // The DC table is required only when the scan starts at coefficient 0, the
  // AC table only when the scan reaches past coefficient 0.
  foreach (n; 0 .. m_comps_in_scan) {
    immutable int comp = m_comp_list.ptr[n];

    if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[comp]] == null))
      stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);

    if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[comp]] == null))
      stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
  }

  // Part 2: (re)build the decoding tables for every table that is present,
  // allocating the decoder-side structure the first time it is needed.
  for (int t = 0; t < JPGD_MAX_HUFF_TABLES; ++t) {
    if (!m_huff_num.ptr[t])
      continue;

    if (!m_pHuff_tabs.ptr[t])
      m_pHuff_tabs.ptr[t] = cast(huff_tables*)alloc(huff_tables.sizeof);

    make_huff_table(t, m_pHuff_tabs.ptr[t]);
  }
}
2396 
2397   // Determines the component order inside each MCU.
2398   // Also calcs how many MCU's are on each row, etc.
void calc_mcu_block_order () {
  // Largest sampling factors over all components of the frame.
  int max_h = 0, max_v = 0;
  foreach (comp; 0 .. m_comps_in_frame) {
    if (m_comp_h_samp.ptr[comp] > max_h)
      max_h = m_comp_h_samp.ptr[comp];

    if (m_comp_v_samp.ptr[comp] > max_v)
      max_v = m_comp_v_samp.ptr[comp];
  }

  // Size of every component in 8x8 blocks: scale the image size by the
  // component's share of the maximum sampling factor, rounding up twice.
  foreach (comp; 0 .. m_comps_in_frame) {
    m_comp_h_blocks.ptr[comp] = ((((m_image_x_size * m_comp_h_samp.ptr[comp]) + (max_h - 1)) / max_h) + 7) / 8;
    m_comp_v_blocks.ptr[comp] = ((((m_image_y_size * m_comp_v_samp.ptr[comp]) + (max_v - 1)) / max_v) + 7) / 8;
  }

  if (m_comps_in_scan == 1) {
    // Single-component scan: one block per MCU, MCU grid equals the
    // component's block grid.
    immutable int only = m_comp_list.ptr[0];

    m_mcus_per_row = m_comp_h_blocks.ptr[only];
    m_mcus_per_col = m_comp_v_blocks.ptr[only];

    m_mcu_org.ptr[0] = only;
    m_blocks_per_mcu = 1;
  } else {
    // Multi-component scan: an MCU spans max_h x max_v blocks.
    m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h - 1)) / max_h;
    m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v - 1)) / max_v;

    // Each component contributes h*v consecutive blocks to the MCU, in scan order.
    m_blocks_per_mcu = 0;

    for (int n = 0; n < m_comps_in_scan; n++) {
      immutable int comp = m_comp_list.ptr[n];

      for (int count = m_comp_h_samp.ptr[comp] * m_comp_v_samp.ptr[comp]; count != 0; --count)
        m_mcu_org.ptr[m_blocks_per_mcu++] = comp;
    }
  }
}
2452 
2453   // Starts a new scan.
2454   int init_scan () {
2455     if (!locate_sos_marker())
2456       return false;
2457 
2458     calc_mcu_block_order();
2459 
2460     check_huff_tables();
2461 
2462     check_quant_tables();
2463 
2464     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2465 
2466     m_eob_run = 0;
2467 
2468     if (m_restart_interval)
2469     {
2470       m_restarts_left = m_restart_interval;
2471       m_next_restart_num = 0;
2472     }
2473 
2474     fix_in_buffer();
2475 
2476     return true;
2477   }
2478 
2479   // Starts a frame. Determines if the number of components or sampling factors
2480   // are supported.
2481   void init_frame () {
2482     int i;
2483 
2484     if (m_comps_in_frame == 1)
2485     {
2486       version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}
2487 
2488       //if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2489       //  stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2490 
2491       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2492       {
2493         m_scan_type = JPGD_GRAYSCALE;
2494         m_max_blocks_per_mcu = 1;
2495         m_max_mcu_x_size = 8;
2496         m_max_mcu_y_size = 8;
2497       }
2498       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2499       {
2500         //k8: i added this, and i absolutely don't know what it means; but it decoded two sample images i found
2501         m_scan_type = JPGD_GRAYSCALE;
2502         m_max_blocks_per_mcu = 4;
2503         m_max_mcu_x_size = 8;
2504         m_max_mcu_y_size = 8;
2505       }
2506       else
2507         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2508     }
2509     else if (m_comps_in_frame == 3)
2510     {
2511       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2512            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2513         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2514 
2515       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2516       {
2517         m_scan_type = JPGD_YH1V1;
2518 
2519         m_max_blocks_per_mcu = 3;
2520         m_max_mcu_x_size = 8;
2521         m_max_mcu_y_size = 8;
2522       }
2523       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2524       {
2525         m_scan_type = JPGD_YH2V1;
2526         m_max_blocks_per_mcu = 4;
2527         m_max_mcu_x_size = 16;
2528         m_max_mcu_y_size = 8;
2529       }
2530       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2531       {
2532         m_scan_type = JPGD_YH1V2;
2533         m_max_blocks_per_mcu = 4;
2534         m_max_mcu_x_size = 8;
2535         m_max_mcu_y_size = 16;
2536       }
2537       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2538       {
2539         m_scan_type = JPGD_YH2V2;
2540         m_max_blocks_per_mcu = 6;
2541         m_max_mcu_x_size = 16;
2542         m_max_mcu_y_size = 16;
2543       }
2544       else
2545         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2546     }
2547     else
2548       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2549 
2550     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2551     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2552 
2553     // These values are for the *destination* pixels: after conversion.
2554     if (m_scan_type == JPGD_GRAYSCALE)
2555       m_dest_bytes_per_pixel = 1;
2556     else
2557       m_dest_bytes_per_pixel = 4;
2558 
2559     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2560 
2561     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2562 
2563     // Initialize two scan line buffers.
2564     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2565     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2566       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2567 
2568     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2569 
2570     // Should never happen
2571     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2572       stop_decoding(JPGD_ASSERTION_ERROR);
2573 
2574     // Allocate the coefficient buffer, enough for one MCU
2575     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2576 
2577     for (i = 0; i < m_max_blocks_per_mcu; i++)
2578       m_mcu_block_max_zag.ptr[i] = 64;
2579 
2580     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2581     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2582     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2583     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2584     m_freq_domain_chroma_upsample = false;
2585     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2586       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2587     }
2588 
2589     if (m_freq_domain_chroma_upsample)
2590       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2591     else
2592       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2593 
2594     m_total_lines_left = m_image_y_size;
2595 
2596     m_mcu_lines_left = 0;
2597 
2598     create_look_ups();
2599   }
2600 
2601   // The coeff_buf series of methods originally stored the coefficients
2602   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2603   // was used to make this process more efficient. Now, we can store the entire
2604   // thing in RAM.
2605   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2606     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2607 
2608     cb.block_num_x = block_num_x;
2609     cb.block_num_y = block_num_y;
2610     cb.block_len_x = block_len_x;
2611     cb.block_len_y = block_len_y;
2612     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
2613     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2614     return cb;
2615   }
2616 
2617   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2618     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2619     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2620   }
2621 
  // The following methods decode the various types of blocks encountered
  // in progressively encoded images.
2624   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2625     int s, r;
2626     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2627 
2628     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2629     {
2630       r = pD.get_bits_no_markers(s);
2631       s = JPGD_HUFF_EXTEND(r, s);
2632     }
2633 
2634     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2635 
2636     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2637   }
2638 
2639   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2640     if (pD.get_bits_no_markers(1))
2641     {
2642       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2643 
2644       p[0] |= (1 << pD.m_successive_low);
2645     }
2646   }
2647 
2648   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2649     int k, s, r;
2650 
2651     if (pD.m_eob_run)
2652     {
2653       pD.m_eob_run--;
2654       return;
2655     }
2656 
2657     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2658 
2659     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2660     {
2661       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2662 
2663       r = s >> 4;
2664       s &= 15;
2665 
2666       if (s)
2667       {
2668         if ((k += r) > 63)
2669           pD.stop_decoding(JPGD_DECODE_ERROR);
2670 
2671         r = pD.get_bits_no_markers(s);
2672         s = JPGD_HUFF_EXTEND(r, s);
2673 
2674         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2675       }
2676       else
2677       {
2678         if (r == 15)
2679         {
2680           if ((k += 15) > 63)
2681             pD.stop_decoding(JPGD_DECODE_ERROR);
2682         }
2683         else
2684         {
2685           pD.m_eob_run = 1 << r;
2686 
2687           if (r)
2688             pD.m_eob_run += pD.get_bits_no_markers(r);
2689 
2690           pD.m_eob_run--;
2691 
2692           break;
2693         }
2694       }
2695     }
2696   }
2697 
2698   static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2699     int s, k, r;
2700     int p1 = 1 << pD.m_successive_low;
2701     int m1 = (-1) << pD.m_successive_low;
2702     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2703 
2704     assert(pD.m_spectral_end <= 63);
2705 
2706     k = pD.m_spectral_start;
2707 
2708     if (pD.m_eob_run == 0)
2709     {
2710       for ( ; k <= pD.m_spectral_end; k++)
2711       {
2712         s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2713 
2714         r = s >> 4;
2715         s &= 15;
2716 
2717         if (s)
2718         {
2719           if (s != 1)
2720             pD.stop_decoding(JPGD_DECODE_ERROR);
2721 
2722           if (pD.get_bits_no_markers(1))
2723             s = p1;
2724           else
2725             s = m1;
2726         }
2727         else
2728         {
2729           if (r != 15)
2730           {
2731             pD.m_eob_run = 1 << r;
2732 
2733             if (r)
2734               pD.m_eob_run += pD.get_bits_no_markers(r);
2735 
2736             break;
2737           }
2738         }
2739 
2740         do
2741         {
2742           jpgd_block_t *this_coef = p + g_ZAG[k & 63];
2743 
2744           if (*this_coef != 0)
2745           {
2746             if (pD.get_bits_no_markers(1))
2747             {
2748               if ((*this_coef & p1) == 0)
2749               {
2750                 if (*this_coef >= 0)
2751                   *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2752                 else
2753                   *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2754               }
2755             }
2756           }
2757           else
2758           {
2759             if (--r < 0)
2760               break;
2761           }
2762 
2763           k++;
2764 
2765         } while (k <= pD.m_spectral_end);
2766 
2767         if ((s) && (k < 64))
2768         {
2769           p[g_ZAG[k]] = cast(jpgd_block_t)(s);
2770         }
2771       }
2772     }
2773 
2774     if (pD.m_eob_run > 0)
2775     {
2776       for ( ; k <= pD.m_spectral_end; k++)
2777       {
2778         jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2779 
2780         if (*this_coef != 0)
2781         {
2782           if (pD.get_bits_no_markers(1))
2783           {
2784             if ((*this_coef & p1) == 0)
2785             {
2786               if (*this_coef >= 0)
2787                 *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2788               else
2789                 *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2790             }
2791           }
2792         }
2793       }
2794 
2795       pD.m_eob_run--;
2796     }
2797   }
2798 
2799   // Decode a scan in a progressively encoded image.
2800   void decode_scan (pDecode_block_func decode_block_func) {
2801     int mcu_row, mcu_col, mcu_block;
2802     int[JPGD_MAX_COMPONENTS] block_x_mcu;
2803     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
2804 
2805     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
2806 
2807     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2808     {
2809       int component_num, component_id;
2810 
2811       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
2812 
2813       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2814       {
2815         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2816 
2817         if ((m_restart_interval) && (m_restarts_left == 0))
2818           process_restart();
2819 
2820         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2821         {
2822           component_id = m_mcu_org.ptr[mcu_block];
2823 
2824           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
2825 
2826           if (m_comps_in_scan == 1)
2827             block_x_mcu.ptr[component_id]++;
2828           else
2829           {
2830             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
2831             {
2832               block_x_mcu_ofs = 0;
2833 
2834               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
2835               {
2836                 block_y_mcu_ofs = 0;
2837                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
2838               }
2839             }
2840           }
2841         }
2842 
2843         m_restarts_left--;
2844       }
2845 
2846       if (m_comps_in_scan == 1)
2847         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
2848       else
2849       {
2850         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2851         {
2852           component_id = m_comp_list.ptr[component_num];
2853           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2854         }
2855       }
2856     }
2857   }
2858 
2859   // Decode a progressively encoded image.
2860   void init_progressive () {
2861     int i;
2862 
2863     if (m_comps_in_frame == 4)
2864       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2865 
2866     // Allocate the coefficient buffers.
2867     for (i = 0; i < m_comps_in_frame; i++)
2868     {
2869       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2870       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2871     }
2872 
2873     for ( ; ; )
2874     {
2875       int dc_only_scan, refinement_scan;
2876       pDecode_block_func decode_block_func;
2877 
2878       if (!init_scan())
2879         break;
2880 
2881       dc_only_scan = (m_spectral_start == 0);
2882       refinement_scan = (m_successive_high != 0);
2883 
2884       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2885         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2886 
2887       if (dc_only_scan)
2888       {
2889         if (m_spectral_end)
2890           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2891       }
2892       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2893         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2894 
2895       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2896         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2897 
2898       if (dc_only_scan)
2899       {
2900         if (refinement_scan)
2901           decode_block_func = &decode_block_dc_refine;
2902         else
2903           decode_block_func = &decode_block_dc_first;
2904       }
2905       else
2906       {
2907         if (refinement_scan)
2908           decode_block_func = &decode_block_ac_refine;
2909         else
2910           decode_block_func = &decode_block_ac_first;
2911       }
2912 
2913       decode_scan(decode_block_func);
2914 
2915       m_bits_left = 16;
2916       get_bits(16);
2917       get_bits(16);
2918     }
2919 
2920     m_comps_in_scan = m_comps_in_frame;
2921 
2922     for (i = 0; i < m_comps_in_frame; i++)
2923       m_comp_list.ptr[i] = i;
2924 
2925     calc_mcu_block_order();
2926   }
2927 
2928   void init_sequential () {
2929     if (!init_scan())
2930       stop_decoding(JPGD_UNEXPECTED_MARKER);
2931   }
2932 
2933   void decode_start () {
2934     init_frame();
2935 
2936     if (m_progressive_flag)
2937       init_progressive();
2938     else
2939       init_sequential();
2940   }
2941 
2942   void decode_init (JpegStreamReadFunc rfn) {
2943     initit(rfn);
2944     locate_sof_marker();
2945   }
2946 }
2947 
2948 
2949 // ////////////////////////////////////////////////////////////////////////// //
/// Reads the JPEG header through `rfn` and reports the image dimensions and
/// component count.
///
/// Returns: `true` with `width`, `height` and `actual_comps` filled in when a
/// decoder could be constructed without error; `false` when `rfn` is null or
/// the decoder reports an error (the stream is most likely not a JPEG).
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  if (rfn is null) return false;

  auto decoder = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  if (decoder.error_code == JPGD_SUCCESS) {
    width = decoder.width;
    height = decoder.height;
    actual_comps = decoder.num_components;
    return true;
  }
  return false;
}
2962 
2963 
2964 // ////////////////////////////////////////////////////////////////////////// //
/// Reads the JPEG header of the file `filename` and reports the image
/// dimensions and component count.
///
/// Returns: `true` on success, `false` if the header could not be parsed
/// (the file is most likely not a JPEG).
/// Throws: `Exception` if `filename` is empty or the file cannot be opened.
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a NUL-terminated name: short names are copied into a stack
  // buffer, longer ones into a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    char* tfn = cast(char*)malloc(filename.length+1);
    // Test the pointer itself: a slice over a null pointer with a non-zero
    // length does not compare equal to `null`.
    if (tfn !is null) {
      scope(exit) free(tfn);
      // The name has to be copied and terminated before it is handed to fopen().
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    // Read callback: returns the number of bytes read, sets *pEOF_flag once a
    // short read that is not an error has been seen, and returns -1 on error.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
3013 
3014 
3015 // ////////////////////////////////////////////////////////////////////////// //
/// Reads the JPEG header from the memory buffer `buf` and reports the image
/// dimensions and component count.
///
/// Returns: whatever `detect_jpeg_image_from_stream` returns for a reader
/// that serves `buf` front to back.
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  size_t bufpos; // read position inside `buf`

  // Read callback: copies the next chunk of `buf`; signals EOF once it is exhausted.
  int feed (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
    import core.stdc.string : memcpy;
    if (bufpos >= buf.length) {
      *pEOF_flag = true;
      return 0;
    }
    // Never hand out more than what is left.
    immutable size_t left = buf.length-bufpos;
    if (left < max_bytes_to_read) max_bytes_to_read = cast(int)left;
    memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+bufpos, max_bytes_to_read);
    bufpos += max_bytes_to_read;
    return max_bytes_to_read;
  }

  return detect_jpeg_image_from_stream(&feed, width, height, actual_comps);
}
3034 
3035 
3036 // ////////////////////////////////////////////////////////////////////////// //
/// Decodes a complete JPEG image supplied by `rfn` into a tightly packed pixel
/// buffer of `width * height * req_comps` bytes.
///
/// Params:
///   rfn = read callback supplying the compressed data
///   width, height = receive the image dimensions
///   actual_comps = receives the component count stored in the image
///   req_comps = components per output pixel: 1 (gray), 3 (RGB) or 4 (RGBA);
///               -1 (the default) uses the image's own component count
///
/// Returns: the pixel data (allocated with `jpgd_malloc` when `useMalloc` is
/// set, on the GC heap otherwise), or `null` on a null callback, an
/// unsupported `req_comps`, or any decoding error.
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  if (rfn is null) return null;

  immutable bool reqSupported = (req_comps == -1 || req_comps == 1 || req_comps == 3 || req_comps == 4);
  if (!reqSupported) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int cols = decoder.width;
  immutable int rows = decoder.height;
  width = cols;
  height = rows;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // Bytes per destination row and for the whole image.
  immutable int dst_bpl = cols*req_comps;
  immutable int total = dst_bpl*rows;

  static if (useMalloc) {
    ubyte* pixels = cast(ubyte*)jpgd_malloc(total);
    if (pixels is null) return null;
    auto result = pixels[0..total];
  } else {
    auto result = new ubyte[](total);
    auto pixels = result.ptr;
  }

  // Gives the output buffer back; used on decode errors and on exceptions.
  void discard () {
    static if (useMalloc) {
      jpgd_free(pixels);
    } else {
      import core.memory : GC;
      GC.free(result.ptr);
      result = null;
    }
  }

  scope(failure) discard();

  // Weights for the RGB -> gray conversion, scaled by 65536.
  enum int YR = 19595, YG = 38470, YB = 7471;

  foreach (immutable int y; 0 .. rows) {
    const(ubyte)* src;
    uint srcLen;
    if (decoder.decode(cast(void**)&src, &srcLen) != JPGD_SUCCESS) {
      discard();
      return null;
    }

    ubyte* dst = pixels+y*dst_bpl;
    immutable int srcComps = decoder.num_components;

    if ((req_comps == 1 && srcComps == 1) || (req_comps == 4 && srcComps == 3)) {
      // The decoder's line already has the requested layout
      // (1 byte/pixel for gray, 4 bytes/pixel for colour).
      memcpy(dst, src, dst_bpl);
    } else if (srcComps == 1) {
      if (req_comps == 3) {
        // gray -> RGB
        foreach (x; 0 .. cols) {
          immutable ubyte luma = src[x];
          dst[0] = dst[1] = dst[2] = luma;
          dst += 3;
        }
      } else {
        // gray -> RGBA, opaque alpha
        foreach (x; 0 .. cols) {
          immutable ubyte luma = src[x];
          dst[0] = dst[1] = dst[2] = luma;
          dst[3] = 255;
          dst += 4;
        }
      }
    } else if (srcComps == 3) {
      const(ubyte)* px = src; // 4 bytes per source pixel
      if (req_comps == 1) {
        // colour -> gray, rounded weighted sum
        foreach (x; 0 .. cols) {
          immutable int r = px[0];
          immutable int g = px[1];
          immutable int b = px[2];
          dst[x] = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
          px += 4;
        }
      } else {
        // colour -> RGB: drop the fourth byte of every source pixel
        foreach (x; 0 .. cols) {
          dst[0] = px[0];
          dst[1] = px[1];
          dst[2] = px[2];
          dst += 3;
          px += 4;
        }
      }
    }
  }

  return result;
}
3139 
3140 
3141 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses the JPEG image stored in the file `filename`.
///
/// `req_comps` selects the components per output pixel (1 for gray, 3 for RGB,
/// 4 for RGBA); the default -1 keeps the image's own component count. All
/// other parameters and the return value are those of
/// `decompress_jpeg_image_from_stream`.
///
/// Throws: `Exception` if `filename` is empty or the file cannot be opened.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a NUL-terminated name: short names are copied into a stack
  // buffer, longer ones into a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    char* tfn = cast(char*)malloc(filename.length+1);
    // Test the pointer itself: a slice over a null pointer with a non-zero
    // length does not compare equal to `null`.
    if (tfn !is null) {
      scope(exit) free(tfn);
      // The name has to be copied and terminated before it is handed to fopen().
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    // Read callback: returns the number of bytes read, sets *pEOF_flag once a
    // short read that is not an error has been seen, and returns -1 on error.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3190 
3191 
3192 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses a JPEG image held in the memory buffer `buf`.
///
/// `req_comps` selects the components per output pixel (1 for gray, 3 for RGB,
/// 4 for RGBA); the default -1 keeps the image's own component count. All
/// other parameters and the return value are those of
/// `decompress_jpeg_image_from_stream`.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  size_t bufpos; // read position inside `buf`

  // Read callback: copies the next chunk of `buf`; signals EOF once it is exhausted.
  int feed (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
    import core.stdc.string : memcpy;
    if (bufpos >= buf.length) {
      *pEOF_flag = true;
      return 0;
    }
    // Never hand out more than what is left.
    immutable size_t left = buf.length-bufpos;
    if (left < max_bytes_to_read) max_bytes_to_read = cast(int)left;
    memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+bufpos, max_bytes_to_read);
    bufpos += max_bytes_to_read;
    return max_bytes_to_read;
  }

  return decompress_jpeg_image_from_stream!useMalloc(&feed, width, height, actual_comps, req_comps);
}
3211 
3212 
3213 // ////////////////////////////////////////////////////////////////////////// //
// `true` when the module "iv.vfs" can be imported; gates the VFile-based API below.
enum JpegHasIVVFS = __traits(compiles, { import iv.vfs; });
3216 
3217 static if (JpegHasIVVFS) {
3218 import iv.vfs;
3219 
3220 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses a JPEG image read from the already opened `VFile` `fl`.
///
/// `req_comps` selects the components per output pixel (1 for gray, 3 for RGB,
/// 4 for RGBA); the default -1 keeps the image's own component count. All
/// other parameters and the return value are those of
/// `decompress_jpeg_image_from_stream`.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Read callback: -1 when the file is not open, 0 plus EOF flag at end of
  // file, otherwise the number of bytes `rawRead` delivered.
  int pull (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
    if (!fl.isOpen) return -1;
    if (fl.eof) {
      *pEOF_flag = true;
      return 0;
    }
    auto got = fl.rawRead(pBuf[0..max_bytes_to_read]);
    // The read may have reached the end of the file; report that right away.
    if (fl.eof) *pEOF_flag = true;
    return cast(int)got.length;
  }

  return decompress_jpeg_image_from_stream!useMalloc(&pull, width, height, actual_comps, req_comps);
}
3237 // vfs API
3238 }
3239 
3240 
3241 // ////////////////////////////////////////////////////////////////////////// //
// `true` when the module "arsd.color" can be imported; gates the MemoryImage-based API below.
enum JpegHasArsd = __traits(compiles, { import arsd.color; });
3244 
3245 static if (JpegHasArsd) {
3246 import arsd.color;
3247 
3248 // ////////////////////////////////////////////////////////////////////////// //
/// Decodes a JPEG image supplied by `rfn` into a new `TrueColorImage`
/// (4 bytes per pixel).
///
/// Returns: the decoded image, or `null` when `rfn` is null, the header cannot
/// be parsed, either image dimension is below 1, or decoding fails.
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  enum req_comps = 4; // the output is always 4 bytes per pixel

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}

  if (decoder.begin_decoding() != JPGD_SUCCESS || image_width < 1 || image_height < 1) return null;

  immutable int dst_bpl = image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  scope(failure) { img.clearInternal(); img = null; }
  ubyte* pImage_data = img.imageData.bytes.ptr;

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      img.clearInternal();
      img = null;
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;

    // With req_comps fixed at 4 only two conversions are reachable.
    if (decoder.num_components == 3) {
      // Colour lines already come as 4 bytes per pixel: copy them verbatim.
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // Grayscale lines are 1 byte per pixel: replicate into R, G, B and set
      // the fourth byte to 255.
      for (int x = 0; x < image_width; ++x) {
        ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
  }

  return img;
}
3332 
3333 
3334 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image stored in a disk file.
///
/// Params:
///   filename = path of the file to open; it does not need to be NUL-terminated.
///
/// Returns:
///   whatever `readJpegFromStream` produces for the file contents
///   (an image, or `null` when decoding fails).
///
/// Throws:
///   `Exception` when `filename` is empty or the file cannot be opened
///   (this includes failure to allocate the temporary C string).
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  // fopen() needs a NUL-terminated string: short names go through a stack
  // buffer, long ones through a temporary heap buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // test the raw pointer: a slice built from a null pointer with non-zero
    // length would not compare equal to `null`
    char* raw = cast(char*)malloc(filename.length+1);
    if (raw !is null) {
      scope(exit) free(raw);
      // the name must actually be copied into the buffer and terminated,
      // otherwise fopen() reads uninitialized memory
      raw[0..filename.length] = filename[];
      raw[filename.length] = 0;
      m_pFile = fopen(raw, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      // once EOF has been seen, keep reporting it without touching the file
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      // a previous read error is sticky
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // short read: either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3382 
3383 
3384 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image held entirely in a memory buffer.
///
/// Params:
///   buf = the compressed JPEG bytes.
///
/// Returns:
///   the result of `readJpegFromStream` fed from `buf`.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  import core.stdc.string : memcpy;

  size_t consumed; // number of bytes of `buf` already handed to the decoder

  // Stream callback: copies the next chunk of `buf` into the decoder's buffer.
  int feed (void* pBuf, int max_bytes_to_read, bool* pEOF_flag) {
    if (consumed >= buf.length) {
      // nothing left: signal end of stream
      *pEOF_flag = true;
      return 0;
    }
    // clamp the request to what is still available
    if (buf.length-consumed < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-consumed);
    const(ubyte)* src = (cast(const(ubyte)*)buf.ptr)+consumed;
    memcpy(pBuf, src, max_bytes_to_read);
    consumed += max_bytes_to_read;
    return max_bytes_to_read;
  }

  return readJpegFromStream(&feed);
}
3402 // done with arsd API
3403 }
3404 
3405 
3406 static if (JpegHasIVVFS) {
/// Decompress a JPEG image read from a `VFile`.
///
/// Returns:
///   the result of `readJpegFromStream` fed from `fl`.
public MemoryImage readJpeg (VFile fl) {
  // Stream callback: -1 when the file is not open, 0 plus the EOF flag when
  // already at the end, otherwise the number of bytes `rawRead` delivered.
  int pull (void* pBuf, int max_bytes_to_read, bool* pEOF_flag) {
    if (!fl.isOpen) return -1;
    if (fl.eof) {
      *pEOF_flag = true;
      return 0;
    }
    immutable int got = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;
    // report EOF together with the final chunk
    if (fl.eof) *pEOF_flag = true;
    return got;
  }

  return readJpegFromStream(&pull);
}
3421 
/// Probe a `VFile` for a JPEG image via `detect_jpeg_image_from_stream`.
///
/// Params:
///   fl           = file to read from.
///   width        = passed through to `detect_jpeg_image_from_stream`.
///   height       = passed through to `detect_jpeg_image_from_stream`.
///   actual_comps = passed through to `detect_jpeg_image_from_stream`.
///
/// Returns:
///   the result of `detect_jpeg_image_from_stream`.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  // Stream callback: -1 when the file is not open, 0 plus the EOF flag when
  // already at the end, otherwise the number of bytes `rawRead` delivered.
  int pull (void* pBuf, int max_bytes_to_read, bool* pEOF_flag) {
    if (!fl.isOpen) return -1;
    if (fl.eof) {
      *pEOF_flag = true;
      return 0;
    }
    immutable int got = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;
    // report EOF together with the final chunk
    if (fl.eof) *pEOF_flag = true;
    return got;
  }

  return detect_jpeg_image_from_stream(&pull, width, height, actual_comps);
}
3436 // vfs API
3437 }
3438 
3439 
3440 // ////////////////////////////////////////////////////////////////////////// //
3441 version(jpegd_test) {
3442 import arsd.color;
3443 import arsd.png;
3444 
/// Test driver (built only with `version(jpegd_test)`).
///
/// Decodes the JPEG named by the first command-line argument (default
/// "image.jpg") twice -- once straight from disk, once from an in-memory
/// copy -- printing the detected dimensions and writing the results to
/// "z00.png" and "z01.png".
///
/// Throws:
///   `Exception` (via `enforce`) when detection or decoding fails.
void main (string[] args) {
  import std.exception : enforce;
  import std.stdio;

  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;

  // pass 1: decode directly from the file
  {
    // the detection call has side effects (fills the out parameters), so it
    // must not live inside assert(), which is dropped in -release builds
    enforce(detect_jpeg_image_from_file(fname, width, height, comps), "not a JPEG file: "~fname);
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    enforce(img !is null, "cannot decode JPEG file: "~fname);
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }

  // pass 2: load the whole file into memory and decode from the buffer
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      file.length = cast(size_t)fl.size;
      // keep only what was actually read
      file = fl.rawRead(file);
    }
    enforce(detect_jpeg_image_from_memory(file[], width, height, comps), "not a JPEG image in memory: "~fname);
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    enforce(img !is null, "cannot decode JPEG from memory: "~fname);
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3469 }