1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  *
40  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
41  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
42  * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
43  */
44 module arsd.jpeg;
45 
46 @system:
47 
48 // Declare this version identifier to enable freq. domain chroma upsampling on images using H2V2 subsampling (comment the `version =` line out for faster nearest neighbor sampling).
49 // This is slower, but results in higher quality on images with highly saturated colors.
50 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
51 
52 /// Input stream interface.
53 /// This delegate is called when the internal input buffer is empty.
54 /// Parameters:
55 ///   pBuf - input buffer
56 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
57 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
58 ///   Returns -1 on error, otherwise returns the number of bytes actually written to the buffer (which may be 0).
59 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
60 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
61 
62 
63 // ////////////////////////////////////////////////////////////////////////// //
64 private:
/// Allocates `nSize` bytes with the C runtime's `malloc` and returns its result unchanged.
void* jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  return malloc(nSize);
}
/// Releases a block with the C runtime's `free`.
/// A null pointer is accepted: `free(null)` is defined to do nothing, so no guard is needed.
void jpgd_free (void* p) {
  import core.stdc.stdlib : free;
  free(p);
}
67 
// Status codes: 0 = success, 1 = done, negative = failure.
alias jpgd_status = int;
enum /*jpgd_status*/ {
  JPGD_SUCCESS = 0,
  JPGD_FAILED = -1,
  JPGD_DONE = 1,

  // Detailed error codes, numbered consecutively starting at -256.
  JPGD_BAD_DHT_COUNTS = -256,
  JPGD_BAD_DHT_INDEX,
  JPGD_BAD_DHT_MARKER,
  JPGD_BAD_DQT_MARKER,
  JPGD_BAD_DQT_TABLE,
  JPGD_BAD_PRECISION,
  JPGD_BAD_HEIGHT,
  JPGD_BAD_WIDTH,
  JPGD_TOO_MANY_COMPONENTS,
  JPGD_BAD_SOF_LENGTH,
  JPGD_BAD_VARIABLE_MARKER,
  JPGD_BAD_DRI_LENGTH,
  JPGD_BAD_SOS_LENGTH,
  JPGD_BAD_SOS_COMP_ID,
  JPGD_W_EXTRA_BYTES_BEFORE_MARKER,
  JPGD_NO_ARITHMITIC_SUPPORT,
  JPGD_UNEXPECTED_MARKER,
  JPGD_NOT_JPEG,
  JPGD_UNSUPPORTED_MARKER,
  JPGD_BAD_DQT_LENGTH,
  JPGD_TOO_MANY_BLOCKS,
  JPGD_UNDEFINED_QUANT_TABLE,
  JPGD_UNDEFINED_HUFF_TABLE,
  JPGD_NOT_SINGLE_SCAN,
  JPGD_UNSUPPORTED_COLORSPACE,
  JPGD_UNSUPPORTED_SAMP_FACTORS,
  JPGD_DECODE_ERROR,
  JPGD_BAD_RESTART_MARKER,
  JPGD_ASSERTION_ERROR,
  JPGD_BAD_SOS_SPECTRAL,
  JPGD_BAD_SOS_SUCCESSIVE,
  JPGD_STREAM_READ,
  JPGD_NOTENOUGHMEM,
}
81 
// Fixed sizes and limits used throughout the decoder.
enum {
  JPGD_IN_BUF_SIZE = 8192,
  JPGD_MAX_BLOCKS_PER_MCU = 10,
  JPGD_MAX_HUFF_TABLES = 8,
  JPGD_MAX_QUANT_TABLES = 4,
  JPGD_MAX_COMPONENTS = 4,
  JPGD_MAX_COMPS_IN_SCAN = 4,
  JPGD_MAX_BLOCKS_PER_ROW = 8192,
  JPGD_MAX_HEIGHT = 16384,
  JPGD_MAX_WIDTH = 16384,
}
86 
// Order in which DCT coefficients are stored: entry k is the position inside the
// 8x8 block (row*8 + column) of the k-th coefficient.
static immutable int[64] g_ZAG = [
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63,
];
89 
// JPEG marker codes, listed in ascending numeric order.
alias JPEG_MARKER = int;
enum /*JPEG_MARKER*/ {
  M_TEM   = 0x01,

  M_SOF0  = 0xC0,
  M_SOF1  = 0xC1,
  M_SOF2  = 0xC2,
  M_SOF3  = 0xC3,
  M_DHT   = 0xC4,
  M_SOF5  = 0xC5,
  M_SOF6  = 0xC6,
  M_SOF7  = 0xC7,
  M_JPG   = 0xC8,
  M_SOF9  = 0xC9,
  M_SOF10 = 0xCA,
  M_SOF11 = 0xCB,
  M_DAC   = 0xCC,
  M_SOF13 = 0xCD,
  M_SOF14 = 0xCE,
  M_SOF15 = 0xCF,

  M_RST0  = 0xD0,
  M_RST1  = 0xD1,
  M_RST2  = 0xD2,
  M_RST3  = 0xD3,
  M_RST4  = 0xD4,
  M_RST5  = 0xD5,
  M_RST6  = 0xD6,
  M_RST7  = 0xD7,
  RST0    = 0xD0, // same value as M_RST0

  M_SOI   = 0xD8,
  M_EOI   = 0xD9,
  M_SOS   = 0xDA,
  M_DQT   = 0xDB,
  M_DNL   = 0xDC,
  M_DRI   = 0xDD,
  M_DHP   = 0xDE,
  M_EXP   = 0xDF,

  M_APP0  = 0xE0,
  M_APP1  = 0xE1,
  M_APP15 = 0xEF,

  M_JPG0  = 0xF0,
  M_JPG13 = 0xFD,
  M_COM   = 0xFE,

  M_ERROR = 0x100,
}
99 
// Scan types, numbered 0..4 in declaration order.
alias JPEG_SUBSAMPLING = int;
enum /*JPEG_SUBSAMPLING*/ {
  JPGD_GRAYSCALE = 0,
  JPGD_YH1V1,
  JPGD_YH2V1,
  JPGD_YH1V2,
  JPGD_YH2V2,
}
102 
// Fixed-point setup for the IDCT. Each FIX_a_b constant is the value a.b written as an
// integer (the trailing comments give the real number it stands for).
enum int CONST_BITS = 13;
enum int PASS1_BITS = 2;
enum int SCALEDONE = 1;

enum int FIX_0_298631336 = 2446;  // FIX(0.298631336)
enum int FIX_0_390180644 = 3196;  // FIX(0.390180644)
enum int FIX_0_541196100 = 4433;  // FIX(0.541196100)
enum int FIX_0_765366865 = 6270;  // FIX(0.765366865)
enum int FIX_0_899976223 = 7373;  // FIX(0.899976223)
enum int FIX_1_175875602 = 9633;  // FIX(1.175875602)
enum int FIX_1_501321110 = 12299; // FIX(1.501321110)
enum int FIX_1_847759065 = 15137; // FIX(1.847759065)
enum int FIX_1_961570560 = 16069; // FIX(1.961570560)
enum int FIX_2_053119869 = 16819; // FIX(2.053119869)
enum int FIX_2_562915447 = 20995; // FIX(2.562915447)
enum int FIX_3_072711026 = 25172; // FIX(3.072711026)
119 
// Shifts `x` right by `n` bits after adding half of the divisor, i.e. a rounding shift.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = SCALEDONE << (n - 1);
  return (x + half) >> n;
}
// Rounding right shift by `n` bits that also adds 128 to the shifted result
// (the 128 is pre-scaled by `n` bits before the shift).
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int bias = (128 << n) + (SCALEDONE << (n - 1));
  return (x + bias) >> n;
}
// Saturates `i` to the range 0..255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  if (i < 0) return 0;
  if (i > 255) return 255;
  return cast(ubyte)i;
}
123 
124 
// One-dimensional IDCT over a single row of a block. NONZERO_COLS is the number of leading
// columns that may hold non-zero coefficients; loads of the remaining columns are replaced
// by the compile-time constant 0, so the compiler can fold the arithmetic away.
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  // Transforms the row at pSrc (only the first NONZERO_COLS entries are read) into the
  // eight ints at pTemp. With NONZERO_COLS == 0 nothing is written.
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // empty row: leave pTemp untouched
    } else static if (NONZERO_COLS == 1) {
      // only column 0 is present: all eight outputs get the same value
      pTemp[0 .. 8] = pSrc[0] << PASS1_BITS;
    } else {
      // Columns 0 and 1 always exist on this path; the rest are either loaded or constant 0.
      immutable int c0 = pSrc[0];
      immutable int c1 = pSrc[1];
      static if (NONZERO_COLS > 2) immutable int c2 = pSrc[2]; else enum int c2 = 0;
      static if (NONZERO_COLS > 3) immutable int c3 = pSrc[3]; else enum int c3 = 0;
      static if (NONZERO_COLS > 4) immutable int c4 = pSrc[4]; else enum int c4 = 0;
      static if (NONZERO_COLS > 5) immutable int c5 = pSrc[5]; else enum int c5 = 0;
      static if (NONZERO_COLS > 6) immutable int c6 = pSrc[6]; else enum int c6 = 0;
      static if (NONZERO_COLS > 7) immutable int c7 = pSrc[7]; else enum int c7 = 0;

      // even-indexed columns (0, 2, 4, 6)
      immutable int even_mix = (c2 + c6)*FIX_0_541196100;
      immutable int even2 = even_mix + c6*(-FIX_1_847759065);
      immutable int even3 = even_mix + c2*FIX_0_765366865;
      immutable int even0 = (c0 + c4) << CONST_BITS;
      immutable int even1 = (c0 - c4) << CONST_BITS;

      immutable int sum0 = even0 + even3;
      immutable int sum1 = even1 + even2;
      immutable int sum2 = even1 - even2;
      immutable int sum3 = even0 - even3;

      // odd-indexed columns (7, 5, 3, 1)
      immutable int p71 = c7 + c1;
      immutable int p53 = c5 + c3;
      immutable int p73 = c7 + c3;
      immutable int p51 = c5 + c1;
      immutable int odd_mix = (p73 + p51)*FIX_1_175875602;

      immutable int q1 = p71*(-FIX_0_899976223);
      immutable int q2 = p53*(-FIX_2_562915447);
      immutable int q3 = p73*(-FIX_1_961570560) + odd_mix;
      immutable int q4 = p51*(-FIX_0_390180644) + odd_mix;

      immutable int odd0 = c7*FIX_0_298631336 + q1 + q3;
      immutable int odd1 = c5*FIX_2_053119869 + q2 + q4;
      immutable int odd2 = c3*FIX_3_072711026 + q2 + q3;
      immutable int odd3 = c1*FIX_1_501321110 + q1 + q4;

      // combine: output k and output 7-k use the same pair with opposite sign
      enum int shift = CONST_BITS-PASS1_BITS;
      pTemp[0] = DESCALE(sum0 + odd3, shift);
      pTemp[7] = DESCALE(sum0 - odd3, shift);
      pTemp[1] = DESCALE(sum1 + odd2, shift);
      pTemp[6] = DESCALE(sum1 - odd2, shift);
      pTemp[2] = DESCALE(sum2 + odd1, shift);
      pTemp[5] = DESCALE(sum2 - odd1, shift);
      pTemp[3] = DESCALE(sum3 + odd0, shift);
      pTemp[4] = DESCALE(sum3 - odd0, shift);
    }
  }
}
185 
186 
// One-dimensional IDCT down a single column of the intermediate buffer. NONZERO_ROWS is the
// number of leading rows that may hold non-zero values; the remaining rows are replaced by
// the compile-time constant 0.
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  // Reads the column starting at pTemp (stride 8 ints, first NONZERO_ROWS rows only) and
  // writes eight clamped bytes to pDst_ptr (stride 8 bytes).
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // only row 0 is present: the whole column gets the same byte
      immutable ubyte flat = CLAMP(DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3));
      foreach (immutable row; 0 .. 8) pDst_ptr[row*8] = flat;
    } else {
      // Rows 0 and 1 always exist on this path; the rest are either loaded or constant 0.
      immutable int r0 = pTemp[0*8];
      immutable int r1 = pTemp[1*8];
      static if (NONZERO_ROWS > 2) immutable int r2 = pTemp[2*8]; else enum int r2 = 0;
      static if (NONZERO_ROWS > 3) immutable int r3 = pTemp[3*8]; else enum int r3 = 0;
      static if (NONZERO_ROWS > 4) immutable int r4 = pTemp[4*8]; else enum int r4 = 0;
      static if (NONZERO_ROWS > 5) immutable int r5 = pTemp[5*8]; else enum int r5 = 0;
      static if (NONZERO_ROWS > 6) immutable int r6 = pTemp[6*8]; else enum int r6 = 0;
      static if (NONZERO_ROWS > 7) immutable int r7 = pTemp[7*8]; else enum int r7 = 0;

      // even-indexed rows (0, 2, 4, 6)
      immutable int even_mix = (r2 + r6)*FIX_0_541196100;
      immutable int even2 = even_mix + r6*(-FIX_1_847759065);
      immutable int even3 = even_mix + r2*FIX_0_765366865;
      immutable int even0 = (r0 + r4) << CONST_BITS;
      immutable int even1 = (r0 - r4) << CONST_BITS;

      immutable int sum0 = even0 + even3;
      immutable int sum1 = even1 + even2;
      immutable int sum2 = even1 - even2;
      immutable int sum3 = even0 - even3;

      // odd-indexed rows (7, 5, 3, 1)
      immutable int p71 = r7 + r1;
      immutable int p53 = r5 + r3;
      immutable int p73 = r7 + r3;
      immutable int p51 = r5 + r1;
      immutable int odd_mix = (p73 + p51)*FIX_1_175875602;

      immutable int q1 = p71*(-FIX_0_899976223);
      immutable int q2 = p53*(-FIX_2_562915447);
      immutable int q3 = p73*(-FIX_1_961570560) + odd_mix;
      immutable int q4 = p51*(-FIX_0_390180644) + odd_mix;

      immutable int odd0 = r7*FIX_0_298631336 + q1 + q3;
      immutable int odd1 = r5*FIX_2_053119869 + q2 + q4;
      immutable int odd2 = r3*FIX_3_072711026 + q2 + q3;
      immutable int odd3 = r1*FIX_1_501321110 + q1 + q4;

      // combine, descale, add the 128 offset and clamp to a byte
      enum int shift = CONST_BITS+PASS1_BITS+3;
      pDst_ptr[8*0] = CLAMP(DESCALE_ZEROSHIFT(sum0 + odd3, shift));
      pDst_ptr[8*7] = CLAMP(DESCALE_ZEROSHIFT(sum0 - odd3, shift));
      pDst_ptr[8*1] = CLAMP(DESCALE_ZEROSHIFT(sum1 + odd2, shift));
      pDst_ptr[8*6] = CLAMP(DESCALE_ZEROSHIFT(sum1 - odd2, shift));
      pDst_ptr[8*2] = CLAMP(DESCALE_ZEROSHIFT(sum2 + odd1, shift));
      pDst_ptr[8*5] = CLAMP(DESCALE_ZEROSHIFT(sum2 - odd1, shift));
      pDst_ptr[8*3] = CLAMP(DESCALE_ZEROSHIFT(sum3 + odd0, shift));
      pDst_ptr[8*4] = CLAMP(DESCALE_ZEROSHIFT(sum3 - odd0, shift));
    }
  }
}
263 
264 
// Row specialisation table used by idct(): one line of eight entries per value of
// block_max_zag (1..64). Entry [(block_max_zag-1)*8 + row] is the NONZERO_COLS value
// used to pick the Row!(N) instance for that row.
static immutable ubyte[512] s_idct_row_table = [
  1,0,0,0,0,0,0,0,
  2,0,0,0,0,0,0,0,
  2,1,0,0,0,0,0,0,
  2,1,1,0,0,0,0,0,
  2,2,1,0,0,0,0,0,
  3,2,1,0,0,0,0,0,
  4,2,1,0,0,0,0,0,
  4,3,1,0,0,0,0,0,
  4,3,2,0,0,0,0,0,
  4,3,2,1,0,0,0,0,
  4,3,2,1,1,0,0,0,
  4,3,2,2,1,0,0,0,
  4,3,3,2,1,0,0,0,
  4,4,3,2,1,0,0,0,
  5,4,3,2,1,0,0,0,
  6,4,3,2,1,0,0,0,
  6,5,3,2,1,0,0,0,
  6,5,4,2,1,0,0,0,
  6,5,4,3,1,0,0,0,
  6,5,4,3,2,0,0,0,
  6,5,4,3,2,1,0,0,
  6,5,4,3,2,1,1,0,
  6,5,4,3,2,2,1,0,
  6,5,4,3,3,2,1,0,
  6,5,4,4,3,2,1,0,
  6,5,5,4,3,2,1,0,
  6,6,5,4,3,2,1,0,
  7,6,5,4,3,2,1,0,
  8,6,5,4,3,2,1,0,
  8,7,5,4,3,2,1,0,
  8,7,6,4,3,2,1,0,
  8,7,6,5,3,2,1,0,
  8,7,6,5,4,2,1,0,
  8,7,6,5,4,3,1,0,
  8,7,6,5,4,3,2,0,
  8,7,6,5,4,3,2,1,
  8,7,6,5,4,3,2,2,
  8,7,6,5,4,3,3,2,
  8,7,6,5,4,4,3,2,
  8,7,6,5,5,4,3,2,
  8,7,6,6,5,4,3,2,
  8,7,7,6,5,4,3,2,
  8,8,7,6,5,4,3,2,
  8,8,8,6,5,4,3,2,
  8,8,8,7,5,4,3,2,
  8,8,8,7,6,4,3,2,
  8,8,8,7,6,5,3,2,
  8,8,8,7,6,5,4,2,
  8,8,8,7,6,5,4,3,
  8,8,8,7,6,5,4,4,
  8,8,8,7,6,5,5,4,
  8,8,8,7,6,6,5,4,
  8,8,8,7,7,6,5,4,
  8,8,8,8,7,6,5,4,
  8,8,8,8,8,6,5,4,
  8,8,8,8,8,7,5,4,
  8,8,8,8,8,7,6,4,
  8,8,8,8,8,7,6,5,
  8,8,8,8,8,7,6,6,
  8,8,8,8,8,7,7,6,
  8,8,8,8,8,8,7,6,
  8,8,8,8,8,8,8,6,
  8,8,8,8,8,8,8,7,
  8,8,8,8,8,8,8,8,
];
275 
// Column specialisation table used by idct(): entry [block_max_zag-1] is the NONZERO_ROWS
// value used to pick the Col!(N) instance for every column of the block.
static immutable ubyte[64] s_idct_col_table = [
  1, 1, 2, 3, 3, 3, 3, 3,
  3, 4, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 6, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8,
];
277 
// Inverse DCT of one 8x8 block.
//   pSrc_ptr      - 64 input coefficients (8 rows of 8)
//   pDst_ptr      - receives 64 output bytes (8 rows of 8)
//   block_max_zag - value in 1..64 used to look up how many columns/rows need real work
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  if (block_max_zag <= 1) {
    // Only coefficient 0 matters: every output byte is the same clamped value.
    immutable ubyte flat = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    pDst_ptr[0 .. 64] = flat;
    return;
  }

  int[64] temp; // zero-initialised; rows handled by Row!(0) stay zero

  // Pass 1: rows of the source block into `temp`, each row with its own specialisation.
  immutable(ubyte)* cols_per_row = s_idct_row_table.ptr + (block_max_zag - 1) * 8;
  foreach (immutable row; 0 .. 8) {
    const(jpeg_decoder.jpgd_block_t)* src_row = pSrc_ptr + row * 8;
    int* temp_row = temp.ptr + row * 8;
    switch (cols_per_row[row]) {
      case 0: Row!(0).idct(temp_row, src_row); break;
      case 1: Row!(1).idct(temp_row, src_row); break;
      case 2: Row!(2).idct(temp_row, src_row); break;
      case 3: Row!(3).idct(temp_row, src_row); break;
      case 4: Row!(4).idct(temp_row, src_row); break;
      case 5: Row!(5).idct(temp_row, src_row); break;
      case 6: Row!(6).idct(temp_row, src_row); break;
      case 7: Row!(7).idct(temp_row, src_row); break;
      case 8: Row!(8).idct(temp_row, src_row); break;
      default: assert(0);
    }
  }

  // Pass 2: columns of `temp` into the destination; all columns share one specialisation.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable col; 0 .. 8) {
    ubyte* dst_col = pDst_ptr + col;
    const(int)* temp_col = temp.ptr + col;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(dst_col, temp_col); break;
      case 2: Col!(2).idct(dst_col, temp_col); break;
      case 3: Col!(3).idct(dst_col, temp_col); break;
      case 4: Col!(4).idct(dst_col, temp_col); break;
      case 5: Col!(5).idct(dst_col, temp_col); break;
      case 6: Col!(6).idct(dst_col, temp_col); break;
      case 7: Col!(7).idct(dst_col, temp_col); break;
      case 8: Col!(8).idct(dst_col, temp_col); break;
      default: assert(0);
    }
  }
}
347 
// Inverse DCT that only looks at the top-left 4x4 coefficients of the block at pSrc_ptr
// and still produces all 64 output bytes at pDst_ptr.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] temp; // zero-initialised; rows 4..7 are never written by pass 1

  // Pass 1: the first four rows, four columns each.
  foreach (immutable row; 0 .. 4) {
    Row!(4).idct(temp.ptr + row * 8, pSrc_ptr + row * 8);
  }

  // Pass 2: all eight columns, four rows each.
  foreach (immutable col; 0 .. 8) {
    Col!(4).idct(pDst_ptr + col, temp.ptr + col);
  }
}
368 
369 
370 // ////////////////////////////////////////////////////////////////////////// //
371 struct jpeg_decoder {
372 private import core.stdc.string : memcpy, memset;
373 private:
374   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
375   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
376 
377   alias jpgd_quant_t = short;
378   alias jpgd_block_t = short;
379   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
380 
381   static struct huff_tables { // Lookup data for one Huffman table; read by both huff_decode() overloads.
382     bool ac_table; // presumably marks an AC table; it is not read in this part of the file (confirm)
383     uint[256] look_up; // indexed by the top 8 bits of the bit buffer; an entry that is negative when read as int sends the decoder into `tree`
384     uint[256] look_up2; // same index; low byte = symbol, bits 8..12 = bits to consume, low 4 bits = extra-bit count, bit 15 set = extra bits already stored in the upper 16 bits
385     ubyte[256] code_size; // indexed by symbol: number of bits consumed for a symbol found through `look_up`
386     uint[512] tree; // fallback for codes not resolved by the 8-bit lookup; indexed with the negated (entry + next bit) until a non-negative symbol is reached
387   }
388 
389   static struct coeff_buf { // Descriptor type of the m_dc_coeffs / m_ac_coeffs entries. NOTE(review): no code in this part of the file reads these fields; notes below come from the names only.
390     ubyte* pData; // presumably the backing storage (confirm against the allocator)
391     int block_num_x, block_num_y; // presumably the number of blocks in each direction (confirm)
392     int block_len_x, block_len_y; // presumably the dimensions of one block (confirm)
393     int block_size; // presumably the size of one block in bytes (confirm)
394   }
395 
396   static struct mem_block { // Node type of the list headed by m_pMem_blocks. NOTE(review): the allocator that uses it is outside this part of the file; notes below come from the names only.
397     mem_block* m_pNext; // presumably the next node in the list (confirm)
398     size_t m_used_count; // presumably bytes already handed out from this node (confirm)
399     size_t m_size; // presumably the capacity of this node (confirm)
400     char[1] m_data; // declared with one element; presumably the start of a larger payload allocated with the node (confirm)
401   }
402 
403   mem_block* m_pMem_blocks;
404   int m_image_x_size;
405   int m_image_y_size;
406   JpegStreamReadFunc readfn;
407   int m_progressive_flag;
408   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
409   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
410   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
411   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
412   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
413   int m_comps_in_frame;                         // # of components in frame
414   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
415   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
416   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
417   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
418   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
419   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
420   int m_comps_in_scan;                          // # of components in scan
421   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
422   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
423   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
424   int m_spectral_start;                         // spectral selection start
425   int m_spectral_end;                           // spectral selection end
426   int m_successive_low;                         // successive approximation low
427   int m_successive_high;                        // successive approximation high
428   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
429   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
430   int m_blocks_per_mcu;
431   int m_max_blocks_per_row;
432   int m_mcus_per_row, m_mcus_per_col;
433   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
434   int m_total_lines_left;                       // total # lines left in image
435   int m_mcu_lines_left;                         // total # lines left in this MCU
436   int m_real_dest_bytes_per_scan_line;
437   int m_dest_bytes_per_scan_line;               // rounded up
438   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
439   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
440   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
441   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
442   int m_eob_run;
443   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
444   ubyte* m_pIn_buf_ofs;
445   int m_in_buf_left;
446   int m_tem_flag;
447   bool m_eof_flag;
448   ubyte[128] m_in_buf_pad_start;
449   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
450   ubyte[128] m_in_buf_pad_end;
451   int m_bits_left;
452   uint m_bit_buf;
453   int m_restart_interval;
454   int m_restarts_left;
455   int m_next_restart_num;
456   int m_max_mcus_per_row;
457   int m_max_blocks_per_mcu;
458   int m_expanded_blocks_per_mcu;
459   int m_expanded_blocks_per_row;
460   int m_expanded_blocks_per_component;
461   bool m_freq_domain_chroma_upsample;
462   int m_max_mcus_per_col;
463   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
464   jpgd_block_t* m_pMCU_coefficients;
465   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
466   ubyte* m_pSample_buf;
467   int[256] m_crr;
468   int[256] m_cbb;
469   int[256] m_crg;
470   int[256] m_cbg;
471   ubyte* m_pScan_line_0;
472   ubyte* m_pScan_line_1;
473   jpgd_status m_error_code;
474   bool m_ready_flag;
475   int m_total_bytes_read;
476 
477 public:
478   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
479   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
480   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
481 
482   ~this () { free_all_blocks(); }
483 
484   @disable this (this); // no copies
485 
486   // Call this method after constructing the object to begin decompression.
487   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
488   int begin_decoding () {
489     if (m_ready_flag) return JPGD_SUCCESS;
490     if (m_error_code) return JPGD_FAILED;
491     try {
492       decode_start();
493       m_ready_flag = true;
494       return JPGD_SUCCESS;
495     } catch (Exception e) {
496       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
497       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
498     }
499     return JPGD_FAILED;
500   }
501 
502   // Returns the next scan line.
503   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
504   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
505   // Returns JPGD_SUCCESS if a scan line has been returned.
506   // Returns JPGD_DONE if all scan lines have been returned.
507   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
508   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
509     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
510     if (m_total_lines_left == 0) return JPGD_DONE;
511     try {
512       if (m_mcu_lines_left == 0) {
513         if (m_progressive_flag) load_next_row(); else decode_next_row();
514         // Find the EOI marker if that was the last row.
515         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
516         m_mcu_lines_left = m_max_mcu_y_size;
517       }
518       if (m_freq_domain_chroma_upsample) {
519         expanded_convert();
520         *pScan_line = m_pScan_line_0;
521       } else {
522         switch (m_scan_type) {
523           case JPGD_YH2V2:
524             if ((m_mcu_lines_left & 1) == 0) {
525               H2V2Convert();
526               *pScan_line = m_pScan_line_0;
527             } else {
528               *pScan_line = m_pScan_line_1;
529             }
530             break;
531           case JPGD_YH2V1:
532             H2V1Convert();
533             *pScan_line = m_pScan_line_0;
534             break;
535           case JPGD_YH1V2:
536             if ((m_mcu_lines_left & 1) == 0) {
537               H1V2Convert();
538               *pScan_line = m_pScan_line_0;
539             } else {
540               *pScan_line = m_pScan_line_1;
541             }
542             break;
543           case JPGD_YH1V1:
544             H1V1Convert();
545             *pScan_line = m_pScan_line_0;
546             break;
547           case JPGD_GRAYSCALE:
548             gray_convert();
549             *pScan_line = m_pScan_line_0;
550             break;
551           default:
552         }
553       }
554       *pScan_line_len = m_real_dest_bytes_per_scan_line;
555       --m_mcu_lines_left;
556       --m_total_lines_left;
557       return JPGD_SUCCESS;
558     } catch (Exception) {}
559     return JPGD_FAILED;
560   }
561 
562   @property const pure nothrow @trusted @nogc {
563     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
564 
565     int width () { pragma(inline, true); return m_image_x_size; }
566     int height () { pragma(inline, true); return m_image_y_size; }
567 
568     int num_components () { pragma(inline, true); return m_comps_in_frame; }
569 
570     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
571     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
572 
573     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
574     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
575   }
576 
577 private:
578   // Retrieve one character from the input stream.
579   uint get_char () {
580     // Any bytes remaining in buffer?
581     if (!m_in_buf_left) {
582       // Try to get more bytes.
583       prep_in_buffer();
584       // Still nothing to get?
585       if (!m_in_buf_left) {
586         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
587         int t = m_tem_flag;
588         m_tem_flag ^= 1;
589         return (t ? 0xD9 : 0xFF);
590       }
591     }
592     uint c = *m_pIn_buf_ofs++;
593     --m_in_buf_left;
594     return c;
595   }
596 
597   // Same as previous method, except can indicate if the character is a pad character or not.
598   uint get_char (bool* pPadding_flag) {
599     if (!m_in_buf_left) {
600       prep_in_buffer();
601       if (!m_in_buf_left) {
602         *pPadding_flag = true;
603         int t = m_tem_flag;
604         m_tem_flag ^= 1;
605         return (t ? 0xD9 : 0xFF);
606       }
607     }
608     *pPadding_flag = false;
609     uint c = *m_pIn_buf_ofs++;
610     --m_in_buf_left;
611     return c;
612   }
613 
614   // Inserts a previously retrieved character back into the input buffer.
615   void stuff_char (ubyte q) {
616     *(--m_pIn_buf_ofs) = q;
617     m_in_buf_left++;
618   }
619 
620   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
621   ubyte get_octet () {
622     bool padding_flag;
623     int c = get_char(&padding_flag);
624     if (c == 0xFF) {
625       if (padding_flag) return 0xFF;
626       c = get_char(&padding_flag);
627       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
628       if (c == 0x00) return 0xFF;
629       stuff_char(cast(ubyte)(c));
630       stuff_char(0xFF);
631       return 0xFF;
632     }
633     return cast(ubyte)(c);
634   }
635 
636   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
637   uint get_bits (int num_bits) {
638     if (!num_bits) return 0;
639     uint i = m_bit_buf >> (32 - num_bits);
640     if ((m_bits_left -= num_bits) <= 0) {
641       m_bit_buf <<= (num_bits += m_bits_left);
642       uint c1 = get_char();
643       uint c2 = get_char();
644       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
645       m_bit_buf <<= -m_bits_left;
646       m_bits_left += 16;
647       assert(m_bits_left >= 0);
648     } else {
649       m_bit_buf <<= num_bits;
650     }
651     return i;
652   }
653 
654   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
655   uint get_bits_no_markers (int num_bits) {
656     if (!num_bits) return 0;
657     uint i = m_bit_buf >> (32 - num_bits);
658     if ((m_bits_left -= num_bits) <= 0) {
659       m_bit_buf <<= (num_bits += m_bits_left);
660       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
661         uint c1 = get_octet();
662         uint c2 = get_octet();
663         m_bit_buf |= (c1 << 8) | c2;
664       } else {
665         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
666         m_in_buf_left -= 2;
667         m_pIn_buf_ofs += 2;
668       }
669       m_bit_buf <<= -m_bits_left;
670       m_bits_left += 16;
671       assert(m_bits_left >= 0);
672     } else {
673       m_bit_buf <<= num_bits;
674     }
675     return i;
676   }
677 
678   // Decodes a Huffman encoded symbol.
679   int huff_decode (huff_tables *pH) {
680     int symbol;
681     // Check first 8-bits: do we have a complete symbol?
682     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
683       // Decode more bits, use a tree traversal to find symbol.
684       int ofs = 23;
685       do {
686         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
687         --ofs;
688       } while (symbol < 0);
689       get_bits_no_markers(8 + (23 - ofs));
690     } else {
691       get_bits_no_markers(pH.code_size.ptr[symbol]);
692     }
693     return symbol;
694   }
695 
696   // Decodes a Huffman encoded symbol.
697   int huff_decode (huff_tables *pH, ref int extra_bits) {
698     int symbol;
699     // Check first 8-bits: do we have a complete symbol?
700     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
701       // Use a tree traversal to find symbol.
702       int ofs = 23;
703       do {
704         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
705         --ofs;
706       } while (symbol < 0);
707       get_bits_no_markers(8 + (23 - ofs));
708       extra_bits = get_bits_no_markers(symbol & 0xF);
709     } else {
710       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
711       if (symbol & 0x8000) {
712         get_bits_no_markers((symbol >> 8) & 31);
713         extra_bits = symbol >> 16;
714       } else {
715         int code_size = (symbol >> 8) & 31;
716         int num_extra_bits = symbol & 0xF;
717         int bits = code_size + num_extra_bits;
718         if (bits <= (m_bits_left + 16)) {
719           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
720         } else {
721           get_bits_no_markers(code_size);
722           extra_bits = get_bits_no_markers(num_extra_bits);
723         }
724       }
725       symbol &= 0xFF;
726     }
727     return symbol;
728   }
729 
730   // Tables and macro used to fully decode the DPCM differences.
731   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
732   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
733   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
734   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
735   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
736   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
737 
738   // Clamps a value between 0-255.
739   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
  // `clamp` is just another name for `CLAMP`, which is not defined in this part of the file;
  // presumably it performs the 0..255 clamp sketched in the commented-out function above.
  alias clamp = CLAMP;
741 
  // Namespace-like struct (all members static) holding fixed-point 4x4 matrix helpers.
  // Presumably this backs the frequency-domain chroma upsampling mentioned in the file header.
  // P_Q and R_S each compute two 4x4 matrices from a block of coefficients laid out with a
  // row stride of 8 (element (c, r) is read as pSrc[c + r*8]).
  static struct DCT_Upsample {
  static:
    // Fixed-size 4x4 matrix of ints with element-wise add/subtract helpers.
    static struct Matrix44 {
    pure nothrow @trusted @nogc:
      alias Element_Type = int;
      enum { NUM_ROWS = 4, NUM_COLS = 4 }

      Element_Type[NUM_COLS][NUM_ROWS] v;

      // Copies all rows from another matrix.
      this() (const scope auto ref Matrix44 m) {
        foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
      }

      //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
      //@property int cols () const { pragma(inline, true); return NUM_COLS; }

      // Element access by (row, column); goes through .ptr, so the indices are not bounds-checked.
      ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }

      // this += a, element by element.
      ref Matrix44 opOpAssign(string op:"+") (const scope auto ref Matrix44 a) {
        foreach (int r; 0..NUM_ROWS) {
          at(r, 0) += a.at(r, 0);
          at(r, 1) += a.at(r, 1);
          at(r, 2) += a.at(r, 2);
          at(r, 3) += a.at(r, 3);
        }
        return this;
      }

      // this -= a, element by element.
      ref Matrix44 opOpAssign(string op:"-") (const scope auto ref Matrix44 a) {
        foreach (int r; 0..NUM_ROWS) {
          at(r, 0) -= a.at(r, 0);
          at(r, 1) -= a.at(r, 1);
          at(r, 2) -= a.at(r, 2);
          at(r, 3) -= a.at(r, 3);
        }
        return this;
      }

      // Returns this + b as a new matrix.
      Matrix44 opBinary(string op:"+") (const scope auto ref Matrix44 b) const {
        alias a = this;
        Matrix44 ret;
        foreach (int r; 0..NUM_ROWS) {
          ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
          ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
          ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
          ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
        }
        return ret;
      }

      // Returns this - b as a new matrix.
      Matrix44 opBinary(string op:"-") (const scope auto ref Matrix44 b) const {
        alias a = this;
        Matrix44 ret;
        foreach (int r; 0..NUM_ROWS) {
          ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
          ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
          ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
          ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
        }
        return ret;
      }

      // Writes a + b into pDst transposed: element (r, c) is stored at pDst[c*8 + r].
      // Only the 4x4 region is written; pDst is indexed with a stride of 8.
      static void add_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
        foreach (int r; 0..4) {
          pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
          pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
          pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
          pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
        }
      }

      // Same as add_and_store, but stores a - b.
      static void sub_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
        foreach (int r; 0..4) {
          pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
          pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
          pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
          pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
        }
      }
    }

    // Fixed-point format used below: FRACT_BITS fractional bits, so SCALE represents 1.0.
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    alias Temp_Type = int;
    //TODO: convert defines to mixins
    //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
    //#define F(i) ((int)((i) * SCALE + .5f))
    // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
    //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])

    // D: converts a fixed-point value back to an integer by adding half of SCALE and shifting out FRACT_BITS.
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // F: compile-time conversion of a float constant to fixed point (i * SCALE + 0.5, truncated to int).
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));

    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      // Fills P and Q from the coefficients at pSrc.
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text to be mixed in: the literal "0" when (c, r) lies outside the
        // NUM_COLS x NUM_ROWS region, otherwise the expression pSrc[c + r*8]. Coefficients declared
        // zero by the template arguments are therefore never read from memory.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }

    // Companion of P_Q: NUM_ROWS/NUM_COLS again give the non-zero region of the input.
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      // Fills R and S from the coefficients at pSrc.
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // Same compile-time accessor as in P_Q.calc: "0" outside the non-zero region, pSrc[c + r*8] inside.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
  } // end namespace DCT_Upsample
997 
998   // Unconditionally frees all allocated m_blocks.
999   void free_all_blocks () {
1000     //m_pStream = null;
1001     readfn = null;
1002     for (mem_block *b = m_pMem_blocks; b; ) {
1003       mem_block* n = b.m_pNext;
1004       jpgd_free(b);
1005       b = n;
1006     }
1007     m_pMem_blocks = null;
1008   }
1009 
  // This method handles all errors. It never returns normally: it records the
  // status code, frees all allocated blocks and then throws an Exception.
1012   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
1013     m_error_code = status;
1014     free_all_blocks();
1015     //longjmp(m_jmp_state, status);
1016     throw new Exception("jpeg decoding error", __FILE__, line);
1017   }
1018 
1019   void* alloc (size_t nSize, bool zero=false) {
1020     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1021     char *rv = null;
1022     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1023     {
1024       if ((b.m_used_count + nSize) <= b.m_size)
1025       {
1026         rv = b.m_data.ptr + b.m_used_count;
1027         b.m_used_count += nSize;
1028         break;
1029       }
1030     }
1031     if (!rv)
1032     {
1033       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1034       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1035       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1036       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1037       b.m_used_count = nSize;
1038       b.m_size = capacity;
1039       rv = b.m_data.ptr;
1040     }
1041     if (zero) memset(rv, 0, nSize);
1042     return rv;
1043   }
1044 
1045   void word_clear (void *p, ushort c, uint n) {
1046     ubyte *pD = cast(ubyte*)p;
1047     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1048     while (n)
1049     {
1050       pD[0] = l; pD[1] = h; pD += 2;
1051       n--;
1052     }
1053   }
1054 
1055   // Refill the input buffer.
  // This method will sit in a loop until (A) the buffer is full or (B)
  // the stream's read() method reports an end-of-file condition.
1058   void prep_in_buffer () {
1059     m_in_buf_left = 0;
1060     m_pIn_buf_ofs = m_in_buf.ptr;
1061 
1062     if (m_eof_flag)
1063       return;
1064 
1065     do
1066     {
1067       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1068       if (bytes_read == -1)
1069         stop_decoding(JPGD_STREAM_READ);
1070 
1071       m_in_buf_left += bytes_read;
1072     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1073 
1074     m_total_bytes_read += m_in_buf_left;
1075 
1076     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1077     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1078     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1079   }
1080 
1081   // Read a Huffman code table.
1082   void read_dht_marker () {
1083     int i, index, count;
1084     ubyte[17] huff_num;
1085     ubyte[256] huff_val;
1086 
1087     uint num_left = get_bits(16);
1088 
1089     if (num_left < 2)
1090       stop_decoding(JPGD_BAD_DHT_MARKER);
1091 
1092     num_left -= 2;
1093 
1094     while (num_left)
1095     {
1096       index = get_bits(8);
1097 
1098       huff_num.ptr[0] = 0;
1099 
1100       count = 0;
1101 
1102       for (i = 1; i <= 16; i++)
1103       {
1104         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1105         count += huff_num.ptr[i];
1106       }
1107 
1108       if (count > 255)
1109         stop_decoding(JPGD_BAD_DHT_COUNTS);
1110 
1111       for (i = 0; i < count; i++)
1112         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1113 
1114       i = 1 + 16 + count;
1115 
1116       if (num_left < cast(uint)i)
1117         stop_decoding(JPGD_BAD_DHT_MARKER);
1118 
1119       num_left -= i;
1120 
1121       if ((index & 0x10) > 0x10)
1122         stop_decoding(JPGD_BAD_DHT_INDEX);
1123 
1124       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1125 
1126       if (index >= JPGD_MAX_HUFF_TABLES)
1127         stop_decoding(JPGD_BAD_DHT_INDEX);
1128 
1129       if (!m_huff_num.ptr[index])
1130         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1131 
1132       if (!m_huff_val.ptr[index])
1133         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1134 
1135       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1136       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1137       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1138     }
1139   }
1140 
1141   // Read a quantization table.
1142   void read_dqt_marker () {
1143     int n, i, prec;
1144     uint num_left;
1145     uint temp;
1146 
1147     num_left = get_bits(16);
1148 
1149     if (num_left < 2)
1150       stop_decoding(JPGD_BAD_DQT_MARKER);
1151 
1152     num_left -= 2;
1153 
1154     while (num_left)
1155     {
1156       n = get_bits(8);
1157       prec = n >> 4;
1158       n &= 0x0F;
1159 
1160       if (n >= JPGD_MAX_QUANT_TABLES)
1161         stop_decoding(JPGD_BAD_DQT_TABLE);
1162 
1163       if (!m_quant.ptr[n])
1164         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1165 
1166       // read quantization entries, in zag order
1167       for (i = 0; i < 64; i++)
1168       {
1169         temp = get_bits(8);
1170 
1171         if (prec)
1172           temp = (temp << 8) + get_bits(8);
1173 
1174         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1175       }
1176 
1177       i = 64 + 1;
1178 
1179       if (prec)
1180         i += 64;
1181 
1182       if (num_left < cast(uint)i)
1183         stop_decoding(JPGD_BAD_DQT_LENGTH);
1184 
1185       num_left -= i;
1186     }
1187   }
1188 
1189   // Read the start of frame (SOF) marker.
1190   void read_sof_marker () {
1191     int i;
1192     uint num_left;
1193 
1194     num_left = get_bits(16);
1195 
1196     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1197       stop_decoding(JPGD_BAD_PRECISION);
1198 
1199     m_image_y_size = get_bits(16);
1200 
1201     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1202       stop_decoding(JPGD_BAD_HEIGHT);
1203 
1204     m_image_x_size = get_bits(16);
1205 
1206     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1207       stop_decoding(JPGD_BAD_WIDTH);
1208 
1209     m_comps_in_frame = get_bits(8);
1210 
1211     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1212       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1213 
1214     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1215       stop_decoding(JPGD_BAD_SOF_LENGTH);
1216 
1217     for (i = 0; i < m_comps_in_frame; i++)
1218     {
1219       m_comp_ident.ptr[i]  = get_bits(8);
1220       m_comp_h_samp.ptr[i] = get_bits(4);
1221       m_comp_v_samp.ptr[i] = get_bits(4);
1222       m_comp_quant.ptr[i]  = get_bits(8);
1223     }
1224   }
1225 
1226   private void exif_enforce(bool what) {
1227 	if(!what)
1228 		throw new Exception("jpeg exif data format error");
1229   }
1230 
1231   void read_exif_marker() {
1232     uint num_left;
1233 
1234     num_left = get_bits(16);
1235 
1236     if (num_left < 2)
1237       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1238 
1239     num_left -= 2;
1240 
1241     ubyte[] data;
1242     data.length = num_left;
1243     int offset;
1244 
1245     while (num_left)
1246     {
1247       data[offset++] = cast(ubyte) get_bits(8);
1248       num_left--;
1249     }
1250 
1251     if(data.length > 4 && data[0 .. 4] == "Exif") {
1252 	data = data[4 .. $];
1253 	while(data.length && data[0] == 0)
1254 		data = data[1 .. $];
1255 	if(data.length < 8)
1256 		return; // abandon the parse, no tiff header
1257 
1258 	int offsetAdjustment = 0;
1259 
1260 	bool bigEndian = data[0] == 'M';
1261 	// should be MM or II
1262 	exif_enforce(data[0] == data[1]);
1263 	if(!bigEndian)
1264 		exif_enforce(data[0] == 'I');
1265 	data = data[2 .. $];
1266 	offsetAdjustment += 2;
1267 
1268 	uint read4() {
1269 		exif_enforce(data.length >= 4);
1270 
1271 		uint ret;
1272 		if(bigEndian) {
1273 			ret |= data[0] << 24;
1274 			ret |= data[1] << 16;
1275 			ret |= data[2] <<  8;
1276 			ret |= data[3] <<  0;
1277 		} else {
1278 			ret |= data[3] << 24;
1279 			ret |= data[2] << 16;
1280 			ret |= data[1] <<  8;
1281 			ret |= data[0] <<  0;
1282 		}
1283 
1284 		data = data[4 .. $];
1285 		offsetAdjustment += 4;
1286 		return ret;
1287 	}
1288 
1289 	ushort read2() {
1290 		exif_enforce(data.length >= 2);
1291 
1292 		ushort ret;
1293 		if(bigEndian) {
1294 			ret |= data[0] << 8;
1295 			ret |= data[1] << 0;
1296 		} else {
1297 			ret |= data[1] << 8;
1298 			ret |= data[0] << 0;
1299 		}
1300 
1301 		data = data[2 .. $];
1302 		offsetAdjustment += 2;
1303 		return ret;
1304 	}
1305 
1306 	ubyte read1() {
1307 		exif_enforce(data.length >= 1);
1308 		ubyte ret = data[0];
1309 		data = data[1 .. $];
1310 		offsetAdjustment += 1;
1311 		return ret;
1312 	}
1313 
1314 	void jumpOffset(uint offset) {
1315 		exif_enforce(offsetAdjustment <= offset);
1316 		offset -= offsetAdjustment;
1317 		data = data[offset .. $];
1318 		offsetAdjustment += offset;
1319 	}
1320 
1321 	exif_enforce(read2() == 42);
1322 
1323 	while(data.length) {
1324 		auto nextIfdOffset = read4();
1325 		if(nextIfdOffset == 0)
1326 			return;
1327 		jumpOffset(nextIfdOffset);
1328 
1329 		// reading an ifd now
1330 		auto numberOfIfdEntries = read2();
1331 		foreach(item; 0 .. numberOfIfdEntries) {
1332 			auto tagId = read2();
1333 			auto fieldType = read2();
1334 			auto countOfType = read4();
1335 			auto valueOrOffset = read4();
1336 
1337 			// https://exiftool.org/TagNames/EXIF.html
1338 
1339 			// FIXME we could read a LOT more of this, but for now all i care about is orientation lol
1340 			if(tagId == 0x0112 && fieldType == 3 && countOfType == 1) {
1341 				/+
1342 					valueOrOffset can be:
1343 
1344 					1 = Horizontal (normal)
1345 					2 = Mirror horizontal
1346 					3 = Rotate 180
1347 					4 = Mirror vertical
1348 					5 = Mirror horizontal and rotate 270 CW
1349 					6 = Rotate 90 CW
1350 					7 = Mirror horizontal and rotate 90 CW
1351 					8 = Rotate 270 CW
1352 				+/
1353 
1354 				// it stores the data inline but packed into the first bytes
1355 				// so since this is a 16 bit thing packed to the left, we want to move it
1356 				// down to right slot based on endinanness. woof but meh.
1357 				if(bigEndian) {
1358 					this.orientation = valueOrOffset >> 16;
1359 				} else {
1360 					this.orientation = valueOrOffset;
1361 				}
1362 			}
1363 
1364 			// import std.stdio; writefln("%04x %d %d %d", tagId, fieldType, countOfType, valueOrOffset);
1365 		}
1366 	}
1367     }
1368 
1369     // format: Exif\0\0<tiff file bytes here>
1370     // are those two zero bytes just padding?
1371     /+
1372 	tiff file:
1373 
1374 	II or MM for byte order
1375 	then 16 bit number 42 (0x2a 0x00)
1376 	32 bit number containing byte offset of first IFD (should prolly be 8, saying it starts right after the header)
1377 
1378 	IFD:
1379 		16 bit number of fields
1380 		12-byte entries
1381 		4 byte offset of next ifd (0 if none)
1382 
1383 	IFD entry:
1384 		16 bit tag id
1385 		16 bit field type
1386 			1 = byte
1387 			2 = ascii stringz
1388 			3 = 16 bit ushort
1389 			4 = 32 bit ulong
1390 			5 = rational; numerator then denominator
1391 
1392 			and others, see https://web.archive.org/web/20210108174645/https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
1393 		32 bit number of values (count of the type)
1394 		32 bit value or offset (must be even number, can point anywhere in file, but if the type is 4 bytes or less it is just packed in here, left-aligned)
1395     +/
1396   }
1397 
1398     /++
1399 	The exif orientation value from the file, if present (0 if it was not present).
1400 
1401 	You do not have to look at this if you leave [autoRotateBasedOnExifOrientation] as the default `true` value.
1402 
1403 	History:
1404 		Added May 6, 2025
1405     +/
1406     public int orientation = 0;
1407 
1408     /++
1409 	If true (the default), the image will have the orientation automatically applied to the pixels before returning.
1410 
1411 	Otherwise, you must see [orientation] to know the intended look.
1412 
1413 	History:
1414 		Added May 7, 2025
1415     +/
1416     public bool autoRotateBasedOnExifOrientation = true;
1417 
1418   // Used to skip unrecognized markers.
1419   void skip_variable_marker () {
1420     uint num_left;
1421 
1422     num_left = get_bits(16);
1423 
1424     if (num_left < 2)
1425       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1426 
1427     num_left -= 2;
1428 
1429     while (num_left)
1430     {
1431       get_bits(8);
1432       num_left--;
1433     }
1434   }
1435 
1436   // Read a define restart interval (DRI) marker.
1437   void read_dri_marker () {
1438     if (get_bits(16) != 4)
1439       stop_decoding(JPGD_BAD_DRI_LENGTH);
1440 
1441     m_restart_interval = get_bits(16);
1442   }
1443 
1444   // Read a start of scan (SOS) marker.
1445   void read_sos_marker () {
1446     uint num_left;
1447     int i, ci, n, c, cc;
1448 
1449     num_left = get_bits(16);
1450 
1451     n = get_bits(8);
1452 
1453     m_comps_in_scan = n;
1454 
1455     num_left -= 3;
1456 
1457     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1458       stop_decoding(JPGD_BAD_SOS_LENGTH);
1459 
1460     for (i = 0; i < n; i++)
1461     {
1462       cc = get_bits(8);
1463       c = get_bits(8);
1464       num_left -= 2;
1465 
1466       for (ci = 0; ci < m_comps_in_frame; ci++)
1467         if (cc == m_comp_ident.ptr[ci])
1468           break;
1469 
1470       if (ci >= m_comps_in_frame)
1471         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1472 
1473       m_comp_list.ptr[i]    = ci;
1474       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1475       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1476     }
1477 
1478     m_spectral_start  = get_bits(8);
1479     m_spectral_end    = get_bits(8);
1480     m_successive_high = get_bits(4);
1481     m_successive_low  = get_bits(4);
1482 
1483     if (!m_progressive_flag)
1484     {
1485       m_spectral_start = 0;
1486       m_spectral_end = 63;
1487     }
1488 
1489     num_left -= 3;
1490 
1491     /* read past whatever is num_left */
1492     while (num_left)
1493     {
1494       get_bits(8);
1495       num_left--;
1496     }
1497   }
1498 
1499   // Finds the next marker.
1500   int next_marker () {
1501     uint c, bytes;
1502 
1503     bytes = 0;
1504 
1505     do
1506     {
1507       do
1508       {
1509         bytes++;
1510         c = get_bits(8);
1511       } while (c != 0xFF);
1512 
1513       do
1514       {
1515         c = get_bits(8);
1516       } while (c == 0xFF);
1517 
1518     } while (c == 0);
1519 
1520     // If bytes > 0 here, there where extra bytes before the marker (not good).
1521 
1522     return c;
1523   }
1524 
  // Processes marker segments until one shows up that the caller has to handle.
  //
  // Table and metadata segments are consumed here: DHT, DQT and DRI are parsed by
  // their readers, APP1 is handed to read_exif_marker(), and every marker not
  // listed in the switch is skipped via skip_variable_marker().
  //
  // Params:
  //   allow_restarts = when true, RSTn markers are silently skipped; when false
  //                    they stop decoding with JPGD_UNEXPECTED_MARKER.
  //
  // Returns: the code of the SOFx, SOI, EOI, or SOS marker that was encountered.
  int process_markers (bool allow_restarts = false) {
    int c;

    for ( ; ; ) {
      c = next_marker();

      switch (c)
      {
        // Markers the caller must deal with: hand them back untouched.
        case M_SOF0:
        case M_SOF1:
        case M_SOF2:
        case M_SOF3:
        case M_SOF5:
        case M_SOF6:
        case M_SOF7:
        //case M_JPG:
        case M_SOF9:
        case M_SOF10:
        case M_SOF11:
        case M_SOF13:
        case M_SOF14:
        case M_SOF15:
        case M_SOI:
        case M_EOI:
        case M_SOS:
          return c;
        case M_DHT:
          read_dht_marker();
          break;
        // No arithmetic coding support - dumb patents!
        case M_DAC:
          stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
          break;
        case M_DQT:
          read_dqt_marker();
          break;
        case M_DRI:
          read_dri_marker();
          break;
	case M_APP1: /* likely EXIF data */
          read_exif_marker();

	break;
        //case M_APP0:  /* no need to read the JFIF marker */

        case M_RST0:    /* no parameters */
        case M_RST1:
        case M_RST2:
        case M_RST3:
        case M_RST4:
        case M_RST5:
        case M_RST6:
        case M_RST7:
		if(allow_restarts)
			continue; // restarts the marker loop, ignoring the restart marker
		else
			goto case; // falls into the M_JPG/M_TEM error case below
        case M_JPG:
        case M_TEM:
          stop_decoding(JPGD_UNEXPECTED_MARKER);
          break;
        default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
          skip_variable_marker();
          break;
      }
    }

    // The loop above is only left through the return statement.
    assert(0);
  }
1596 
  // Finds the start of image (SOI) marker.
  // This code is rather defensive: when the stream does not begin with SOI, only a
  // limited number of further bytes (bytesleft, currently 4096) is scanned, to avoid
  // false positives. Failure stops decoding with JPGD_NOT_JPEG.
  void locate_soi_marker () {
    uint lastchar, thischar;
    uint bytesleft;

    lastchar = get_bits(8);

    thischar = get_bits(8);

    /* ok if it's a normal JPEG file without a special header */

    if ((lastchar == 0xFF) && (thischar == M_SOI))
      return;

    bytesleft = 4096; //512;

    // Slide a two-byte window over the input until 0xFF followed by SOI is seen.
    for ( ; ; )
    {
      if (--bytesleft == 0)
        stop_decoding(JPGD_NOT_JPEG);

      lastchar = thischar;

      thischar = get_bits(8);

      if (lastchar == 0xFF)
      {
        if (thischar == M_SOI)
          break;
        else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
          stop_decoding(JPGD_NOT_JPEG);
      }
    }

    // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
    // The byte is peeked from the top of the bit buffer without consuming it.
    thischar = (m_bit_buf >> 24) & 0xFF;

    if (thischar != 0xFF)
      stop_decoding(JPGD_NOT_JPEG);
  }
1639 
1640   // Find a start of frame (SOF) marker.
1641   void locate_sof_marker () {
1642     locate_soi_marker();
1643 
1644     int c = process_markers();
1645 
1646     switch (c)
1647     {
1648       case M_SOF2:
1649         m_progressive_flag = true;
1650         goto case;
1651       case M_SOF0:  /* baseline DCT */
1652       case M_SOF1:  /* extended sequential DCT */
1653         read_sof_marker();
1654         break;
1655       case M_SOF9:  /* Arithmitic coding */
1656         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1657         break;
1658       default:
1659         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1660         break;
1661     }
1662   }
1663 
1664   // Find a start of scan (SOS) marker.
1665   int locate_sos_marker () {
1666     int c;
1667 
1668     c = process_markers();
1669 
1670     if (c == M_EOI)
1671       return false;
1672     else if (c != M_SOS)
1673       stop_decoding(JPGD_UNEXPECTED_MARKER);
1674 
1675     read_sos_marker();
1676 
1677     return true;
1678   }
1679 
1680   // Reset everything to default/uninitialized state.
1681   void initit (JpegStreamReadFunc rfn) {
1682     m_pMem_blocks = null;
1683     m_error_code = JPGD_SUCCESS;
1684     m_ready_flag = false;
1685     m_image_x_size = m_image_y_size = 0;
1686     readfn = rfn;
1687     m_progressive_flag = false;
1688 
1689     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1690     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1691     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1692     memset(m_quant.ptr, 0, m_quant.sizeof);
1693 
1694     m_scan_type = 0;
1695     m_comps_in_frame = 0;
1696 
1697     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1698     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1699     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1700     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1701     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1702     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1703 
1704     m_comps_in_scan = 0;
1705     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1706     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1707     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1708 
1709     m_spectral_start = 0;
1710     m_spectral_end = 0;
1711     m_successive_low = 0;
1712     m_successive_high = 0;
1713     m_max_mcu_x_size = 0;
1714     m_max_mcu_y_size = 0;
1715     m_blocks_per_mcu = 0;
1716     m_max_blocks_per_row = 0;
1717     m_mcus_per_row = 0;
1718     m_mcus_per_col = 0;
1719     m_expanded_blocks_per_component = 0;
1720     m_expanded_blocks_per_mcu = 0;
1721     m_expanded_blocks_per_row = 0;
1722     m_freq_domain_chroma_upsample = false;
1723 
1724     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1725 
1726     m_total_lines_left = 0;
1727     m_mcu_lines_left = 0;
1728     m_real_dest_bytes_per_scan_line = 0;
1729     m_dest_bytes_per_scan_line = 0;
1730     m_dest_bytes_per_pixel = 0;
1731 
1732     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1733 
1734     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1735     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1736     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1737 
1738     m_eob_run = 0;
1739 
1740     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1741 
1742     m_pIn_buf_ofs = m_in_buf.ptr;
1743     m_in_buf_left = 0;
1744     m_eof_flag = false;
1745     m_tem_flag = 0;
1746 
1747     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1748     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1749     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1750 
1751     m_restart_interval = 0;
1752     m_restarts_left    = 0;
1753     m_next_restart_num = 0;
1754 
1755     m_max_mcus_per_row = 0;
1756     m_max_blocks_per_mcu = 0;
1757     m_max_mcus_per_col = 0;
1758 
1759     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1760     m_pMCU_coefficients = null;
1761     m_pSample_buf = null;
1762 
1763     m_total_bytes_read = 0;
1764 
1765     m_pScan_line_0 = null;
1766     m_pScan_line_1 = null;
1767 
1768     // Ready the input buffer.
1769     prep_in_buffer();
1770 
1771     // Prime the bit buffer.
1772     m_bits_left = 16;
1773     m_bit_buf = 0;
1774 
1775     get_bits(16);
1776     get_bits(16);
1777 
1778     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1779       m_mcu_block_max_zag.ptr[i] = 64;
1780   }
1781 
  // Number of fractional bits in the fixed-point values used by the YCbCr -> RGB lookup tables.
  enum SCALEBITS = 16;
  // 0.5 in that fixed-point format; added before a right shift by SCALEBITS to round.
  enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
  // Compile-time conversion of a floating-point constant to SCALEBITS fixed point (0.5 is added before truncating to int).
  enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1785 
1786   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1787   void create_look_ups () {
1788     for (int i = 0; i <= 255; i++)
1789     {
1790       int k = i - 128;
1791       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1792       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1793       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1794       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1795     }
1796   }
1797 
  // This method throws back into the stream any bytes that were read
  // into the bit buffer during initial marker scanning, then refills the bit
  // buffer with get_bits_no_markers().
  void fix_in_buffer () {
    // In case any 0xFF's were pulled into the buffer during marker scanning.
    // The bit buffer must hold a whole number of bytes at this point.
    assert((m_bits_left & 7) == 0);

    // Push the buffered bytes back, lowest byte of m_bit_buf first. The two low
    // bytes are only returned when m_bits_left says they are present.
    if (m_bits_left == 16)
      stuff_char(cast(ubyte)(m_bit_buf & 0xFF));

    if (m_bits_left >= 8)
      stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));

    stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
    stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));

    // Re-prime the bit buffer.
    m_bits_left = 16;
    get_bits_no_markers(16);
    get_bits_no_markers(16);
  }
1817 
1818   void transform_mcu (int mcu_row) {
1819     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1820     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1821 
1822     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1823     {
1824       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1825       pSrc_ptr += 64;
1826       pDst_ptr += 64;
1827     }
1828   }
1829 
  // Lookup used by transform_mcu_expand(): indexed by a chroma block's maximum
  // zigzag position minus one. Each entry packs two small counts as hi*16 + lo,
  // which select the DCT_Upsample.P_Q!(hi, lo) / R_S!(hi, lo) instantiation to run.
  static immutable ubyte[64] s_max_rc = [
    17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
    102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
  ];
1836 
  // Variant of transform_mcu() that the row decoders call when
  // m_freq_domain_chroma_upsample is set.
  // The first m_expanded_blocks_per_component blocks of the MCU get a regular IDCT.
  // Each of the next two blocks is expanded through DCT_Upsample into four
  // 64-sample output blocks. Output goes to m_pSample_buf at the slot of MCU number mcu_row.
  void transform_mcu_expand (int mcu_row) {
    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
    ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;

    // Y IDCT
    int mcu_block;
    for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
    {
      idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
      pSrc_ptr += 64;
      pDst_ptr += 64;
    }

    // Chroma IDCT, with upsampling
    jpgd_block_t[64] temp_block;

    // mcu_block keeps counting from where the Y loop stopped, so the two iterations
    // below handle the two blocks that follow the Y blocks.
    for (int i = 0; i < 2; i++)
    {
      DCT_Upsample.Matrix44 P, Q, R, S;

      assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
      assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);

      int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
      if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
      // s_max_rc packs the two template arguments as hi*16 + lo; dispatch to the
      // matching P_Q / R_S instantiation.
      switch (s_max_rc.ptr[max_zag])
      {
      case 1*16+1:
        DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
        break;
      case 1*16+2:
        DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
        break;
      case 2*16+2:
        DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+2:
        DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+3:
        DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
        break;
      case 3*16+4:
        DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
        break;
      case 4*16+4:
        DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+4:
        DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+5:
        DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
        break;
      case 5*16+6:
        DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
        break;
      case 6*16+6:
        DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+6:
        DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+7:
        DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
        break;
      case 7*16+8:
        DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
        break;
      case 8*16+8:
        DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
        break;
      default:
        assert(false);
      }

      // a = P + Q, b = P - Q, c = R + S, d = R - S. b and d reuse P and R in place.
      auto a = DCT_Upsample.Matrix44(P + Q);
      P -= Q;
      DCT_Upsample.Matrix44* b = &P;
      auto c = DCT_Upsample.Matrix44(R + S);
      R -= S;
      DCT_Upsample.Matrix44* d = &R;

      // Four output blocks per source block: a+c, a-c, b+d, b-d, each through the 4x4 IDCT.
      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      pSrc_ptr += 64;
    }
  }
1954 
  // Loads and dequantizes the next row of (already decoded) coefficients.
  // Progressive images only.
  //
  // For every MCU in the row, each block's DC value and 63 AC values are copied from
  // the coefficient buffers into m_pMCU_coefficients, dequantized in place, and the
  // MCU is then run through the IDCT. Finally the per-component block row position
  // (m_block_y_mcu) is advanced.
  void load_next_row () {
    int i;
    jpgd_block_t *p;
    jpgd_quant_t *q;
    // NOTE: row_block is incremented below but never read in this function.
    int mcu_row, mcu_block, row_block = 0;
    int component_num, component_id;
    // Horizontal block position of each component within the current row.
    int[JPGD_MAX_COMPONENTS] block_x_mcu;

    memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);

    for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
    {
      int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;

      for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
      {
        component_id = m_mcu_org.ptr[mcu_block];
        q = m_quant.ptr[m_comp_quant.ptr[component_id]];

        p = m_pMCU_coefficients + 64 * mcu_block;

        // DC and AC coefficients are kept in separate buffers; merge them into one block.
        jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
        jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
        p[0] = pDC[0];
        memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);

        // Find the last non-zero coefficient in zigzag order (stops at 0 if none is found).
        for (i = 63; i > 0; i--)
          if (p[g_ZAG[i]])
            break;

        m_mcu_block_max_zag.ptr[mcu_block] = i + 1;

        // Dequantize every non-zero coefficient up to that position.
        for ( ; i >= 0; i--)
          if (p[g_ZAG[i]])
            p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);

        row_block++;

        // Advance to the next block position: one block per MCU for single-component
        // scans, otherwise walk the component's h_samp x v_samp block grid.
        if (m_comps_in_scan == 1)
          block_x_mcu.ptr[component_id]++;
        else
        {
          if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
          {
            block_x_mcu_ofs = 0;

            if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
            {
              block_y_mcu_ofs = 0;

              block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
            }
          }
        }
      }

      if (m_freq_domain_chroma_upsample)
        transform_mcu_expand(mcu_row);
      else
        transform_mcu(mcu_row);
    }

    // Move each component's vertical block position on to the next MCU row.
    if (m_comps_in_scan == 1)
      m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
    else
    {
      for (component_num = 0; component_num < m_comps_in_scan; component_num++)
      {
        component_id = m_comp_list.ptr[component_num];

        m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
      }
    }
  }
2031 
  // Restart interval processing.
  // Scans the raw input for the expected RSTn marker, then resets the DC predictors,
  // the EOB run, the restart countdown and the bit buffer. A missing or unexpected
  // marker stops decoding with JPGD_BAD_RESTART_MARKER.
  void process_restart () {
    int i;
    int c = 0;

    // Align to a byte boundary
    // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
    //get_bits_no_markers(m_bits_left & 7);

    // Let's scan a little bit to find the marker, but not _too_ far.
    // 1536 is a "fudge factor" that determines how much to scan.
    for (i = 1536; i > 0; i--)
      if (get_char() == 0xFF)
        break;

    if (i == 0)
      stop_decoding(JPGD_BAD_RESTART_MARKER);

    // Skip any run of 0xFF bytes; the first other byte is the marker code.
    // The same scan budget (i) keeps counting down.
    for ( ; i > 0; i--)
      if ((c = get_char()) != 0xFF)
        break;

    if (i == 0)
      stop_decoding(JPGD_BAD_RESTART_MARKER);

    // Is it the expected marker? If not, something bad happened.
    if (c != (m_next_restart_num + M_RST0))
      stop_decoding(JPGD_BAD_RESTART_MARKER);

    // Reset each component's DC prediction values.
    memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);

    m_eob_run = 0;

    m_restarts_left = m_restart_interval;

    // Restart marker numbers cycle through 0..7.
    m_next_restart_num = (m_next_restart_num + 1) & 7;

    // Get the bit buffer going again...

    m_bits_left = 16;
    get_bits_no_markers(16);
    get_bits_no_markers(16);
  }
2076 
2077   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
2078 
  // Decodes and dequantizes the next row of coefficients.
  //
  // For every MCU in the row: handles a pending restart, Huffman-decodes each block
  // (DC difference, then run/size coded AC coefficients) straight into
  // m_pMCU_coefficients, and runs the MCU through the IDCT.
  // Invalid run lengths stop decoding with JPGD_DECODE_ERROR.
  void decode_next_row () {
    // NOTE: row_block is incremented below but never read in this function.
    int row_block = 0;

    for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
    {
      if ((m_restart_interval) && (m_restarts_left == 0))
        process_restart();

      jpgd_block_t* p = m_pMCU_coefficients;
      for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
      {
        int component_id = m_mcu_org.ptr[mcu_block];
        jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];

        // DC: the decoded value is a difference against the component's previous DC value.
        // NOTE(review): r appears to receive the extra bits from huff_decode — confirm against its definition.
        int r, s;
        s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
        s = JPGD_HUFF_EXTEND(r, s);

        m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);

        p[0] = cast(jpgd_block_t)(s * q[0]);

        // Zigzag extent recorded the last time this block slot was filled. Positions
        // below it may still hold old coefficients and are cleared when skipped.
        int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];

        huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];

        int k;
        for (k = 1; k < 64; k++)
        {
          int extra_bits;
          s = huff_decode(pH, extra_bits);

          // High nibble: run of zero coefficients; low nibble: size of the coefficient.
          r = s >> 4;
          s &= 15;

          if (s)
          {
            if (r)
            {
              if ((k + r) > 63)
                stop_decoding(JPGD_DECODE_ERROR);

              // Clear the skipped positions, but only as far as old data can exist.
              if (k < prev_num_set)
              {
                int n = JPGD_MIN(r, prev_num_set - k);
                int kt = k;
                while (n--)
                  p[g_ZAG[kt++]] = 0;
              }

              k += r;
            }

            s = JPGD_HUFF_EXTEND(extra_bits, s);

            assert(k < 64);

            p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
          }
          else
          {
            // Size 0: a run value of 15 means sixteen zero coefficients, anything
            // else ends the block.
            if (r == 15)
            {
              if ((k + 16) > 64)
                stop_decoding(JPGD_DECODE_ERROR);

              if (k < prev_num_set)
              {
                int n = JPGD_MIN(16, prev_num_set - k);
                int kt = k;
                while (n--)
                {
                  assert(kt <= 63);
                  p[g_ZAG[kt++]] = 0;
                }
              }

              k += 16 - 1; // - 1 because the loop counter is k
              assert(p[g_ZAG[k]] == 0);
            }
            else
              break;
          }
        }

        // Clear whatever old coefficients remain beyond the last decoded position.
        if (k < prev_num_set)
        {
          int kt = k;
          while (kt < prev_num_set)
            p[g_ZAG[kt++]] = 0;
        }

        // Remember the new extent for the IDCT and for the next use of this slot.
        m_mcu_block_max_zag.ptr[mcu_block] = k;

        row_block++;
      }

      if (m_freq_domain_chroma_upsample)
        transform_mcu_expand(mcu_row);
      else
        transform_mcu(mcu_row);

      m_restarts_left--;
    }
  }
2185 
2186   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
2187   void H1V1Convert () {
2188     int row = m_max_mcu_y_size - m_mcu_lines_left;
2189     ubyte *d = m_pScan_line_0;
2190     ubyte *s = m_pSample_buf + row * 8;
2191 
2192     for (int i = m_max_mcus_per_row; i > 0; i--)
2193     {
2194       for (int j = 0; j < 8; j++)
2195       {
2196         int y = s[j];
2197         int cb = s[64+j];
2198         int cr = s[128+j];
2199 
2200         d[0] = clamp(y + m_crr.ptr[cr]);
2201         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2202         d[2] = clamp(y + m_cbb.ptr[cb]);
2203         d[3] = 255;
2204 
2205         d += 4;
2206       }
2207 
2208       s += 64*3;
2209     }
2210   }
2211 
  // YCbCr H2V1 (2x1:1:1, 4 blocks per MCU) to RGB.
  // Writes one scan line of 4-byte pixels (R, G, B, 255) to m_pScan_line_0; every
  // chroma sample is shared by two horizontally adjacent pixels.
  void H2V1Convert () {
    int row = m_max_mcu_y_size - m_mcu_lines_left;
    ubyte *d0 = m_pScan_line_0;
    ubyte *y = m_pSample_buf + row * 8;
    // Chroma starts after the two Y blocks of the MCU.
    ubyte *c = m_pSample_buf + 2*64 + row * 8;

    for (int i = m_max_mcus_per_row; i > 0; i--)
    {
      // l walks the two Y blocks of the MCU.
      for (int l = 0; l < 2; l++)
      {
        for (int j = 0; j < 4; j++)
        {
          int cb = c[0];
          int cr = c[64];

          int rc = m_crr.ptr[cr];
          int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
          int bc = m_cbb.ptr[cb];

          int yy = y[j<<1];
          d0[0] = clamp(yy+rc);
          d0[1] = clamp(yy+gc);
          d0[2] = clamp(yy+bc);
          d0[3] = 255;

          yy = y[(j<<1)+1];
          d0[4] = clamp(yy+rc);
          d0[5] = clamp(yy+gc);
          d0[6] = clamp(yy+bc);
          d0[7] = 255;

          d0 += 8;

          c++;
        }
        y += 64;
      }

      // Step to the next MCU: y already moved past 2 blocks, c past 8 samples.
      y += 64*4 - 64*2;
      c += 64*4 - 8;
    }
  }
2255 
2256   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2257   void H1V2Convert () {
2258     int row = m_max_mcu_y_size - m_mcu_lines_left;
2259     ubyte *d0 = m_pScan_line_0;
2260     ubyte *d1 = m_pScan_line_1;
2261     ubyte *y;
2262     ubyte *c;
2263 
2264     if (row < 8)
2265       y = m_pSample_buf + row * 8;
2266     else
2267       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2268 
2269     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2270 
2271     for (int i = m_max_mcus_per_row; i > 0; i--)
2272     {
2273       for (int j = 0; j < 8; j++)
2274       {
2275         int cb = c[0+j];
2276         int cr = c[64+j];
2277 
2278         int rc = m_crr.ptr[cr];
2279         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2280         int bc = m_cbb.ptr[cb];
2281 
2282         int yy = y[j];
2283         d0[0] = clamp(yy+rc);
2284         d0[1] = clamp(yy+gc);
2285         d0[2] = clamp(yy+bc);
2286         d0[3] = 255;
2287 
2288         yy = y[8+j];
2289         d1[0] = clamp(yy+rc);
2290         d1[1] = clamp(yy+gc);
2291         d1[2] = clamp(yy+bc);
2292         d1[3] = 255;
2293 
2294         d0 += 4;
2295         d1 += 4;
2296       }
2297 
2298       y += 64*4;
2299       c += 64*4;
2300     }
2301   }
2302 
  // YCbCr H2V2 (2x2:1:1, 6 blocks per MCU) to RGB.
  // Produces two scan lines of 4-byte pixels (R, G, B, 255) per call, in
  // m_pScan_line_0 and m_pScan_line_1; every chroma sample covers a 2x2 pixel group.
  void H2V2Convert () {
    int row = m_max_mcu_y_size - m_mcu_lines_left;
    ubyte *d0 = m_pScan_line_0;
    ubyte *d1 = m_pScan_line_1;
    ubyte *y;
    ubyte *c;

    // Rows below 8 start in the first Y block, the others in the third one.
    if (row < 8)
      y = m_pSample_buf + row * 8;
    else
      y = m_pSample_buf + 64*2 + (row & 7) * 8;

    // Chroma starts after the four Y blocks of the MCU.
    c = m_pSample_buf + 64*4 + (row >> 1) * 8;

    for (int i = m_max_mcus_per_row; i > 0; i--)
    {
      // l walks two horizontally adjacent Y blocks.
      for (int l = 0; l < 2; l++)
      {
        for (int j = 0; j < 8; j += 2)
        {
          int cb = c[0];
          int cr = c[64];

          int rc = m_crr.ptr[cr];
          int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
          int bc = m_cbb.ptr[cb];

          int yy = y[j];
          d0[0] = clamp(yy+rc);
          d0[1] = clamp(yy+gc);
          d0[2] = clamp(yy+bc);
          d0[3] = 255;

          yy = y[j+1];
          d0[4] = clamp(yy+rc);
          d0[5] = clamp(yy+gc);
          d0[6] = clamp(yy+bc);
          d0[7] = 255;

          yy = y[j+8];
          d1[0] = clamp(yy+rc);
          d1[1] = clamp(yy+gc);
          d1[2] = clamp(yy+bc);
          d1[3] = 255;

          yy = y[j+8+1];
          d1[4] = clamp(yy+rc);
          d1[5] = clamp(yy+gc);
          d1[6] = clamp(yy+bc);
          d1[7] = 255;

          d0 += 8;
          d1 += 8;

          c++;
        }
        y += 64;
      }

      // Step to the next MCU: y already moved past 2 blocks, c past 8 samples.
      y += 64*6 - 64*2;
      c += 64*6 - 8;
    }
  }
2367 
2368   // Y (1 block per MCU) to 8-bit grayscale
2369   void gray_convert () {
2370     int row = m_max_mcu_y_size - m_mcu_lines_left;
2371     ubyte *d = m_pScan_line_0;
2372     ubyte *s = m_pSample_buf + row * 8;
2373 
2374     for (int i = m_max_mcus_per_row; i > 0; i--)
2375     {
2376       *cast(uint*)d = *cast(uint*)s;
2377       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2378 
2379       s += 64;
2380       d += 8;
2381     }
2382   }
2383 
2384   void expanded_convert () {
2385     int row = m_max_mcu_y_size - m_mcu_lines_left;
2386 
2387     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2388 
2389     ubyte* d = m_pScan_line_0;
2390 
2391     for (int i = m_max_mcus_per_row; i > 0; i--)
2392     {
2393       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2394       {
2395         immutable int Y_ofs = k * 8;
2396         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2397         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2398         for (int j = 0; j < 8; j++)
2399         {
2400           int y = Py[Y_ofs + j];
2401           int cb = Py[Cb_ofs + j];
2402           int cr = Py[Cr_ofs + j];
2403 
2404           d[0] = clamp(y + m_crr.ptr[cr]);
2405           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2406           d[2] = clamp(y + m_cbb.ptr[cb]);
2407           d[3] = 255;
2408 
2409           d += 4;
2410         }
2411       }
2412 
2413       Py += 64 * m_expanded_blocks_per_mcu;
2414     }
2415   }
2416 
  // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
  // The marker scan is only done for non-progressive images. In every case the bytes
  // still sitting unread in the input buffer are subtracted from m_total_bytes_read.
  void find_eoi () {
    if (!m_progressive_flag)
    {
      // Attempt to read the EOI marker.
      //get_bits_no_markers(m_bits_left & 7);

      // Prime the bit buffer
      m_bits_left = 16;
      get_bits(16);
      get_bits(16);

      // The next marker _should_ be EOI
      // The returned marker code is not inspected here.
      process_markers(true); // but restarts are allowed as we can harmlessly skip them at the end of the stream
    }

    m_total_bytes_read -= m_in_buf_left;
  }
2435 
2436   // Creates the tables needed for efficient Huffman decoding.
  void make_huff_table (int index, huff_tables *pH) {
    // Fills `pH` from the per-length code counts in m_huff_num[index] and the
    // symbol values in m_huff_val[index]:
    //  - look_up / look_up2: direct tables indexed by the first 8 bits of a code;
    //  - tree: bit-by-bit binary tree for codes longer than 8 bits;
    //  - code_size: code length of every symbol.
    // NOTE(review): huffsize/huffcode hold 257 entries and the tree index is not
    // range-checked in this function; presumably the table reader validates the
    // counts beforehand — confirm.
    int p, i, l, si;
    ubyte[257] huffsize;
    uint[257] huffcode;
    uint code;
    uint subtree;
    int code_size;
    int lastp;
    int nextfreeentry;
    int currententry;

    pH.ac_table = m_huff_ac.ptr[index] != 0;

    p = 0;

    // Expand the counts into a flat list: one entry per code, holding its length.
    for (l = 1; l <= 16; l++)
    {
      for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
        huffsize.ptr[p++] = cast(ubyte)(l);
    }

    // A zero length terminates the list.
    huffsize.ptr[p] = 0;

    lastp = p;

    code = 0;
    si = huffsize.ptr[0];
    p = 0;

    // Assign code values: consecutive numbers within one length, shifted left by
    // one bit each time the length grows.
    while (huffsize.ptr[p])
    {
      while (huffsize.ptr[p] == si)
      {
        huffcode.ptr[p++] = code;
        code++;
      }

      code <<= 1;
      si++;
    }

    memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
    memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
    memset(pH.tree.ptr, 0, pH.tree.sizeof);
    memset(pH.code_size.ptr, 0, pH.code_size.sizeof);

    // Tree entries are referred to by negative numbers, handed out two at a time.
    nextfreeentry = -1;

    p = 0;

    while (p < lastp)
    {
      i = m_huff_val.ptr[index][p];
      code = huffcode.ptr[p];
      code_size = huffsize.ptr[p];

      pH.code_size.ptr[i] = cast(ubyte)(code_size);

      if (code_size <= 8)
      {
        // Short code: left-align it in 8 bits and fill every table slot that
        // starts with this code (2^(8 - code_size) slots).
        code <<= (8 - code_size);

        for (l = 1 << (8 - code_size); l > 0; l--)
        {
          assert(i < 256);

          pH.look_up.ptr[code] = i;

          bool has_extrabits = false;
          int extra_bits = 0;
          // The low nibble of the symbol is the number of extra bits that follow the code.
          int num_extra_bits = i & 15;

          int bits_to_fetch = code_size;
          if (num_extra_bits)
          {
            int total_codesize = code_size + num_extra_bits;
            if (total_codesize <= 8)
            {
              // Code and extra bits both fit in the 8-bit index, so the extra
              // bits can be precomputed from the slot number itself.
              has_extrabits = true;
              extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
              assert(extra_bits <= 0x7FFF);
              bits_to_fetch += num_extra_bits;
            }
          }

          // look_up2 packs: symbol in bits 0..7, bits to consume from bit 8 up,
          // and, when precomputed, flag 0x8000 plus the extra bits from bit 16 up.
          if (!has_extrabits)
            pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
          else
            pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);

          code++;
        }
      }
      else
      {
        // Long code: its first 8 bits select a table slot that holds a negative
        // tree entry; the remaining bits are resolved by walking the tree.
        subtree = (code >> (code_size - 8)) & 0xFF;

        currententry = pH.look_up.ptr[subtree];

        if (currententry == 0)
        {
          // First long code with this prefix: allocate the subtree root.
          pH.look_up.ptr[subtree] = currententry = nextfreeentry;
          pH.look_up2.ptr[subtree] = currententry = nextfreeentry;

          nextfreeentry -= 2;
        }

        // Move the bits after the 8-bit prefix up so the next one to test sits at 0x8000.
        code <<= (16 - (code_size - 8));

        for (l = code_size; l > 9; l--)
        {
          // A 0 bit selects the second slot of the pair (one entry lower).
          if ((code & 0x8000) == 0)
            currententry--;

          if (pH.tree.ptr[-currententry - 1] == 0)
          {
            // No child yet: allocate a new pair and descend into it.
            pH.tree.ptr[-currententry - 1] = nextfreeentry;

            currententry = nextfreeentry;

            nextfreeentry -= 2;
          }
          else
            currententry = pH.tree.ptr[-currententry - 1];

          code <<= 1;
        }

        // The last bit selects the leaf slot, which stores the symbol itself.
        if ((code & 0x8000) == 0)
          currententry--;

        pH.tree.ptr[-currententry - 1] = i;
      }

      p++;
    }
  }
2574 
2575   // Verifies the quantization tables needed for this scan are available.
2576   void check_quant_tables () {
2577     for (int i = 0; i < m_comps_in_scan; i++)
2578       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2579         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2580   }
2581 
2582   // Verifies that all the Huffman tables needed for this scan are available.
2583   void check_huff_tables () {
2584     for (int i = 0; i < m_comps_in_scan; i++)
2585     {
2586       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2587         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2588 
2589       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2590         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2591     }
2592 
2593     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2594       if (m_huff_num.ptr[i])
2595       {
2596         if (!m_pHuff_tabs.ptr[i])
2597           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2598 
2599         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2600       }
2601   }
2602 
2603   // Determines the component order inside each MCU.
2604   // Also calcs how many MCU's are on each row, etc.
2605   void calc_mcu_block_order () {
2606     int component_num, component_id;
2607     int max_h_samp = 0, max_v_samp = 0;
2608 
2609     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2610     {
2611       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2612         max_h_samp = m_comp_h_samp.ptr[component_id];
2613 
2614       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2615         max_v_samp = m_comp_v_samp.ptr[component_id];
2616     }
2617 
2618     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2619     {
2620       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2621       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2622     }
2623 
2624     if (m_comps_in_scan == 1)
2625     {
2626       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2627       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2628     }
2629     else
2630     {
2631       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2632       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2633     }
2634 
2635     if (m_comps_in_scan == 1)
2636     {
2637       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2638 
2639       m_blocks_per_mcu = 1;
2640     }
2641     else
2642     {
2643       m_blocks_per_mcu = 0;
2644 
2645       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2646       {
2647         int num_blocks;
2648 
2649         component_id = m_comp_list.ptr[component_num];
2650 
2651         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2652 
2653         while (num_blocks--)
2654           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2655       }
2656     }
2657   }
2658 
2659   // Starts a new scan.
2660   int init_scan () {
2661     if (!locate_sos_marker())
2662       return false;
2663 
2664     calc_mcu_block_order();
2665 
2666     check_huff_tables();
2667 
2668     check_quant_tables();
2669 
2670     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2671 
2672     m_eob_run = 0;
2673 
2674     if (m_restart_interval)
2675     {
2676       m_restarts_left = m_restart_interval;
2677       m_next_restart_num = 0;
2678     }
2679 
2680     fix_in_buffer();
2681 
2682     return true;
2683   }
2684 
2685   // Starts a frame. Determines if the number of components or sampling factors
2686   // are supported.
  void init_frame () {
    // Chooses m_scan_type and the MCU geometry from the component count and the
    // sampling factors, then sizes and allocates the per-frame buffers (scan
    // lines, MCU coefficients, sample buffer) and finally calls create_look_ups().
    // Unsupported layouts are rejected through stop_decoding().
    int i;

    if (m_comps_in_frame == 1)
    {
      version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}

      // An earlier revision rejected every single-component image whose sampling
      // factors were not 1x1; the 2x2 and 2x1 cases below were added later.

      if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_GRAYSCALE;
        m_max_blocks_per_mcu = 1;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
      {
        // NOTE(review): added by k8 empirically (it decoded two sample images);
        // the values are not derived from the format — confirm.
        m_scan_type = JPGD_GRAYSCALE;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
      {
        // NOTE(review): added by adr as a copy of the 2x2 case above; the author
        // was unsure it is correct (same values for a different layout) — confirm.
        m_scan_type = JPGD_GRAYSCALE;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else {
        // Any other single-component sampling layout is rejected
        // (the original note here reads: "code -231 brings us here").
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
      }
    }
    else if (m_comps_in_frame == 3)
    {
      // Components 1 and 2 must both be sampled 1x1; only component 0 may vary.
      if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
           ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

      if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_YH1V1;

        m_max_blocks_per_mcu = 3;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_YH2V1;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 16;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
      {
        m_scan_type = JPGD_YH1V2;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 16;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
      {
        m_scan_type = JPGD_YH2V2;
        m_max_blocks_per_mcu = 6;
        m_max_mcu_x_size = 16;
        m_max_mcu_y_size = 16;
      }
      else
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
    }
    else
      stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);

    // Number of MCUs needed to cover the image, rounding up in both directions.
    m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
    m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;

    // These values are for the *destination* pixels: after conversion.
    if (m_scan_type == JPGD_GRAYSCALE)
      m_dest_bytes_per_pixel = 1;
    else
      m_dest_bytes_per_pixel = 4;

    // Buffer pitch: width rounded up to a multiple of 16 pixels (the 0xFFF0 mask
    // also limits the rounded width to 16 bits).
    m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;

    // Bytes of a scan line that belong to actual image pixels.
    m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);

    // Initialize two scan line buffers.
    // The second one is only allocated for the H1V2 and H2V2 layouts.
    m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
    if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
      m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);

    m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;

    // Should never happen
    if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
      stop_decoding(JPGD_ASSERTION_ERROR);

    // Allocate the coefficient buffer, enough for one MCU
    m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);

    for (i = 0; i < m_max_blocks_per_mcu; i++)
      m_mcu_block_max_zag.ptr[i] = 64;

    // "Expanded" counts assume every component has as many blocks as component 0.
    m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
    m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
    m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
    // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
    m_freq_domain_chroma_upsample = false;
    version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
      m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
    }

    // The sample buffer holds one row of MCUs, 64 bytes per block; the expanded
    // block count is used when frequency-domain upsampling is active.
    if (m_freq_domain_chroma_upsample)
      m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
    else
      m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);

    m_total_lines_left = m_image_y_size;

    m_mcu_lines_left = 0;

    create_look_ups();
  }
2818 
2819   // The coeff_buf series of methods originally stored the coefficients
2820   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2821   // was used to make this process more efficient. Now, we can store the entire
2822   // thing in RAM.
2823   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2824     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2825 
2826     cb.block_num_x = block_num_x;
2827     cb.block_num_y = block_num_y;
2828     cb.block_len_x = block_len_x;
2829     cb.block_len_y = block_len_y;
2830     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
2831     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2832     return cb;
2833   }
2834 
2835   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2836     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2837     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2838   }
2839 
2840   // The following methods decode the various types of m_blocks encountered
2841   // in progressively encoded images.
2842   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2843     int s, r;
2844     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2845 
2846     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2847     {
2848       r = pD.get_bits_no_markers(s);
2849       s = JPGD_HUFF_EXTEND(r, s);
2850     }
2851 
2852     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2853 
2854     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2855   }
2856 
2857   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2858     if (pD.get_bits_no_markers(1))
2859     {
2860       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2861 
2862       p[0] |= (1 << pD.m_successive_low);
2863     }
2864   }
2865 
2866   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2867     int k, s, r;
2868 
2869     if (pD.m_eob_run)
2870     {
2871       pD.m_eob_run--;
2872       return;
2873     }
2874 
2875     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2876 
2877     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2878     {
2879       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2880 
2881       r = s >> 4;
2882       s &= 15;
2883 
2884       if (s)
2885       {
2886         if ((k += r) > 63)
2887           pD.stop_decoding(JPGD_DECODE_ERROR);
2888 
2889         r = pD.get_bits_no_markers(s);
2890         s = JPGD_HUFF_EXTEND(r, s);
2891 
2892         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2893       }
2894       else
2895       {
2896         if (r == 15)
2897         {
2898           if ((k += 15) > 63)
2899             pD.stop_decoding(JPGD_DECODE_ERROR);
2900         }
2901         else
2902         {
2903           pD.m_eob_run = 1 << r;
2904 
2905           if (r)
2906             pD.m_eob_run += pD.get_bits_no_markers(r);
2907 
2908           pD.m_eob_run--;
2909 
2910           break;
2911         }
2912       }
2913     }
2914   }
2915 
  static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
    // AC refinement pass of a progressive scan for coefficients
    // m_spectral_start .. m_spectral_end of one block: coefficients that are
    // already non-zero may receive one correction bit each, and new coefficients
    // of magnitude 1 << m_successive_low may be inserted at zero positions.
    int s, k, r;
    // p1 / m1: the positive and negative value of the bit added by this pass.
    int p1 = 1 << pD.m_successive_low;
    int m1 = (-1) << pD.m_successive_low;
    jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

    assert(pD.m_spectral_end <= 63);

    k = pD.m_spectral_start;

    if (pD.m_eob_run == 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

        // Symbol layout: high nibble = run of zero coefficients, low nibble = size.
        r = s >> 4;
        s &= 15;

        if (s)
        {
          // In a refinement pass a new coefficient can only have size 1.
          if (s != 1)
            pD.stop_decoding(JPGD_DECODE_ERROR);

          // One bit selects the sign of the new coefficient.
          if (pD.get_bits_no_markers(1))
            s = p1;
          else
            s = m1;
        }
        else
        {
          if (r != 15)
          {
            // Size 0 with run < 15 starts an end-of-band run; it is processed by
            // the block after this loop.
            pD.m_eob_run = 1 << r;

            if (r)
              pD.m_eob_run += pD.get_bits_no_markers(r);

            break;
          }
        }

        // Advance over coefficients until `r` zero coefficients have been passed;
        // every non-zero coefficient met on the way reads one correction bit.
        do
        {
          jpgd_block_t *this_coef = p + g_ZAG[k & 63];

          if (*this_coef != 0)
          {
            if (pD.get_bits_no_markers(1))
            {
              // Apply the correction only if this bit is not set yet; the value
              // moves away from zero (p1 for non-negative, m1 for negative).
              if ((*this_coef & p1) == 0)
              {
                if (*this_coef >= 0)
                  *this_coef = cast(jpgd_block_t)(*this_coef + p1);
                else
                  *this_coef = cast(jpgd_block_t)(*this_coef + m1);
              }
            }
          }
          else
          {
            // Zero coefficient: counts against the run; stop once the run is used up.
            if (--r < 0)
              break;
          }

          k++;

        } while (k <= pD.m_spectral_end);

        // Place the new coefficient (if any) at the position where the walk stopped.
        if ((s) && (k < 64))
        {
          p[g_ZAG[k]] = cast(jpgd_block_t)(s);
        }
      }
    }

    if (pD.m_eob_run > 0)
    {
      // Inside an end-of-band run no new coefficients appear; the remaining
      // non-zero coefficients of the band still read their correction bits.
      for ( ; k <= pD.m_spectral_end; k++)
      {
        jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis

        if (*this_coef != 0)
        {
          if (pD.get_bits_no_markers(1))
          {
            if ((*this_coef & p1) == 0)
            {
              if (*this_coef >= 0)
                *this_coef = cast(jpgd_block_t)(*this_coef + p1);
              else
                *this_coef = cast(jpgd_block_t)(*this_coef + m1);
            }
          }
        }
      }

      // This block used up one unit of the run.
      pD.m_eob_run--;
    }
  }
3016 
3017   // Decode a scan in a progressively encoded image.
3018   void decode_scan (pDecode_block_func decode_block_func) {
3019     int mcu_row, mcu_col, mcu_block;
3020     int[JPGD_MAX_COMPONENTS] block_x_mcu;
3021     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
3022 
3023     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
3024 
3025     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
3026     {
3027       int component_num, component_id;
3028 
3029       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
3030 
3031       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
3032       {
3033         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
3034 
3035         if ((m_restart_interval) && (m_restarts_left == 0))
3036           process_restart();
3037 
3038         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
3039         {
3040           component_id = m_mcu_org.ptr[mcu_block];
3041 
3042           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
3043 
3044           if (m_comps_in_scan == 1)
3045             block_x_mcu.ptr[component_id]++;
3046           else
3047           {
3048             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
3049             {
3050               block_x_mcu_ofs = 0;
3051 
3052               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
3053               {
3054                 block_y_mcu_ofs = 0;
3055                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
3056               }
3057             }
3058           }
3059         }
3060 
3061         m_restarts_left--;
3062       }
3063 
3064       if (m_comps_in_scan == 1)
3065         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
3066       else
3067       {
3068         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
3069         {
3070           component_id = m_comp_list.ptr[component_num];
3071           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
3072         }
3073       }
3074     }
3075   }
3076 
3077   // Decode a progressively encoded image.
3078   void init_progressive () {
3079     int i;
3080 
3081     if (m_comps_in_frame == 4)
3082       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
3083 
3084     // Allocate the coefficient buffers.
3085     for (i = 0; i < m_comps_in_frame; i++)
3086     {
3087       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
3088       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
3089     }
3090 
3091     for ( ; ; )
3092     {
3093       int dc_only_scan, refinement_scan;
3094       pDecode_block_func decode_block_func;
3095 
3096       if (!init_scan())
3097         break;
3098 
3099       dc_only_scan = (m_spectral_start == 0);
3100       refinement_scan = (m_successive_high != 0);
3101 
3102       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
3103         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3104 
3105       if (dc_only_scan)
3106       {
3107         if (m_spectral_end)
3108           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3109       }
3110       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
3111         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3112 
3113       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
3114         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
3115 
3116       if (dc_only_scan)
3117       {
3118         if (refinement_scan)
3119           decode_block_func = &decode_block_dc_refine;
3120         else
3121           decode_block_func = &decode_block_dc_first;
3122       }
3123       else
3124       {
3125         if (refinement_scan)
3126           decode_block_func = &decode_block_ac_refine;
3127         else
3128           decode_block_func = &decode_block_ac_first;
3129       }
3130 
3131       decode_scan(decode_block_func);
3132 
3133       m_bits_left = 16;
3134       get_bits(16);
3135       get_bits(16);
3136     }
3137 
3138     m_comps_in_scan = m_comps_in_frame;
3139 
3140     for (i = 0; i < m_comps_in_frame; i++)
3141       m_comp_list.ptr[i] = i;
3142 
3143     calc_mcu_block_order();
3144   }
3145 
3146   void init_sequential () {
3147     if (!init_scan())
3148       stop_decoding(JPGD_UNEXPECTED_MARKER);
3149   }
3150 
3151   void decode_start () {
3152     init_frame();
3153 
3154     if (m_progressive_flag)
3155       init_progressive();
3156     else
3157       init_sequential();
3158   }
3159 
  // Calls initit(rfn) and then locate_sof_marker() — presumably this resets the
  // decoder for the reader `rfn` and parses the stream up to the frame (SOF)
  // marker; confirm against those helpers.
  void decode_init (JpegStreamReadFunc rfn) {
    initit(rfn);
    locate_sof_marker();
  }
3164 }
3165 
3166 
3167 // ////////////////////////////////////////////////////////////////////////// //
3168 /// read JPEG image header, determine dimensions and number of components.
3169 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  // Constructs a decoder on `rfn` and reports the dimensions and component count
  // it exposes. On failure the `out` parameters keep their initial zero values.
  if (rfn is null) return false;

  auto probe = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)probe.total_bytes_read); }

  immutable bool ok = (probe.error_code == JPGD_SUCCESS);
  if (ok) {
    width = probe.width;
    height = probe.height;
    actual_comps = probe.num_components;
  }
  return ok;
}
3180 
3181 
3182 // ////////////////////////////////////////////////////////////////////////// //
3183 /// read JPEG image header, determine dimensions and number of components.
3184 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  import core.stdc.stdio;

  // Opens `filename` in binary mode and passes an fread-based reader to
  // detect_jpeg_image_from_stream, whose result is returned.
  // Throws Exception when the name is empty or the file cannot be opened.
  // The file is closed on every exit path.
  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a NUL-terminated C string: short names are copied into a stack
  // buffer, longer ones into a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    auto tfn = cast(char*)malloc(filename.length+1);
    // Test the raw pointer: a slice made from a failed malloc still has a
    // non-zero length and therefore never compares `is null`.
    if (tfn !is null) {
      scope(exit) free(tfn);
      // The name must be copied and terminated here as well; without this
      // fopen() would read uninitialized memory.
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    // Reader: returns the number of bytes read, 0 with *pEOF_flag set once the
    // end of file has been seen, or -1 after a read error.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // A short read is either an error or the end of the file.
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
3231 
3232 
3233 // ////////////////////////////////////////////////////////////////////////// //
3234 /// read JPEG image header, determine dimensions and number of components.
3235 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  // Feeds `buf` to detect_jpeg_image_from_stream through a reader that copies
  // successive slices of the buffer; `consumed` is the read position.
  size_t consumed;
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;

      // Nothing left: signal end of stream.
      immutable size_t remaining = (consumed < buf.length ? buf.length-consumed : 0);
      if (remaining == 0) {
        *pEOF_flag = true;
        return 0;
      }

      // Hand out at most what is left in the buffer.
      if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;

      auto src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(pBuf, src, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps);
}
3252 
3253 
3254 // ////////////////////////////////////////////////////////////////////////// //
3255 /// decompress JPEG image, what else?
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it at -1 to use the image's own component count.
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  // Decodes the image row by row into one tightly packed buffer of
  // width * req_comps bytes per row and returns it; returns null on any failure.
  // With useMalloc the buffer comes from jpgd_malloc, otherwise from the GC.
  if (rfn is null) return null;
  switch (req_comps) {
    case -1, 1, 3, 4: break;
    default: return null;
  }

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  // A negative request means "whatever the image has".
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  immutable int row_bytes = image_width*req_comps;

  static if (useMalloc) {
    ubyte* out_ptr = cast(ubyte*)jpgd_malloc(row_bytes*image_height);
    if (out_ptr is null) return null;
    auto pixels = out_ptr[0..row_bytes*image_height];
  } else {
    auto pixels = new ubyte[](row_bytes*image_height);
    auto out_ptr = pixels.ptr;
  }

  // Gives the output buffer back to whichever allocator produced it.
  void drop_pixels () {
    static if (useMalloc) {
      jpgd_free(out_ptr);
    } else {
      import core.memory : GC;
      GC.free(pixels.ptr);
      pixels = null;
    }
  }

  scope(failure) drop_pixels();

  for (int row = 0; row < image_height; ++row) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      drop_pixels();
      return null;
    }

    ubyte* dst = out_ptr+row*row_bytes;
    const(ubyte)* src = pScan_line;
    immutable int src_comps = decoder.num_components;

    if ((req_comps == 1 && src_comps == 1) || (req_comps == 4 && src_comps == 3)) {
      // The decoder's row already has the requested layout
      // (1 byte per pixel for gray, 4 bytes per pixel for color).
      memcpy(dst, src, row_bytes);
    } else if (src_comps == 1) {
      // Gray source: replicate the value into R, G and B; add opaque alpha
      // unless exactly three components were requested.
      immutable bool with_alpha = (req_comps != 3);
      for (int x = 0; x < image_width; ++x) {
        immutable ubyte luma = src[x];
        dst[0] = luma;
        dst[1] = luma;
        dst[2] = luma;
        if (with_alpha) {
          dst[3] = 255;
          dst += 4;
        } else {
          dst += 3;
        }
      }
    } else if (src_comps == 3) {
      // Color source rows carry 4 bytes per pixel.
      if (req_comps == 1) {
        // Weighted sum of R, G and B in 16.16 fixed point, rounded.
        immutable int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < image_width; ++x) {
          immutable int r = src[0];
          immutable int g = src[1];
          immutable int b = src[2];
          *dst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
          src += 4;
        }
      } else {
        // Drop the fourth byte of every pixel.
        for (int x = 0; x < image_width; ++x) {
          dst[0] = src[0];
          dst[1] = src[1];
          dst[2] = src[2];
          dst += 3;
          src += 4;
        }
      }
    }
  }

  return pixels;
}
3357 
3358 
3359 // ////////////////////////////////////////////////////////////////////////// //
3360 /// decompress JPEG image from disk file.
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it at -1 to use the image's own component count.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.stdio;

  // Opens `filename` in binary mode and passes an fread-based reader to
  // decompress_jpeg_image_from_stream, whose result is returned.
  // Throws Exception when the name is empty or the file cannot be opened.
  // The file is closed on every exit path.
  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a NUL-terminated C string: short names are copied into a stack
  // buffer, longer ones into a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    auto tfn = cast(char*)malloc(filename.length+1);
    // Test the raw pointer: a slice made from a failed malloc still has a
    // non-zero length and therefore never compares `is null`.
    if (tfn !is null) {
      scope(exit) free(tfn);
      // The name must be copied and terminated here as well; without this
      // fopen() would read uninitialized memory.
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    // Reader: returns the number of bytes read, 0 with *pEOF_flag set once the
    // end of file has been seen, or -1 after a read error.
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // A short read is either an error or the end of the file.
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3408 
3409 
3410 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
///
/// `buf` is handed to `decompress_jpeg_image_from_stream` piece by piece through a reader delegate;
/// the outputs and the return value are whatever that stream decoder produces.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  size_t consumed; // number of bytes of `buf` already given to the decoder
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (consumed < buf.length) {
        // never hand out more than what is left in the buffer
        immutable size_t remaining = buf.length-consumed;
        if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
        memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+consumed, max_bytes_to_read);
        consumed += max_bytes_to_read;
        return max_bytes_to_read;
      }
      // everything has been delivered already
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps, req_comps);
}
3429 
3430 
3431 // ////////////////////////////////////////////////////////////////////////// //
// if we have access to "iv.vfs", add some handy API
// true when the `iv.vfs` module can be imported at compile time
enum JpegHasIVVFS = __traits(compiles, { import iv.vfs; });
3434 
3435 static if (JpegHasIVVFS) {
3436 import iv.vfs;
3437 
3438 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from an already opened `VFile`.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
///
/// Data is pulled from `fl` through a reader delegate passed to `decompress_jpeg_image_from_stream`.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto got = fl.rawRead(pBuf[0..max_bytes_to_read]);
        // tell the decoder right away if this read reached the end
        if (fl.eof) *pEOF_flag = true;
        return cast(int)got.length;
      }
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps, req_comps);
}
3455 // vfs API
3456 }
3457 
3458 
3459 // ////////////////////////////////////////////////////////////////////////// //
// if we have access to "arsd.color", add some handy API
// true when the `arsd.color` module can be imported at compile time
enum JpegHasArsd = __traits(compiles, { import arsd.color; });
3462 
3463 
3464 
/// Record of the most recent decoding failure, filled in by `readJpegFromStream`.
public struct LastJpegError {
	/// Where the failure happened: 1 = constructing the decoder, 2 = `begin_decoding`, 3 = decoding a scanline.
	int stage;
	/// The error/status code returned at that stage.
	int code;
	/// Extra information; `readJpegFromStream` stores the decoder's `m_error_code` here for stage 2, otherwise it stays 0.
	int details;
}

/// Last failure recorded by `readJpegFromStream`. It is only written when a failure occurs, never reset on success.
/// NOTE(review): as a plain module-level variable this should be thread-local in D — confirm callers do not expect it to be shared.
public LastJpegError lastJpegError;
3472 
3473 
3474 static if (JpegHasArsd) {
3475 import arsd.color;
3476 static import arsd.core;
3477 
3478 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image, what else?
///
/// Pulls the compressed data through `rfn` and returns the decoded picture as a `TrueColorImage`
/// whose rows are `width*4` bytes long. When the decoder has `autoRotateBasedOnExifOrientation` set,
/// the image is mirrored/rotated according to the decoder's `orientation` value before it is returned.
///
/// Returns: the image, or null if `rfn` is null or decoding fails. On a decoding failure `lastJpegError`
/// is set: stage 1 = decoder construction, stage 2 = `begin_decoding` (or an image with no pixels),
/// stage 3 = scanline decode.
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  enum req_comps = 4; // the output image always has 4 bytes per pixel

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) { lastJpegError = LastJpegError(1, decoder.error_code); return null; }
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}

  auto err = decoder.begin_decoding();
  if (err != JPGD_SUCCESS || image_width < 1 || image_height < 1) {
    lastJpegError = LastJpegError(2, err, decoder.m_error_code);
    return null;
  }

  immutable int dst_bpl = image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  scope(failure) { img.clearInternal(); img = null; }
  ubyte* pImage_data = img.imageData.bytes.ptr;

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    err = decoder.decode(cast(void**)&pScan_line, &scan_line_len);
    if (err != JPGD_SUCCESS) {
      lastJpegError = LastJpegError(3, err);
      img.clearInternal();
      img = null;
      return null;
    }

    // compute the row offset in size_t: y*dst_bpl overflows `int` once the output exceeds 2 GiB
    ubyte* pDst = pImage_data+cast(size_t)y*dst_bpl;

    // req_comps is fixed at 4, so only two conversions can ever run
    if (decoder.num_components == 3) {
      // color scanline: copied verbatim, dst_bpl (width*4) bytes
      // NOTE(review): relies on the decoder emitting 4 bytes per pixel for color images — confirm against jpeg_decoder.decode
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // grayscale scanline: replicate luma into the first three bytes, set the fourth to 255
      for (int x = 0; x < image_width; ++x) {
        ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
    // any other component count leaves the row untouched, as before
  }

  import arsd.color;
  if (decoder.autoRotateBasedOnExifOrientation && img.imageData.colors.length) {
    switch (decoder.orientation) {
      case 0:
      case 1:
        // no work required
        break;
      case 2:
        // mirror horizontal
        ImageTransforms.mirrorHorizontally(img);
        break;
      case 3:
        // rotate 180
        ImageTransforms.rotate180(img);
        break;
      case 4:
        // mirror vertical
        ImageTransforms.mirrorVertically(img);
        break;
      case 5:
        // mirror horizontal and rotate 270 CW
        ImageTransforms.mirrorHorizontally(img);
        ImageTransforms.rotate180(img);
        img = ImageTransforms.rotate90(img);
        break;
      case 6:
        // rotate 90 CW
        img = ImageTransforms.rotate90(img);
        break;
      case 7:
        // mirror horizontal and rotate 90 CW
        ImageTransforms.mirrorHorizontally(img);
        img = ImageTransforms.rotate90(img);
        break;
      case 8:
        // rotate 270 CW aka 90 CCW
        ImageTransforms.rotate180(img);
        img = ImageTransforms.rotate90(img);
        break;
      default:
        // unknown, just leave it alone
        break;
    }
  }

  return img;
}
3612 
3613 
3614 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from disk file.
///
/// Returns: the decoded image, or null if `readJpegFromStream` fails to decode it.
/// Throws: `Exception` if `filename` is empty or the file cannot be opened.
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    // short name: build the zero-terminated copy in a stack buffer
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    // long name: build the zero-terminated copy on the C heap
    import core.stdc.stdlib : malloc, free;
    char* tfn = cast(char*)malloc(filename.length+1);
    // test the raw pointer: a non-empty slice built over a null pointer is never `is null`
    if (tfn !is null) {
      scope(exit) free(tfn);
      // the name must actually be copied before it is handed to fopen
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    // reader callback: returns the number of bytes read, or -1 on error
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3663 
3664 /++
3665 	History:
3666 		Added January 22, 2021 (release version 9.2)
3667 +/
3668 public void writeJpeg(const(char)[] filename, TrueColorImage img, JpegParams params = JpegParams.init) {
3669 	if(!compress_image_to_jpeg_file(filename, img.width, img.height, 4, img.imageData.bytes, params))
3670 		throw new Exception("jpeg write failed"); // FIXME: check errno?
3671 }
3672 
3673 /++
3674   	Encodes an image as jpeg in memory.
3675 
3676 	History:
3677 		Added January 22, 2021 (release version 9.2)
3678 +/
3679 public ubyte[] encodeJpeg(TrueColorImage img, JpegParams params = JpegParams.init) {
3680   	ubyte[] data;
3681 	encodeJpeg((const scope ubyte[] i) {
3682 		data ~= i;
3683 		return true;
3684 	}, img, params);
3685 
3686 	return data;
3687 }
3688 
/// ditto
public void encodeJpeg(scope bool delegate(const scope ubyte[]) dg, TrueColorImage img, JpegParams params = JpegParams.init) {
	// the image data is handed over as 4 bytes per pixel; `dg` receives the encoded output
	immutable bool encoded = compress_image_to_jpeg_stream(dg, img.width, img.height, 4, img.imageData.bytes, params);
	if(encoded) return;
	throw new Exception("encode");
}
3696 
3697 
3698 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
///
/// `buf` is handed to `readJpegFromStream` piece by piece through a reader delegate;
/// the result is whatever `readJpegFromStream` returns.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  size_t consumed; // number of bytes of `buf` already given to the decoder
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (consumed < buf.length) {
        // never hand out more than what is left in the buffer
        immutable size_t remaining = buf.length-consumed;
        if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
        memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+consumed, max_bytes_to_read);
        consumed += max_bytes_to_read;
        return max_bytes_to_read;
      }
      // everything has been delivered already
      *pEOF_flag = true;
      return 0;
    }
  );
}
3716 // done with arsd API
3717 }
3718 
3719 
3720 static if (JpegHasIVVFS) {
/// decompress JPEG image from an already opened `VFile`, via `readJpegFromStream`.
public MemoryImage readJpeg (VFile fl) {
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto got = fl.rawRead(pBuf[0..max_bytes_to_read]);
        // tell the decoder right away if this read reached the end
        if (fl.eof) *pEOF_flag = true;
        return cast(int)got.length;
      }
      *pEOF_flag = true;
      return 0;
    }
  );
}
3735 
/// probe an already opened `VFile` with `detect_jpeg_image_from_stream`.
/// The outputs and the return value are whatever that function produces.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto got = fl.rawRead(pBuf[0..max_bytes_to_read]);
        // tell the detector right away if this read reached the end
        if (fl.eof) *pEOF_flag = true;
        return cast(int)got.length;
      }
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps);
}
3750 // vfs API
3751 }
3752 
3753 
3754 // ////////////////////////////////////////////////////////////////////////// //
3755 version(jpegd_test) {
3756 import arsd.color;
3757 import arsd.png;
3758 
// Test driver: decodes one JPEG (first command-line argument, or "image.jpg") twice —
// once straight from disk and once from an in-memory copy — and writes each result as a PNG.
void main (string[] args) {
  import std.stdio;
  import std.exception : enforce;

  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // keep the call outside of assert(): assert expressions are not evaluated in -release builds
    immutable bool detected = detect_jpeg_image_from_file(fname, width, height, comps);
    enforce(detected, "JPEG detection failed for file");
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    // readJpeg returns null when decoding fails
    enforce(img !is null, "JPEG decoding failed for file");
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      // size_t instead of int: do not truncate the length of large files
      file.length = cast(size_t)fl.size;
      // keep only what was actually read
      file = fl.rawRead(file);
    }
    immutable bool detected = detect_jpeg_image_from_memory(file[], width, height, comps);
    enforce(detected, "JPEG detection failed for memory buffer");
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    enforce(img !is null, "JPEG decoding failed for memory buffer");
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3783 }
3784 
3785 // jpge.cpp - C++ class for JPEG compression.
3786 // Public domain, Rich Geldreich <richgel99@gmail.com>
3787 // Alex Evans: Added RGBA support, linear memory allocator.
3788 // v1.01, Dec. 18, 2010 - Initial release
3789 // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
3790 // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
3791 //                        Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
3792 // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to null in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
3793 //                       Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
3794 //                       Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
3795 // D translation by Ketmar // Invisible Vector
3796 //
3797 // This is free and unencumbered software released into the public domain.
3798 //
3799 // Anyone is free to copy, modify, publish, use, compile, sell, or
3800 // distribute this software, either in source code form or as a compiled
3801 // binary, for any purpose, commercial or non-commercial, and by any
3802 // means.
3803 //
3804 // In jurisdictions that recognize copyright laws, the author or authors
3805 // of this software dedicate any and all copyright interest in the
3806 // software to the public domain. We make this dedication for the benefit
3807 // of the public at large and to the detriment of our heirs and
3808 // successors. We intend this dedication to be an overt act of
3809 // relinquishment in perpetuity of all present and future rights to this
3810 // software under copyright law.
3811 //
3812 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3813 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3814 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
3815 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
3816 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
3817 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
3818 // OTHER DEALINGS IN THE SOFTWARE.
3819 //
3820 // For more information, please refer to <http://unlicense.org/>
3821 /**
3822  * Writes a JPEG image to a file or stream.
3823  * num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
3824  * note that alpha will not be stored in jpeg file.
3825  */
3826 
3827 public:
3828 // ////////////////////////////////////////////////////////////////////////// //
/// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
enum JpegSubsampling {
  Y_ONLY = 0, /// grayscale only
  H1V1 = 1, /// YCbCr, no subsampling
  H2V1 = 2, /// YCbCr, H2V1 subsampling
  H2V2 = 3, /// YCbCr, H2V2 subsampling
}
3831 
/// JPEG compression parameters structure.
public struct JpegParams {
  /// Quality: 1-100, higher is better. Typical values are around 50-95.
  int quality = 85;

  /// subsampling:
  /// 0 = Y (grayscale) only
  /// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
  /// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
  /// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
  JpegSubsampling subsampling = JpegSubsampling.H2V2;

  /// Disables CbCr discrimination - only intended for testing.
  /// If true, the Y quantization table is also used for the CbCr channels.
  bool noChromaDiscrimFlag = false;

  /// NOTE(review): undocumented in the original; presumably selects two-pass encoding — confirm against jpeg_encoder.
  bool twoPass = true;

  /// Returns true when `quality` lies in 1..100 and `subsampling` is not above `JpegSubsampling.H2V2`.
  bool check () const pure nothrow @trusted @nogc {
    immutable bool qualityOk = (quality >= 1 && quality <= 100);
    // the unsigned comparison also rejects negative enum values
    immutable bool subsamplingOk = (cast(uint)subsampling <= cast(uint)JpegSubsampling.H2V2);
    return qualityOk && subsamplingOk;
  }
}
3858 
3859 
3860 // ////////////////////////////////////////////////////////////////////////// //
/// Writes JPEG image to a stream (the `wfn` write callback), using default `JpegParams`.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  auto defaults = JpegParams();
  return compress_image_to_jpeg_stream(wfn, width, height, num_channels, pImage_data, defaults);
}
3865 
/// Writes JPEG image to a stream (the `wfn` write callback).
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: false if the dimensions are not positive, if `pImage_data` holds fewer than
/// `height` rows of `width*num_channels` bytes, or if the encoder reports a failure; true otherwise.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data, in JpegParams comp_params) {
  // rows are addressed through the raw pointer below, so the slice must be validated here
  if (width < 1 || height < 1 || num_channels < 1) return false;
  // 64-bit pitch: width*num_channels cannot overflow it
  immutable ulong pitch = cast(ulong)width*cast(ulong)num_channels;
  // dividing instead of multiplying keeps the "enough rows" check overflow-free
  if (pImage_data.length/pitch < cast(ulong)height) return false;

  jpeg_encoder dst_image;
  if (!dst_image.setup(wfn, width, height, num_channels, comp_params)) return false;
  for (uint pass_index = 0; pass_index < dst_image.total_passes(); pass_index++) {
    for (int i = 0; i < height; i++) {
      // the offset is below pImage_data.length (checked above), so it fits size_t
      const(ubyte)* pBuf = pImage_data.ptr+cast(size_t)(i*pitch);
      if (!dst_image.process_scanline(pBuf)) return false;
    }
    // a null scanline marks the end of the pass
    if (!dst_image.process_scanline(null)) return false;
  }
  dst_image.deinit();
  return true;
}
3883 
3884 
/// Writes JPEG image to file, using default `JpegParams`.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_file (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  auto defaults = JpegParams();
  return compress_image_to_jpeg_file(fname, width, height, num_channels, pImage_data, defaults);
}
3889 
/// Writes JPEG image to file.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: false if the file cannot be opened, if encoding or writing fails, or if closing the file fails.
bool compress_image_to_jpeg_file() (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data, const scope auto ref JpegParams comp_params) {
  import std.internal.cstring;
  import core.stdc.stdio : FILE, fopen, fclose, fwrite;

  FILE* fl = fopen(fname.tempCString, "wb");
  if (fl is null) return false;
  // safety net for the failure path; the success path closes explicitly and clears `fl`
  scope(exit) if (fl !is null) fclose(fl);

  immutable bool encoded = compress_image_to_jpeg_stream(
    delegate bool (scope const(ubyte)[] chunk) {
      // a short write counts as failure
      return (fwrite(chunk.ptr, 1, chunk.length, fl) == chunk.length);
    }, width, height, num_channels, pImage_data, comp_params);
  if (!encoded) return false;

  // close here so that a failed flush is reported to the caller
  immutable bool closed = (fclose(fl) == 0);
  fl = null;
  return closed;
}
3910 
3911 
3912 // ////////////////////////////////////////////////////////////////////////// //
3913 private:
3914 nothrow @trusted @nogc {
// Returns `a` when `a < b`, otherwise `b`.
auto JPGE_MIN(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a < b) return a;
  return b;
}
// Returns `a` when `a > b`, otherwise `b`.
auto JPGE_MAX(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a > b) return a;
  return b;
}
3917 
// Allocates `nSize` bytes with the C runtime's malloc and returns its result unchanged (may be null).
void *jpge_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
// Releases memory obtained from jpge_malloc; a null pointer is ignored.
void jpge_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
3920 
3921 
// Various JPEG enums and tables.
// Sizes of the Huffman value tables declared below, plus the symbol-count and code-size limits
// (MAX_HUFF_CODESIZE bounds the loop in huffman_enforce_max_code_size).
enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }

// Zigzag scan order of an 8x8 block: entry k is the row-major index of the k-th coefficient in zigzag order.
static immutable ubyte[64] s_zag = [ 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
// Base luminance quantization table.
// NOTE(review): looks like the JPEG specification's example table stored in zigzag order — confirm.
static immutable short[64] s_std_lum_quant = [ 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 ];
// Base chrominance quantization table (same layout as the luminance one).
static immutable short[64] s_std_croma_quant = [ 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 ];
// Default Huffman tables. Each `*_bits` array has 17 entries and each `*_val` array lists symbol values.
// NOTE(review): presumably `bits[i]` is the number of codes of length i (entry 0 unused) and `val` holds
// the symbols in code order, as in a JPEG DHT segment — confirm against the code that consumes them.
static immutable ubyte[17] s_dc_lum_bits = [ 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 ];
static immutable ubyte[DC_LUM_CODES] s_dc_lum_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_lum_bits = [ 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d ];
// Fewer than AC_LUM_CODES values are listed here.
static immutable ubyte[AC_LUM_CODES] s_ac_lum_val = [
  0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
  0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
  0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
  0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
  0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
static immutable ubyte[17] s_dc_chroma_bits = [ 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 ];
static immutable ubyte[DC_CHROMA_CODES] s_dc_chroma_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_chroma_bits = [ 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 ];
// Fewer than AC_CHROMA_CODES values are listed here.
static immutable ubyte[AC_CHROMA_CODES] s_ac_chroma_val = [
  0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
  0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
  0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
  0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
  0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
3950 
3951 // Low-level helper functions.
3952 //template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
3953 
// RGB -> YCbCr weights scaled by 65536 (YR+YG+YB == 65536); the converters below compute
// (r*W_R + g*W_G + b*W_B + 32768) >> 16, and add 128 to the Cb/Cr results.
enum YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; // int
3955 //ubyte clamp (int i) { if (cast(uint)(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return cast(ubyte)(i); }
// Saturates `i` to 0..255: negative values give 0, values above 255 give 255.
ubyte clamp() (int i) {
  pragma(inline, true);
  // a single unsigned comparison detects both "negative" and "above 255"
  if (cast(uint)i <= 255) return cast(ubyte)i;
  return cast(ubyte)(i < 0 ? 0 : 255);
}
3957 
// Converts `num_pixels` 3-byte RGB pixels at `pSrc` into 3-byte Y/Cb/Cr pixels at `pDst`.
void RGB_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    // fixed-point weighted sums, rounded by the +32768 before the shift
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = (red*CB_R+green*CB_G+blue*CB_B+32768)>>16;
    immutable int cr = (red*CR_R+green*CR_G+blue*CR_B+32768)>>16;
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(128+cb);
    pDst[2] = clamp(128+cr);
    pDst += 3;
    pSrc += 3;
    --num_pixels;
  }
}
3966 
// Converts `num_pixels` 3-byte RGB pixels at `pSrc` into one luma byte each at `pDst`.
void RGB_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    // fixed-point weighted sum, rounded by the +32768 before the shift
    immutable int luma = (pSrc[0]*YR+pSrc[1]*YG+pSrc[2]*YB+32768)>>16;
    *pDst = cast(ubyte)luma;
    ++pDst;
    pSrc += 3;
    --num_pixels;
  }
}
3972 
// Converts `num_pixels` 4-byte pixels at `pSrc` (the fourth byte is skipped) into 3-byte Y/Cb/Cr pixels at `pDst`.
void RGBA_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    // fixed-point weighted sums, rounded by the +32768 before the shift
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = (red*CB_R+green*CB_G+blue*CB_B+32768)>>16;
    immutable int cr = (red*CR_R+green*CR_G+blue*CR_B+32768)>>16;
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(128+cb);
    pDst[2] = clamp(128+cr);
    pDst += 3;
    pSrc += 4;
    --num_pixels;
  }
}
3981 
// Converts `num_pixels` 4-byte pixels at `pSrc` (the fourth byte is skipped) into one luma byte each at `pDst`.
void RGBA_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    // fixed-point weighted sum, rounded by the +32768 before the shift
    immutable int luma = (pSrc[0]*YR+pSrc[1]*YG+pSrc[2]*YB+32768)>>16;
    *pDst = cast(ubyte)luma;
    ++pDst;
    pSrc += 4;
    --num_pixels;
  }
}
3987 
// Expands `num_pixels` luma bytes at `pSrc` into 3-byte pixels at `pDst`: the luma value followed by 128, 128.
void Y_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    pDst[0] = *pSrc;
    pDst[1] = 128;
    pDst[2] = 128;
    pDst += 3;
    ++pSrc;
    --num_pixels;
  }
}
3991 
// Forward DCT - DCT derived from jfdctint.
// Extra scaling bits that DCT2D leaves in its row-pass outputs (`<<ROW_BITS`, or descaling by
// CONST_BITS-ROW_BITS) and removes again in the column pass (descaling by ...+ROW_BITS+3).
enum { ROW_BITS = 2 }
3994 //#define DCT_DESCALE(x, n) (((x)+(((int)1)<<((n)-1)))>>(n))
// Arithmetic right shift of `x` by `n` bits with rounding: adds half of 2^n before shifting.
int DCT_DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = (cast(int)1)<<(n-1);
  return (x+half)>>n;
}
3996 //#define DCT_MUL(var, c) (cast(short)(var)*cast(int)(c))
3997 
3998 //#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7)
// Code fragment for string mixin (replaces the C macro DCT1D(s0..s7)): a one-dimensional 8-point
// forward DCT step performed in place on the `int` variables s0..s7, which must exist at the mixin site.
// All temporaries (t*, u*, z5) are declared inside the fragment's own braces, so they do not leak out.
// Multiplications truncate one operand to `short` first, mirroring the commented-out DCT_MUL macro above.
// Outputs s0 and s4 are plain sums/differences; the other six carry the fixed-point scale of the
// integer constants and are descaled by the caller (see DCT2D).
enum DCT1D = q{{
  int t0 = s0+s7, t7 = s0-s7, t1 = s1+s6, t6 = s1-s6, t2 = s2+s5, t5 = s2-s5, t3 = s3+s4, t4 = s3-s4;
  int t10 = t0+t3, t13 = t0-t3, t11 = t1+t2, t12 = t1-t2;
  int u1 = (cast(short)(t12+t13)*cast(int)(4433));
  s2 = u1+(cast(short)(t13)*cast(int)(6270));
  s6 = u1+(cast(short)(t12)*cast(int)(-15137));
  u1 = t4+t7;
  int u2 = t5+t6, u3 = t4+t6, u4 = t5+t7;
  int z5 = (cast(short)(u3+u4)*cast(int)(9633));
  t4 = (cast(short)(t4)*cast(int)(2446)); t5 = (cast(short)(t5)*cast(int)(16819));
  t6 = (cast(short)(t6)*cast(int)(25172)); t7 = (cast(short)(t7)*cast(int)(12299));
  u1 = (cast(short)(u1)*cast(int)(-7373)); u2 = (cast(short)(u2)*cast(int)(-20995));
  u3 = (cast(short)(u3)*cast(int)(-16069)); u4 = (cast(short)(u4)*cast(int)(-3196));
  u3 += z5; u4 += z5;
  s0 = t10+t11; s1 = t7+u1+u4; s3 = t6+u2+u3; s4 = t10-t11; s5 = t5+u2+u4; s7 = t4+u1+u3;
}};
4015 
// In-place forward DCT of the 64 `int` values at `p`, treated as an 8x8 row-major block:
// the DCT1D fragment is applied to every row, then to every column.
void DCT2D (int* p) {
  // pass 1: rows; results keep ROW_BITS extra bits of scale
  foreach (immutable row; 0..8) {
    int* q = p+row*8;
    int s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
    mixin(DCT1D);
    q[0] = s0<<ROW_BITS;
    q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS);
    q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS);
    q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
    q[4] = s4<<ROW_BITS;
    q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS);
    q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS);
    q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
  }
  // pass 2: columns; the final descale removes the row-pass scale plus 3 more bits
  foreach (immutable col; 0..8) {
    int* q = p+col;
    int s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
    mixin(DCT1D);
    q[0*8] = DCT_DESCALE(s0, ROW_BITS+3);
    q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3);
    q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3);
    q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
    q[4*8] = DCT_DESCALE(s4, ROW_BITS+3);
    q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3);
    q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3);
    q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
  }
}
4034 
// Work record for the Huffman helpers below: `m_key` is the sort key used by radix_sort_syms
// (and is rewritten in place by calculate_minimum_redundancy), `m_sym_index` identifies the symbol.
struct sym_freq {
  uint m_key;
  uint m_sym_index;
}
4036 
// Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
// `pSyms0` holds the `num_syms` input records and `pSyms1` is scratch space of the same size;
// the returned pointer is whichever of the two buffers ends up holding the ascending result.
sym_freq* radix_sort_syms (uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) {
  enum uint PassCount = 4; // one pass per key byte
  uint[256*PassCount] histogram; // zero-initialized; 256 counters per key byte

  // count how often each byte value occurs at each byte position
  foreach (immutable idx; 0..num_syms) {
    immutable uint key = pSyms0[idx].m_key;
    foreach (immutable uint bytePos; 0..PassCount) {
      ++histogram[256*bytePos+((key>>(8*bytePos))&0xFF)];
    }
  }

  // skip the top passes in which every key has a zero byte
  uint passes = PassCount;
  while (passes > 1 && num_syms == histogram[(passes-1)*256]) --passes;

  sym_freq* src = pSyms0;
  sym_freq* dst = pSyms1;
  uint[256] offsets;
  foreach (immutable uint pass; 0..passes) {
    immutable uint shift = pass*8;
    const(uint)* counts = &histogram[pass<<8];
    // prefix sums give the first output slot of every bucket
    uint running = 0;
    foreach (immutable bucket; 0..256) {
      offsets[bucket] = running;
      running += counts[bucket];
    }
    // stable scatter into the other buffer
    foreach (immutable idx; 0..num_syms) {
      immutable uint bucket = (src[idx].m_key>>shift)&0xFF;
      dst[offsets[bucket]++] = src[idx];
    }
    // the freshly written buffer becomes the input of the next pass
    sym_freq* swap = src;
    src = dst;
    dst = swap;
  }
  return src;
}
4062 
// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
//
// Computes Huffman code lengths in place. On entry A[0..n].m_key holds the symbol weights;
// the merge logic below compares neighbouring entries, so it presumably requires them in
// ascending order (optimize_huffman_table passes the output of radix_sort_syms).
// On return A[i].m_key holds the code length assigned to entry i. m_sym_index is never touched.
void calculate_minimum_redundancy (sym_freq* A, int n) {
  int root, leaf, next, avbl, used, dpth;
  // Trivial inputs: nothing to do, or a single symbol that gets a 1-bit code.
  if (n == 0) return;
  if (n == 1) { A[0].m_key = 1; return; }
  // Phase 1: build the tree in place. Internal node `next` receives the sum of the two smallest
  // remaining weights, taken from the unconsumed leaves (starting at `leaf`) and the unconsumed
  // internal nodes (starting at `root`). A consumed internal node's key is overwritten with the
  // index of its parent. The first internal node (index 0) is always leaf 0 + leaf 1.
  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
  for (next=1; next < n-1; next++)
  {
    // first child of node `next`
    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
    // second child of node `next` (root<next keeps it from consuming itself)
    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
  }
  // Phase 2: turn parent indices into depths. Node n-2 is the tree root at depth 0; every other
  // internal node lies one level below its parent.
  A[n-2].m_key = 0;
  for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
  // Phase 3: turn internal-node depths into leaf depths. At each depth `avbl` is the number of
  // node slots and `used` the number of internal nodes found there; the remaining slots are
  // leaves, which get `dpth` as their code length, filling the array from the back.
  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
  while (avbl>0)
  {
    while (root >= 0 && cast(int)A[root].m_key == dpth) { used++; root--; }
    while (avbl>used) { A[next--].m_key = dpth; avbl--; }
    // each internal node at this depth contributes two slots to the next depth
    avbl = 2*used; dpth++; used = 0;
  }
}
4084 
// Limits a canonical Huffman code to max_code_size bits.
// pNum_codes[len] is the number of codes of length len (indices 1..MAX_HUFF_CODESIZE) and is
// adjusted in place; code_list_len is the number of symbols (nothing is done for 0 or 1).
void huffman_enforce_max_code_size (int* pNum_codes, int code_list_len, int max_code_size) {
  if (code_list_len <= 1) return;

  // Move every over-long code into the longest permitted length bucket.
  for (int len = max_code_size+1; len <= MAX_HUFF_CODESIZE; ++len) pNum_codes[max_code_size] += pNum_codes[len];

  // Sum of 2^(max_code_size-len) over all codes; the loop below runs until it equals 2^max_code_size.
  uint kraft = 0;
  for (int len = 1; len <= max_code_size; ++len) kraft += (cast(uint)pNum_codes[len])<<(max_code_size-len);

  immutable ulong target = 1UL<<max_code_size;
  while (kraft != target) {
    // Drop one code from the longest bucket...
    --pNum_codes[max_code_size];
    // ...and lengthen the longest shorter code by one bit, which makes room for two codes there.
    int len = max_code_size-1;
    while (len > 0 && pNum_codes[len] == 0) --len;
    if (len > 0) {
      --pNum_codes[len];
      pNum_codes[len+1] += 2;
    }
    --kraft;
  }
}
4099 }
4100 
4101 
4102 // ////////////////////////////////////////////////////////////////////////// //
4103 // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
4104 struct jpeg_encoder {
4105 public:
4106   alias WriteFunc = bool delegate (scope const(ubyte)[] buf);
4107 
4108 nothrow /*@trusted @nogc*/:
4109 private:
  alias sample_array_t = int; // element type of the 8x8 sample block handed to DCT2D

  WriteFunc m_pStream; // output sink; called by put_obj/put_buf
  JpegParams m_params; // compression parameters copied in by setup()
  ubyte m_num_components; // 1 for Y_ONLY, otherwise 3 (set in jpg_open)
  ubyte[3] m_comp_h_samp; // per-component horizontal sampling factor, written to the SOF segment
  ubyte[3] m_comp_v_samp; // per-component vertical sampling factor, written to the SOF segment
  int m_image_x, m_image_y, m_image_bpp, m_image_bpl; // image width, height, source channels, m_image_x*source channels
  int m_image_x_mcu, m_image_y_mcu; // width/height rounded up to a multiple of the MCU size
  int m_image_bpl_xlt, m_image_bpl_mcu; // m_image_x*m_num_components and m_image_x_mcu*m_num_components
  int m_mcus_per_row; // m_image_x_mcu/m_mcu_x
  int m_mcu_x, m_mcu_y; // MCU width/height in pixels (8 or 16, set in jpg_open)
  ubyte*[16] m_mcu_lines; // row pointers into one buffer of m_mcu_y converted scanlines; [0] is the allocation freed by deinit()
  ubyte m_mcu_y_ofs; // index of the next row of m_mcu_lines to be filled by load_mcu
  sample_array_t[64] m_sample_array; // current 8x8 block: level-shifted samples in, DCT2D applied in place
  short[64] m_coefficient_array; // quantized coefficients of the current block, stored in s_zag order
  int[64][2] m_quantization_tables; // [0] is used for component 0, [1] for the other components
  // The four Huffman table slots below are indexed as: 0 = DC table 0, 1 = DC table 1, 2 = AC table 0, 3 = AC table 1
  // (see emit_dhts); component 0 uses table 0, the other components use table 1.
  uint[256][4] m_huff_codes; // code value per symbol (filled by compute_huffman_table)
  ubyte[256][4] m_huff_code_sizes; // code length in bits per symbol (filled by compute_huffman_table)
  ubyte[17][4] m_huff_bits; // number of codes of each length; indices 1..16 are used
  ubyte[256][4] m_huff_val; // symbols ordered by increasing code length
  uint[256][4] m_huff_count; // symbol frequencies gathered by code_coefficients_pass_one
  int[3] m_last_dc_val; // previous DC coefficient per component (DC values are coded as differences)
  enum JPGE_OUT_BUF_SIZE = 2048;
  ubyte[JPGE_OUT_BUF_SIZE] m_out_buf; // staging buffer for entropy-coded bytes written by put_bits
  ubyte* m_pOut_buf; // next free byte in m_out_buf
  uint m_out_buf_left; // free bytes remaining in m_out_buf
  uint m_bit_buffer; // pending bits, kept left-aligned within the low 24 bits (see put_bits)
  uint m_bits_in; // number of pending bits in m_bit_buffer
  ubyte m_pass_num; // 0 = not set up, 1 = statistics pass, 2 = output pass, 3 = finished (see terminate_pass_two)
  bool m_all_stream_writes_succeeded = true; // latched to false by the first failed stream write
4141 
4142 private:
4143   // Generates an optimized offman table.
4144   void optimize_huffman_table (int table_num, int table_len) {
4145     sym_freq[MAX_HUFF_SYMBOLS] syms0;
4146     sym_freq[MAX_HUFF_SYMBOLS] syms1;
4147     syms0[0].m_key = 1; syms0[0].m_sym_index = 0;  // dummy symbol, assures that no valid code contains all 1's
4148     int num_used_syms = 1;
4149     const uint *pSym_count = &m_huff_count[table_num][0];
4150     for (int i = 0; i < table_len; i++) {
4151       if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i+1; }
4152     }
4153     sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0.ptr, syms1.ptr);
4154     calculate_minimum_redundancy(pSyms, num_used_syms);
4155 
4156     // Count the # of symbols of each code size.
4157     int[1+MAX_HUFF_CODESIZE] num_codes;
4158     //clear_obj(num_codes);
4159     for (int i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
4160 
4161     enum JPGE_CODE_SIZE_LIMIT = 16u; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
4162     huffman_enforce_max_code_size(num_codes.ptr, num_used_syms, JPGE_CODE_SIZE_LIMIT);
4163 
4164     // Compute m_huff_bits array, which contains the # of symbols per code size.
4165     //clear_obj(m_huff_bits[table_num]);
4166     m_huff_bits[table_num][] = 0;
4167     for (int i = 1; i <= cast(int)JPGE_CODE_SIZE_LIMIT; i++) m_huff_bits[table_num][i] = cast(ubyte)(num_codes[i]);
4168 
4169     // Remove the dummy symbol added above, which must be in largest bucket.
4170     for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) {
4171       if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
4172     }
4173 
4174     // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
4175     for (int i = num_used_syms-1; i >= 1; i--) m_huff_val[table_num][num_used_syms-1-i] = cast(ubyte)(pSyms[i].m_sym_index-1);
4176   }
4177 
4178   bool put_obj(T) (T v) {
4179     try {
4180       return (m_pStream !is null && m_pStream((&v)[0..1]));
4181     } catch (Exception) {}
4182     return false;
4183   }
4184 
4185   bool put_buf() (const(void)* v, uint len) {
4186     try {
4187       return (m_pStream !is null && m_pStream((cast(ubyte*)v)[0..len]));
4188     } catch (Exception) {}
4189     return false;
4190   }
4191 
4192   // JPEG marker generation.
4193   void emit_byte (ubyte i) {
4194     m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_obj(i);
4195   }
4196 
4197   void emit_word(uint i) {
4198     emit_byte(cast(ubyte)(i>>8));
4199     emit_byte(cast(ubyte)(i&0xFF));
4200   }
4201 
4202   void emit_marker (int marker) {
4203     emit_byte(cast(ubyte)(0xFF));
4204     emit_byte(cast(ubyte)(marker));
4205   }
4206 
4207   // Emit JFIF marker
4208   void emit_jfif_app0 () {
4209     emit_marker(M_APP0);
4210     emit_word(2+4+1+2+1+2+2+1+1);
4211     emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
4212     emit_byte(0);
4213     emit_byte(1); /* Major version */
4214     emit_byte(1); /* Minor version */
4215     emit_byte(0); /* Density unit */
4216     emit_word(1);
4217     emit_word(1);
4218     emit_byte(0); /* No thumbnail image */
4219     emit_byte(0);
4220   }
4221 
4222   // Emit quantization tables
4223   void emit_dqt () {
4224     for (int i = 0; i < (m_num_components == 3 ? 2 : 1); i++) {
4225       emit_marker(M_DQT);
4226       emit_word(64+1+2);
4227       emit_byte(cast(ubyte)(i));
4228       for (int j = 0; j < 64; j++) emit_byte(cast(ubyte)(m_quantization_tables[i][j]));
4229     }
4230   }
4231 
4232   // Emit start of frame marker
4233   void emit_sof () {
4234     emit_marker(M_SOF0); /* baseline */
4235     emit_word(3*m_num_components+2+5+1);
4236     emit_byte(8); /* precision */
4237     emit_word(m_image_y);
4238     emit_word(m_image_x);
4239     emit_byte(m_num_components);
4240     for (int i = 0; i < m_num_components; i++) {
4241       emit_byte(cast(ubyte)(i+1)); /* component ID */
4242       emit_byte(cast(ubyte)((m_comp_h_samp[i]<<4)+m_comp_v_samp[i])); /* h and v sampling */
4243       emit_byte(i > 0); /* quant. table num */
4244     }
4245   }
4246 
4247   // Emit Huffman table.
4248   void emit_dht (ubyte* bits, ubyte* val, int index, bool ac_flag) {
4249     emit_marker(M_DHT);
4250     int length = 0;
4251     for (int i = 1; i <= 16; i++) length += bits[i];
4252     emit_word(length+2+1+16);
4253     emit_byte(cast(ubyte)(index+(ac_flag<<4)));
4254     for (int i = 1; i <= 16; i++) emit_byte(bits[i]);
4255     for (int i = 0; i < length; i++) emit_byte(val[i]);
4256   }
4257 
4258   // Emit all Huffman tables.
4259   void emit_dhts () {
4260     emit_dht(m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr, 0, false);
4261     emit_dht(m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr, 0, true);
4262     if (m_num_components == 3) {
4263       emit_dht(m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr, 1, false);
4264       emit_dht(m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr, 1, true);
4265     }
4266   }
4267 
4268   // emit start of scan
4269   void emit_sos () {
4270     emit_marker(M_SOS);
4271     emit_word(2*m_num_components+2+1+3);
4272     emit_byte(m_num_components);
4273     for (int i = 0; i < m_num_components; i++) {
4274       emit_byte(cast(ubyte)(i+1));
4275       if (i == 0)
4276         emit_byte((0<<4)+0);
4277       else
4278         emit_byte((1<<4)+1);
4279     }
4280     emit_byte(0); /* spectral selection */
4281     emit_byte(63);
4282     emit_byte(0);
4283   }
4284 
  // Emit all markers at beginning of image file, in this order:
  // SOI, JFIF APP0, quantization tables, frame header, Huffman tables, scan header.
  void emit_markers () {
    emit_marker(M_SOI);
    emit_jfif_app0();
    emit_dqt();
    emit_sof();
    emit_dhts();
    emit_sos();
  }
4294 
4295   // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
4296   void compute_huffman_table (uint* codes, ubyte* code_sizes, ubyte* bits, ubyte* val) {
4297     import core.stdc.string : memset;
4298 
4299     int i, l, last_p, si;
4300     ubyte[257] huff_size;
4301     uint[257] huff_code;
4302     uint code;
4303 
4304     int p = 0;
4305     for (l = 1; l <= 16; l++)
4306       for (i = 1; i <= bits[l]; i++)
4307         huff_size[p++] = cast(ubyte)l;
4308 
4309     huff_size[p] = 0; last_p = p; // write sentinel
4310 
4311     code = 0; si = huff_size[0]; p = 0;
4312 
4313     while (huff_size[p])
4314     {
4315       while (huff_size[p] == si)
4316         huff_code[p++] = code++;
4317       code <<= 1;
4318       si++;
4319     }
4320 
4321     memset(codes, 0, codes[0].sizeof*256);
4322     memset(code_sizes, 0, code_sizes[0].sizeof*256);
4323     for (p = 0; p < last_p; p++)
4324     {
4325       codes[val[p]]      = huff_code[p];
4326       code_sizes[val[p]] = huff_size[p];
4327     }
4328   }
4329 
4330   // Quantization table generation.
4331   void compute_quant_table (int* pDst, const(short)* pSrc) {
4332     int q;
4333     if (m_params.quality < 50)
4334       q = 5000/m_params.quality;
4335     else
4336       q = 200-m_params.quality*2;
4337     for (int i = 0; i < 64; i++) {
4338       int j = *pSrc++; j = (j*q+50L)/100L;
4339       *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
4340     }
4341   }
4342 
4343   // Higher-level methods.
4344   void first_pass_init () {
4345     import core.stdc.string : memset;
4346     m_bit_buffer = 0; m_bits_in = 0;
4347     memset(m_last_dc_val.ptr, 0, 3*m_last_dc_val[0].sizeof);
4348     m_mcu_y_ofs = 0;
4349     m_pass_num = 1;
4350   }
4351 
4352   bool second_pass_init () {
4353     compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr);
4354     compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr);
4355     if (m_num_components > 1)
4356     {
4357       compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr);
4358       compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr);
4359     }
4360     first_pass_init();
4361     emit_markers();
4362     m_pass_num = 2;
4363     return true;
4364   }
4365 
4366   bool jpg_open (int p_x_res, int p_y_res, int src_channels) {
4367     m_num_components = 3;
4368     switch (m_params.subsampling) {
4369       case JpegSubsampling.Y_ONLY:
4370         m_num_components = 1;
4371         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4372         m_mcu_x          = 8; m_mcu_y          = 8;
4373         break;
4374       case JpegSubsampling.H1V1:
4375         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4376         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4377         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4378         m_mcu_x          = 8; m_mcu_y          = 8;
4379         break;
4380       case JpegSubsampling.H2V1:
4381         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
4382         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4383         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4384         m_mcu_x          = 16; m_mcu_y         = 8;
4385         break;
4386       case JpegSubsampling.H2V2:
4387         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
4388         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4389         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4390         m_mcu_x          = 16; m_mcu_y         = 16;
4391         break;
4392       default: assert(0);
4393     }
4394 
4395     m_image_x        = p_x_res; m_image_y = p_y_res;
4396     m_image_bpp      = src_channels;
4397     m_image_bpl      = m_image_x*src_channels;
4398     m_image_x_mcu    = (m_image_x+m_mcu_x-1)&(~(m_mcu_x-1));
4399     m_image_y_mcu    = (m_image_y+m_mcu_y-1)&(~(m_mcu_y-1));
4400     m_image_bpl_xlt  = m_image_x*m_num_components;
4401     m_image_bpl_mcu  = m_image_x_mcu*m_num_components;
4402     m_mcus_per_row   = m_image_x_mcu/m_mcu_x;
4403 
4404     if ((m_mcu_lines[0] = cast(ubyte*)(jpge_malloc(m_image_bpl_mcu*m_mcu_y))) is null) return false;
4405     for (int i = 1; i < m_mcu_y; i++)
4406       m_mcu_lines[i] = m_mcu_lines[i-1]+m_image_bpl_mcu;
4407 
4408     compute_quant_table(m_quantization_tables[0].ptr, s_std_lum_quant.ptr);
4409     compute_quant_table(m_quantization_tables[1].ptr, (m_params.noChromaDiscrimFlag ? s_std_lum_quant.ptr : s_std_croma_quant.ptr));
4410 
4411     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4412     m_pOut_buf = m_out_buf.ptr;
4413 
4414     if (m_params.twoPass)
4415     {
4416       //clear_obj(m_huff_count);
4417       import core.stdc.string : memset;
4418       memset(m_huff_count.ptr, 0, m_huff_count.sizeof);
4419       first_pass_init();
4420     }
4421     else
4422     {
4423       import core.stdc.string : memcpy;
4424       memcpy(m_huff_bits[0+0].ptr, s_dc_lum_bits.ptr, 17);    memcpy(m_huff_val[0+0].ptr, s_dc_lum_val.ptr, DC_LUM_CODES);
4425       memcpy(m_huff_bits[2+0].ptr, s_ac_lum_bits.ptr, 17);    memcpy(m_huff_val[2+0].ptr, s_ac_lum_val.ptr, AC_LUM_CODES);
4426       memcpy(m_huff_bits[0+1].ptr, s_dc_chroma_bits.ptr, 17); memcpy(m_huff_val[0+1].ptr, s_dc_chroma_val.ptr, DC_CHROMA_CODES);
4427       memcpy(m_huff_bits[2+1].ptr, s_ac_chroma_bits.ptr, 17); memcpy(m_huff_val[2+1].ptr, s_ac_chroma_val.ptr, AC_CHROMA_CODES);
4428       if (!second_pass_init()) return false;   // in effect, skip over the first pass
4429     }
4430     return m_all_stream_writes_succeeded;
4431   }
4432 
4433   void load_block_8_8_grey (int x) {
4434     ubyte *pSrc;
4435     sample_array_t *pDst = m_sample_array.ptr;
4436     x <<= 3;
4437     for (int i = 0; i < 8; i++, pDst += 8)
4438     {
4439       pSrc = m_mcu_lines[i]+x;
4440       pDst[0] = pSrc[0]-128; pDst[1] = pSrc[1]-128; pDst[2] = pSrc[2]-128; pDst[3] = pSrc[3]-128;
4441       pDst[4] = pSrc[4]-128; pDst[5] = pSrc[5]-128; pDst[6] = pSrc[6]-128; pDst[7] = pSrc[7]-128;
4442     }
4443   }
4444 
4445   void load_block_8_8 (int x, int y, int c) {
4446     ubyte *pSrc;
4447     sample_array_t *pDst = m_sample_array.ptr;
4448     x = (x*(8*3))+c;
4449     y <<= 3;
4450     for (int i = 0; i < 8; i++, pDst += 8)
4451     {
4452       pSrc = m_mcu_lines[y+i]+x;
4453       pDst[0] = pSrc[0*3]-128; pDst[1] = pSrc[1*3]-128; pDst[2] = pSrc[2*3]-128; pDst[3] = pSrc[3*3]-128;
4454       pDst[4] = pSrc[4*3]-128; pDst[5] = pSrc[5*3]-128; pDst[6] = pSrc[6*3]-128; pDst[7] = pSrc[7*3]-128;
4455     }
4456   }
4457 
4458   void load_block_16_8 (int x, int c) {
4459     ubyte* pSrc1;
4460     ubyte* pSrc2;
4461     sample_array_t *pDst = m_sample_array.ptr;
4462     x = (x*(16*3))+c;
4463     int a = 0, b = 2;
4464     for (int i = 0; i < 16; i += 2, pDst += 8)
4465     {
4466       pSrc1 = m_mcu_lines[i+0]+x;
4467       pSrc2 = m_mcu_lines[i+1]+x;
4468       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3]+pSrc2[ 0*3]+pSrc2[ 1*3]+a)>>2)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3]+pSrc2[ 2*3]+pSrc2[ 3*3]+b)>>2)-128;
4469       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3]+pSrc2[ 4*3]+pSrc2[ 5*3]+a)>>2)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3]+pSrc2[ 6*3]+pSrc2[ 7*3]+b)>>2)-128;
4470       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3]+pSrc2[ 8*3]+pSrc2[ 9*3]+a)>>2)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3]+pSrc2[10*3]+pSrc2[11*3]+b)>>2)-128;
4471       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3]+pSrc2[12*3]+pSrc2[13*3]+a)>>2)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3]+pSrc2[14*3]+pSrc2[15*3]+b)>>2)-128;
4472       int temp = a; a = b; b = temp;
4473     }
4474   }
4475 
4476   void load_block_16_8_8 (int x, int c) {
4477     ubyte *pSrc1;
4478     sample_array_t *pDst = m_sample_array.ptr;
4479     x = (x*(16*3))+c;
4480     for (int i = 0; i < 8; i++, pDst += 8) {
4481       pSrc1 = m_mcu_lines[i+0]+x;
4482       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3])>>1)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3])>>1)-128;
4483       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3])>>1)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3])>>1)-128;
4484       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3])>>1)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3])>>1)-128;
4485       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3])>>1)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3])>>1)-128;
4486     }
4487   }
4488 
4489   void load_quantized_coefficients (int component_num) {
4490     int *q = m_quantization_tables[component_num > 0].ptr;
4491     short *pDst = m_coefficient_array.ptr;
4492     for (int i = 0; i < 64; i++)
4493     {
4494       sample_array_t j = m_sample_array[s_zag[i]];
4495       if (j < 0)
4496       {
4497         if ((j = -j+(*q>>1)) < *q)
4498           *pDst++ = 0;
4499         else
4500           *pDst++ = cast(short)(-(j/ *q));
4501       }
4502       else
4503       {
4504         if ((j = j+(*q>>1)) < *q)
4505           *pDst++ = 0;
4506         else
4507           *pDst++ = cast(short)((j/ *q));
4508       }
4509       q++;
4510     }
4511   }
4512 
4513   void flush_output_buffer () {
4514     if (m_out_buf_left != JPGE_OUT_BUF_SIZE) m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_buf(m_out_buf.ptr, JPGE_OUT_BUF_SIZE-m_out_buf_left);
4515     m_pOut_buf = m_out_buf.ptr;
4516     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4517   }
4518 
4519   void put_bits (uint bits, uint len) {
4520     m_bit_buffer |= (cast(uint)bits<<(24-(m_bits_in += len)));
4521     while (m_bits_in >= 8) {
4522       ubyte c;
4523       //#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
4524       //JPGE_PUT_BYTE(c = (ubyte)((m_bit_buffer>>16)&0xFF));
4525       //if (c == 0xFF) JPGE_PUT_BYTE(0);
4526       c = cast(ubyte)((m_bit_buffer>>16)&0xFF);
4527       *m_pOut_buf++ = c;
4528       if (--m_out_buf_left == 0) flush_output_buffer();
4529       if (c == 0xFF) {
4530         *m_pOut_buf++ = 0;
4531         if (--m_out_buf_left == 0) flush_output_buffer();
4532       }
4533       m_bit_buffer <<= 8;
4534       m_bits_in -= 8;
4535     }
4536   }
4537 
4538   void code_coefficients_pass_one (int component_num) {
4539     if (component_num >= 3) return; // just to shut up static analysis
4540     int i, run_len, nbits, temp1;
4541     short *src = m_coefficient_array.ptr;
4542     uint *dc_count = (component_num ? m_huff_count[0+1].ptr : m_huff_count[0+0].ptr);
4543     uint *ac_count = (component_num ? m_huff_count[2+1].ptr : m_huff_count[2+0].ptr);
4544 
4545     temp1 = src[0]-m_last_dc_val[component_num];
4546     m_last_dc_val[component_num] = src[0];
4547     if (temp1 < 0) temp1 = -temp1;
4548 
4549     nbits = 0;
4550     while (temp1)
4551     {
4552       nbits++; temp1 >>= 1;
4553     }
4554 
4555     dc_count[nbits]++;
4556     for (run_len = 0, i = 1; i < 64; i++)
4557     {
4558       if ((temp1 = m_coefficient_array[i]) == 0)
4559         run_len++;
4560       else
4561       {
4562         while (run_len >= 16)
4563         {
4564           ac_count[0xF0]++;
4565           run_len -= 16;
4566         }
4567         if (temp1 < 0) temp1 = -temp1;
4568         nbits = 1;
4569         while (temp1 >>= 1) nbits++;
4570         ac_count[(run_len<<4)+nbits]++;
4571         run_len = 0;
4572       }
4573     }
4574     if (run_len) ac_count[0]++;
4575   }
4576 
4577   void code_coefficients_pass_two (int component_num) {
4578     int i, j, run_len, nbits, temp1, temp2;
4579     short *pSrc = m_coefficient_array.ptr;
4580     uint*[2] codes;
4581     ubyte*[2] code_sizes;
4582 
4583     if (component_num == 0)
4584     {
4585       codes[0] = m_huff_codes[0+0].ptr; codes[1] = m_huff_codes[2+0].ptr;
4586       code_sizes[0] = m_huff_code_sizes[0+0].ptr; code_sizes[1] = m_huff_code_sizes[2+0].ptr;
4587     }
4588     else
4589     {
4590       codes[0] = m_huff_codes[0+1].ptr; codes[1] = m_huff_codes[2+1].ptr;
4591       code_sizes[0] = m_huff_code_sizes[0+1].ptr; code_sizes[1] = m_huff_code_sizes[2+1].ptr;
4592     }
4593 
4594     temp1 = temp2 = pSrc[0]-m_last_dc_val[component_num];
4595     m_last_dc_val[component_num] = pSrc[0];
4596 
4597     if (temp1 < 0)
4598     {
4599       temp1 = -temp1; temp2--;
4600     }
4601 
4602     nbits = 0;
4603     while (temp1)
4604     {
4605       nbits++; temp1 >>= 1;
4606     }
4607 
4608     put_bits(codes[0][nbits], code_sizes[0][nbits]);
4609     if (nbits) put_bits(temp2&((1<<nbits)-1), nbits);
4610 
4611     for (run_len = 0, i = 1; i < 64; i++)
4612     {
4613       if ((temp1 = m_coefficient_array[i]) == 0)
4614         run_len++;
4615       else
4616       {
4617         while (run_len >= 16)
4618         {
4619           put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
4620           run_len -= 16;
4621         }
4622         if ((temp2 = temp1) < 0)
4623         {
4624           temp1 = -temp1;
4625           temp2--;
4626         }
4627         nbits = 1;
4628         while (temp1 >>= 1)
4629           nbits++;
4630         j = (run_len<<4)+nbits;
4631         put_bits(codes[1][j], code_sizes[1][j]);
4632         put_bits(temp2&((1<<nbits)-1), nbits);
4633         run_len = 0;
4634       }
4635     }
4636     if (run_len)
4637       put_bits(codes[1][0], code_sizes[1][0]);
4638   }
4639 
4640   void code_block (int component_num) {
4641     DCT2D(m_sample_array.ptr);
4642     load_quantized_coefficients(component_num);
4643     if (m_pass_num == 1)
4644       code_coefficients_pass_one(component_num);
4645     else
4646       code_coefficients_pass_two(component_num);
4647   }
4648 
4649   void process_mcu_row () {
4650     if (m_num_components == 1)
4651     {
4652       for (int i = 0; i < m_mcus_per_row; i++)
4653       {
4654         load_block_8_8_grey(i); code_block(0);
4655       }
4656     }
4657     else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
4658     {
4659       for (int i = 0; i < m_mcus_per_row; i++)
4660       {
4661         load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
4662       }
4663     }
4664     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
4665     {
4666       for (int i = 0; i < m_mcus_per_row; i++)
4667       {
4668         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4669         load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
4670       }
4671     }
4672     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
4673     {
4674       for (int i = 0; i < m_mcus_per_row; i++)
4675       {
4676         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4677         load_block_8_8(i*2+0, 1, 0); code_block(0); load_block_8_8(i*2+1, 1, 0); code_block(0);
4678         load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
4679       }
4680     }
4681   }
4682 
4683   bool terminate_pass_one () {
4684     optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
4685     if (m_num_components > 1)
4686     {
4687       optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
4688     }
4689     return second_pass_init();
4690   }
4691 
  // Ends the output pass: pads the last partial byte with 1 bits, flushes the buffered data
  // and writes the end-of-image marker. Always returns true; write failures are recorded in
  // m_all_stream_writes_succeeded.
  bool terminate_pass_two () {
    put_bits(0x7F, 7); // up to 7 one-bits complete any partially filled byte
    flush_output_buffer();
    emit_marker(M_EOI);
    m_pass_num++; // purposely bump up m_pass_num, for debugging
    return true;
  }
4699 
4700   bool process_end_of_image () {
4701     if (m_mcu_y_ofs)
4702     {
4703       if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
4704       {
4705         for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) {
4706           import core.stdc.string : memcpy;
4707           memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs-1], m_image_bpl_mcu);
4708         }
4709       }
4710       process_mcu_row();
4711     }
4712 
4713     if (m_pass_num == 1)
4714       return terminate_pass_one();
4715     else
4716       return terminate_pass_two();
4717   }
4718 
4719   void load_mcu (const(void)* pSrc) {
4720     import core.stdc.string : memcpy;
4721     const(ubyte)* Psrc = cast(const(ubyte)*)(pSrc);
4722 
4723     ubyte* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
4724 
4725     if (m_num_components == 1)
4726     {
4727       if (m_image_bpp == 4)
4728         RGBA_to_Y(pDst, Psrc, m_image_x);
4729       else if (m_image_bpp == 3)
4730         RGB_to_Y(pDst, Psrc, m_image_x);
4731       else
4732         memcpy(pDst, Psrc, m_image_x);
4733     }
4734     else
4735     {
4736       if (m_image_bpp == 4)
4737         RGBA_to_YCC(pDst, Psrc, m_image_x);
4738       else if (m_image_bpp == 3)
4739         RGB_to_YCC(pDst, Psrc, m_image_x);
4740       else
4741         Y_to_YCC(pDst, Psrc, m_image_x);
4742     }
4743 
4744     // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
4745     if (m_num_components == 1) {
4746       import core.stdc.string : memset;
4747       memset(m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt, pDst[m_image_bpl_xlt-1], m_image_x_mcu-m_image_x);
4748     } else
4749     {
4750       const ubyte y = pDst[m_image_bpl_xlt-3+0], cb = pDst[m_image_bpl_xlt-3+1], cr = pDst[m_image_bpl_xlt-3+2];
4751       ubyte *q = m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt;
4752       for (int i = m_image_x; i < m_image_x_mcu; i++)
4753       {
4754         *q++ = y; *q++ = cb; *q++ = cr;
4755       }
4756     }
4757 
4758     if (++m_mcu_y_ofs == m_mcu_y)
4759     {
4760       process_mcu_row();
4761       m_mcu_y_ofs = 0;
4762     }
4763   }
4764 
4765   void clear() {
4766     m_mcu_lines[0] = null;
4767     m_pass_num = 0;
4768     m_all_stream_writes_succeeded = true;
4769   }
4770 
4771 
4772 public:
  //this () { clear(); }
  // Releases the MCU row buffer through deinit().
  ~this () { deinit(); }

  // Copying is disabled: a copy would share m_mcu_lines[0], and both destructors would free it.
  @disable this (this); // no copies
4777 
4778   // Initializes the compressor.
4779   // pStream: The stream object to use for writing compressed data.
4780   // comp_params - Compression parameters structure, defined above.
4781   // width, height  - Image dimensions.
4782   // channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
4783   // Returns false on out of memory or if a stream write fails.
4784   bool setup() (WriteFunc pStream, int width, int height, int src_channels, const scope auto ref JpegParams comp_params) {
4785     deinit();
4786     if ((pStream is null || width < 1 || height < 1) || (src_channels != 1 && src_channels != 3 && src_channels != 4) || !comp_params.check()) return false;
4787     m_pStream = pStream;
4788     m_params = comp_params;
4789     return jpg_open(width, height, src_channels);
4790   }
4791 
  // Convenience overload: same as the five-argument setup() with default-constructed JpegParams.
  bool setup() (WriteFunc pStream, int width, int height, int src_channels) { return setup(pStream, width, height, src_channels, JpegParams()); }
4793 
  // Returns a reference to the stored compression parameters (m_params).
  @property ref inout(JpegParams) params () return inout pure nothrow @trusted @nogc { pragma(inline, true); return m_params; }
4795 
  // Deinitializes the compressor, freeing any allocated memory. May be called at any time.
  // Releases the MCU row buffer and returns the encoder to the "not set up" state (pass 0).
  void deinit () {
    jpge_free(m_mcu_lines[0]);
    clear(); // also nulls m_mcu_lines[0], so a later deinit() passes null to jpge_free
  }
4801 
  // Number of passes over the image the caller has to feed: 2 in two-pass mode, otherwise 1.
  @property uint total_passes () const pure nothrow @trusted @nogc { pragma(inline, true); return (m_params.twoPass ? 2 : 1); }
  // Current value of m_pass_num: 0 before setup, 1 or 2 while encoding, 3 once the image is finished.
  @property uint cur_pass () const pure nothrow @trusted @nogc { pragma(inline, true); return m_pass_num; }
4804 
4805   // Call this method with each source scanline.
4806   // width*src_channels bytes per scanline is expected (RGB or Y format).
4807   // You must call with null after all scanlines are processed to finish compression.
4808   // Returns false on out of memory or if a stream write fails.
4809   bool process_scanline (const(void)* pScanline) {
4810     if (m_pass_num < 1 || m_pass_num > 2) return false;
4811     if (m_all_stream_writes_succeeded) {
4812       if (pScanline is null) {
4813         if (!process_end_of_image()) return false;
4814       } else {
4815         load_mcu(pScanline);
4816       }
4817     }
4818     return m_all_stream_writes_succeeded;
4819   }
4820 }