1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 module iv.stb.vorbis;
57 
58 import core.stdc.stdio : FILE;
59 
// Windows builds get a local, C-linkage stand-in for `lrintf`.
// NOTE(review): `cast(int)` truncates toward zero, while the C library's
// lrintf rounds according to the current rounding mode -- confirm that the
// difference is acceptable for callers.
version(Windows) {
  extern(C) int lrintf (float f) {
    return cast(int)f;
  }
}
62 
63 nothrow /*@trusted*/:
64 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
65 // import it here, as druntime has no `@nogc` on it (for a reason)
66 private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(in void*, in void*) compar);
67 
68 
69 //////////////////////////////////////////////////////////////////////////////
70 //
71 //  HEADER BEGINS HERE
72 //
73 
74 ///////////   THREAD SAFETY
75 
76 // Individual VorbisDecoder handles are not thread-safe; you cannot decode from
77 // them from multiple threads at the same time. However, you can have multiple
// VorbisDecoder handles and decode from them independently in multiple threads.
79 
80 
81 ///////////   MEMORY ALLOCATION
82 
83 // normally stb_vorbis uses malloc() to allocate memory at startup,
84 // and alloca() to allocate temporary memory during a frame on the
85 // stack. (Memory consumption will depend on the amount of setup
86 // data in the file and how you set the compile flags for speed
87 // vs. size. In my test files the maximal-size usage is ~150KB.)
88 //
89 // You can modify the wrapper functions in the source (setup_malloc,
90 // setup_temp_malloc, temp_malloc) to change this behavior, or you
91 // can use a simpler allocation model: you pass in a buffer from
92 // which stb_vorbis will allocate _all_ its memory (including the
93 // temp memory). "open" may fail with a VORBIS_outofmem if you
94 // do not pass in enough data; there is no way to determine how
95 // much you do need except to succeed (at which point you can
96 // query get_info to find the exact amount required. yes I know
97 // this is lame).
98 //
99 // If you pass in a non-null buffer of the type below, allocation
100 // will occur from it as described above. Otherwise just pass null
101 // to use malloc()/alloca()
102 
/// Caller-supplied memory region; see the "MEMORY ALLOCATION" notes above.
public struct stb_vorbis_alloc {
  /// start of the caller-provided buffer
  ubyte* alloc_buffer;
  /// size of `alloc_buffer`, in bytes
  int alloc_buffer_length_in_bytes;
}
107 
108 
109 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
110 
111 /*
112 public struct stb_vorbis_info {
113   uint sample_rate;
114   int channels;
115 
116   uint setup_memory_required;
117   uint setup_temp_memory_required;
118   uint temp_memory_required;
119 
120   int max_frame_size;
121 }
122 */
123 
124 
125 /* ************************************************************************** *
126 // get general information about the file
127 stb_vorbis_info stb_vorbis_get_info (VorbisDecoder f);
128 
129 // get the last error detected (clears it, too)
130 int stb_vorbis_get_error (VorbisDecoder f);
131 
132 // close an ogg vorbis file and free all memory in use
133 void stb_vorbis_close (VorbisDecoder f);
134 
135 // this function returns the offset (in samples) from the beginning of the
136 // file that will be returned by the next decode, if it is known, or -1
137 // otherwise. after a flush_pushdata() call, this may take a while before
138 // it becomes valid again.
139 // NOT WORKING YET after a seek with PULLDATA API
140 int stb_vorbis_get_sample_offset (VorbisDecoder f);
141 
142 // returns the current seek point within the file, or offset from the beginning
143 // of the memory buffer. In pushdata mode it returns 0.
144 uint stb_vorbis_get_file_offset (VorbisDecoder f);
145 
146 
147 ///////////   PUSHDATA API
148 
149 // this API allows you to get blocks of data from any source and hand
150 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
151 // you how much it used, and you have to give it the rest next time;
152 // and stb_vorbis may not have enough data to work with and you will
153 // need to give it the same data again PLUS more. Note that the Vorbis
154 // specification does not bound the size of an individual frame.
155 
156 // create a vorbis decoder by passing in the initial data block containing
//    the ogg&vorbis headers (you don't need to parse them, just provide
158 //    the first N bytes of the file--you're told if it's not enough, see below)
159 // on success, returns an VorbisDecoder, does not set error, returns the amount of
160 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
161 // on failure, returns null on error and sets *error, does not change *datablock_memory_consumed
162 // if returns null and *error is VORBIS_need_more_data, then the input block was
163 //       incomplete and you need to pass in a larger block from the start of the file
164 VorbisDecoder stb_vorbis_open_pushdata (
165               ubyte* datablock, int datablock_length_in_bytes,
166               int* datablock_memory_consumed_in_bytes,
167               int* error,
168               stb_vorbis_alloc* alloc_buffer
169             );
170 
171 // decode a frame of audio sample data if possible from the passed-in data block
172 //
173 // return value: number of bytes we used from datablock
174 //
175 // possible cases:
176 //     0 bytes used, 0 samples output (need more data)
177 //     N bytes used, 0 samples output (resynching the stream, keep going)
178 //     N bytes used, M samples output (one frame of data)
179 // note that after opening a file, you will ALWAYS get one N-bytes, 0-sample
180 // frame, because Vorbis always "discards" the first frame.
181 //
182 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
183 // instead only datablock_length_in_bytes-3 or less. This is because it wants
184 // to avoid missing parts of a page header if they cross a datablock boundary,
185 // without writing state-machiney code to record a partial detection.
186 //
187 // The number of channels returned are stored in *channels (which can be
188 // null--it is always the same as the number of channels reported by
189 // get_info). *output will contain an array of float* buffers, one per
190 // channel. In other words, (*output)[0][0] contains the first sample from
191 // the first channel, and (*output)[1][0] contains the first sample from
192 // the second channel.
193 int stb_vorbis_decode_frame_pushdata (
194       VorbisDecoder f, ubyte* datablock, int datablock_length_in_bytes,
195       int* channels,   // place to write number of float * buffers
196       float*** output, // place to write float ** array of float * buffers
197       int* samples     // place to write number of output samples
198     );
199 
200 // inform stb_vorbis that your next datablock will not be contiguous with
201 // previous ones (e.g. you've seeked in the data); future attempts to decode
202 // frames will cause stb_vorbis to resynchronize (as noted above), and
203 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
204 // will begin decoding the _next_ frame.
205 //
206 // if you want to seek using pushdata, you need to seek in your file, then
207 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
208 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
209 // if you don't like the result, seek your file again and repeat.
210 void stb_vorbis_flush_pushdata (VorbisDecoder f);
211 
212 
213 //////////   PULLING INPUT API
214 
215 // This API assumes stb_vorbis is allowed to pull data from a source--
216 // either a block of memory containing the _entire_ vorbis stream, or a
217 // FILE* that you or it create, or possibly some other reading mechanism
218 // if you go modify the source to replace the FILE* case with some kind
219 // of callback to your code. (But if you don't support seeking, you may
220 // just want to go ahead and use pushdata.)
221 
222 // decode an entire file and output the data interleaved into a malloc()ed
223 // buffer stored in *output. The return value is the number of samples
224 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
225 // When you're done with it, just free() the pointer returned in *output.
226 int stb_vorbis_decode_filename (const(char)* filename, int* channels, int* sample_rate, short** output);
227 int stb_vorbis_decode_memory (const(ubyte)* mem, int len, int* channels, int* sample_rate, short** output);
228 
229 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
230 // this must be the entire stream!). on failure, returns null and sets *error
231 VorbisDecoder stb_vorbis_open_memory (const(ubyte)* data, int len, int* error, stb_vorbis_alloc* alloc_buffer);
232 
233 // create an ogg vorbis decoder from a filename via fopen(). on failure,
234 // returns null and sets *error (possibly to VORBIS_file_open_failure).
235 VorbisDecoder stb_vorbis_open_filename (const(char)* filename, int* error, stb_vorbis_alloc* alloc_buffer);
236 
237 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
238 // the _current_ seek point (ftell). on failure, returns null and sets *error.
239 // note that stb_vorbis must "own" this stream; if you seek it in between
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
241 // perform stb_vorbis_seek_*() operations on this file, it will assume it
242 // owns the _entire_ rest of the file after the start point. Use the next
243 // function, stb_vorbis_open_file_section(), to limit it.
244 VorbisDecoder stb_vorbis_open_file (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer);
245 
246 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
247 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
248 // on failure, returns null and sets *error. note that stb_vorbis must "own"
249 // this stream; if you seek it in between calls to stb_vorbis, it will become
250 // confused.
251 VorbisDecoder stb_vorbis_open_file_section (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer, uint len);
252 
253 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
254 // after calling seek_frame(), the next call to get_frame_*() will include
255 // the specified sample. after calling stb_vorbis_seek(), the next call to
256 // stb_vorbis_get_samples_* will start with the specified sample. If you
257 // do not need to seek to EXACTLY the target sample when using get_samples_*,
258 // you can also use seek_frame().
259 int stb_vorbis_seek_frame (VorbisDecoder f, uint sample_number);
260 int stb_vorbis_seek (VorbisDecoder f, uint sample_number);
261 
262 // this function is equivalent to stb_vorbis_seek(f, 0)
263 int stb_vorbis_seek_start (VorbisDecoder f);
264 
265 // these functions return the total length of the vorbis stream
266 uint stb_vorbis_stream_length_in_samples (VorbisDecoder f);
267 float stb_vorbis_stream_length_in_seconds (VorbisDecoder f);
268 
269 // decode the next frame and return the number of samples. the number of
270 // channels returned are stored in *channels (which can be null--it is always
271 // the same as the number of channels reported by get_info). *output will
272 // contain an array of float* buffers, one per channel. These outputs will
273 // be overwritten on the next call to stb_vorbis_get_frame_*.
274 //
275 // You generally should not intermix calls to stb_vorbis_get_frame_*()
276 // and stb_vorbis_get_samples_*(), since the latter calls the former.
277 int stb_vorbis_get_frame_float (VorbisDecoder f, int* channels, float*** output);
278 
279 // decode the next frame and return the number of *samples* per channel.
280 // Note that for interleaved data, you pass in the number of shorts (the
281 // size of your array), but the return value is the number of samples per
282 // channel, not the total number of samples.
283 //
284 // The data is coerced to the number of channels you request according to the
285 // channel coercion rules (see below). You must pass in the size of your
286 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
287 // The maximum buffer size needed can be gotten from get_info(); however,
288 // the Vorbis I specification implies an absolute maximum of 4096 samples
289 // per channel.
290 int stb_vorbis_get_frame_short_interleaved (VorbisDecoder f, int num_c, short* buffer, int num_shorts);
291 int stb_vorbis_get_frame_short (VorbisDecoder f, int num_c, short** buffer, int num_samples);
292 
293 // Channel coercion rules:
294 //    Let M be the number of channels requested, and N the number of channels present,
295 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
296 //    and stereo R be the sum of all R and center channels (channel assignment from the
297 //    vorbis spec).
298 //        M    N       output
299 //        1    k      sum(Ck) for all k
300 //        2    *      stereo L, stereo R
301 //        k    l      k > l, the first l channels, then 0s
302 //        k    l      k <= l, the first k channels
303 //    Note that this is not _good_ surround etc. mixing at all! It's just so
304 //    you get something useful.
305 
306 // gets num_samples samples, not necessarily on a frame boundary--this requires
307 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
308 // Returns the number of samples stored per channel; it may be less than requested
309 // at the end of the file. If there are no more samples in the file, returns 0.
310 int stb_vorbis_get_samples_float_interleaved (VorbisDecoder f, int channels, float* buffer, int num_floats);
311 int stb_vorbis_get_samples_float (VorbisDecoder f, int channels, float** buffer, int num_samples);
312 
313 // gets num_samples samples, not necessarily on a frame boundary--this requires
314 // buffering so you have to supply the buffers. Applies the coercion rules above
315 // to produce 'channels' channels. Returns the number of samples stored per channel;
316 // it may be less than requested at the end of the file. If there are no more
317 // samples in the file, returns 0.
318 int stb_vorbis_get_samples_short_interleaved (VorbisDecoder f, int channels, short* buffer, int num_shorts);
319 int stb_vorbis_get_samples_short (VorbisDecoder f, int channels, short** buffer, int num_samples);
320 */
321 
322 ////////   ERROR CODES
323 
/// Error codes reported by the decoder; every numeric value is spelled out.
public enum STBVorbisError {
  no_error = 0,

  need_more_data = 1,        // not a real error

  invalid_api_mixing = 2,    // can't mix API modes
  outofmem = 3,              // not enough memory
  feature_not_supported = 4, // uses floor 0
  too_many_channels = 5,     // STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure = 6,     // fopen() failed
  seek_without_length = 7,   // can't seek in unknown-length file

  unexpected_eof = 10,       // file is truncated?
  seek_invalid = 11,         // seek past EOF

  // decoding errors (corrupt/invalid stream) -- you probably
  // don't care about the exact details of these

  // vorbis errors:
  invalid_setup = 20,
  invalid_stream = 21,

  // ogg errors:
  missing_capture_pattern = 30,
  invalid_stream_structure_version = 31,
  continued_packet_flag_invalid = 32,
  incorrect_stream_serial_number = 33,
  invalid_first_page = 34,
  bad_packet_type = 35,
  cant_find_last_page = 36,
  seek_failed = 37,
}
356 //
357 //  HEADER ENDS HERE
358 //
359 //////////////////////////////////////////////////////////////////////////////
360 
361 
362 // global configuration settings (e.g. set these in the project/makefile),
363 // or just set them in this file at the top (although ideally the first few
364 // should be visible when the header file is compiled too, although it's not
365 // crucial)
366 
367 // STB_VORBIS_NO_INTEGER_CONVERSION
368 //     does not compile the code for converting audio sample data from
369 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
370 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
371 
372 // STB_VORBIS_NO_FAST_SCALED_FLOAT
373 //      does not use a fast float-to-int trick to accelerate float-to-int on
374 //      most platforms which requires endianness be defined correctly.
375 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
376 
377 // STB_VORBIS_MAX_CHANNELS [number]
378 //     globally define this to the maximum number of channels you need.
379 //     The spec does not put a restriction on channels except that
380 //     the count is stored in a byte, so 255 is the hard limit.
381 //     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB. Plus any other memory usage
383 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
384 //     6 (5.1 audio), or 2 (stereo only).
/// Compile-time upper bound on the number of channels (enough for anyone?).
enum int STB_VORBIS_MAX_CHANNELS = 16;
386 
387 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
388 //     after a flush_pushdata(), stb_vorbis begins scanning for the
389 //     next valid page, without backtracking. when it finds something
390 //     that looks like a page, it streams through it and verifies its
391 //     CRC32. Should that validation fail, it keeps scanning. But it's
392 //     possible that _while_ streaming through to check the CRC32 of
393 //     one candidate page, it sees another candidate page. This #define
394 //     determines how many "overlapping" candidate pages it can search
395 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
396 //     garbage pages could be as big as 64KB, but probably average ~16KB.
397 //     So don't hose ourselves by scanning an apparent 64KB page and
398 //     missing a ton of real ones in the interim; so minimum of 2
/// Number of overlapping candidate pages that may be CRC-checked at once.
enum int STB_VORBIS_PUSHDATA_CRC_COUNT = 4;
400 
401 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
402 //     sets the log size of the huffman-acceleration table.  Maximum
403 //     supported value is 24. with larger numbers, more decodings are O(1),
404 //     but the table size is larger so worse cache missing, so you'll have
405 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
/// Log2 of the number of entries in the huffman acceleration table.
enum int STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
407 
408 // STB_VORBIS_FAST_BINARY_LENGTH [number]
409 //     sets the log size of the binary-search acceleration table. this
410 //     is used in similar fashion to the fast-huffman size to set initial
411 //     parameters for the binary search
412 
413 // STB_VORBIS_FAST_HUFFMAN_INT
414 //     The fast huffman tables are much more efficient if they can be
415 //     stored as 16-bit results instead of 32-bit results. This restricts
416 //     the codebooks to having only 65535 possible outcomes, though.
417 //     (At least, accelerated by the huffman table.)
418 //version = STB_VORBIS_FAST_HUFFMAN_INT;
// 16-bit fast-huffman entries are the default; they are only switched off
// when STB_VORBIS_FAST_HUFFMAN_INT has been requested.
version(STB_VORBIS_FAST_HUFFMAN_INT) {
  // 32-bit entries requested: nothing to define
} else {
  version = STB_VORBIS_FAST_HUFFMAN_SHORT;
}
420 
421 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
422 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
423 //     back on binary searching for the correct one. This requires storing
424 //     extra tables with the huffman codes in sorted order. Defining this
425 //     symbol trades off space for speed by forcing a linear search in the
426 //     non-fast case, except for "sparse" codebooks.
427 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
428 
429 // STB_VORBIS_DIVIDES_IN_RESIDUE
430 //     stb_vorbis precomputes the result of the scalar residue decoding
431 //     that would otherwise require a divide per chunk. you can trade off
432 //     space for time by defining this symbol.
433 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
434 
435 // STB_VORBIS_DIVIDES_IN_CODEBOOK
436 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
437 //     stored, or with all elements being chosen from a small range of values,
438 //     and all values possible in all elements. By default, stb_vorbis expands
439 //     this latter kind out to look like the former kind for ease of decoding,
440 //     because otherwise an integer divide-per-vector-element is required to
441 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
442 //     trade off storage for speed.
443 //version = STB_VORBIS_DIVIDES_IN_CODEBOOK;
444 
// Reject the retired STB_VORBIS_CODEBOOK_SHORTS option at compile time.
version(STB_VORBIS_CODEBOOK_SHORTS) {
  static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
}
446 
447 // STB_VORBIS_DIVIDE_TABLE
448 //     this replaces small integer divides in the floor decode loop with
449 //     table lookups. made less than 1% difference, so disabled by default.
450 //version = STB_VORBIS_DIVIDE_TABLE;
451 
452 // STB_VORBIS_NO_DEFER_FLOOR
453 //     Normally we only decode the floor without synthesizing the actual
454 //     full curve. We can instead synthesize the curve immediately. This
455 //     requires more memory and is very likely slower, so I don't think
456 //     you'd ever want to do it except for debugging.
457 //version = STB_VORBIS_NO_DEFER_FLOOR;
458 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
459 
460 
461 // ////////////////////////////////////////////////////////////////////////// //
462 private:
// compile-time sanity checks for the tunables declared above
static assert(!(STB_VORBIS_MAX_CHANNELS > 256), "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
static assert(!(STB_VORBIS_FAST_HUFFMAN_LENGTH > 24), "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");

/// log2 of the largest block size (from specification)
enum MAX_BLOCKSIZE_LOG = 13;
/// largest block size
enum MAX_BLOCKSIZE = 1<<MAX_BLOCKSIZE_LOG;


/// element type of the `Codebook.multiplicands` array
alias codetype = float;
471 
472 // @NOTE
473 //
474 // Some arrays below are tagged "//varies", which means it's actually
475 // a variable-sized piece of data, but rather than malloc I assume it's
476 // small enough it's better to just allocate it all together with the
477 // main thing
478 //
479 // Most of the variables are specified with the smallest size I could pack
480 // them into. It might give better performance to make them all full-sized
481 // integers. It should be safe to freely rearrange the structures or change
482 // the sizes larger--nothing relies on silently truncating etc., nor the
483 // order of variables.
484 
/// number of entries in `Codebook.fast_huffman`
enum FAST_HUFFMAN_TABLE_SIZE = 1<<STB_VORBIS_FAST_HUFFMAN_LENGTH;
/// bit mask that selects a valid `fast_huffman` index
enum FAST_HUFFMAN_TABLE_MASK = FAST_HUFFMAN_TABLE_SIZE-1;
487 
/// Decoded codebook description plus its lookup tables.
struct Codebook {
  int dimensions;
  int entries;
  ubyte* codeword_lengths;
  float minimum_value, delta_value;
  ubyte value_bits, lookup_type, sequence_p, sparse;
  uint lookup_values;
  codetype* multiplicands;
  uint* codewords;
  // acceleration table with FAST_HUFFMAN_TABLE_SIZE entries: 16-bit entries
  // when STB_VORBIS_FAST_HUFFMAN_SHORT is set, 32-bit entries otherwise
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  // tables for the sorted (non-fast-path) lookup
  uint* sorted_codewords;
  int* sorted_values;
  int sorted_entries;
}
509 
/// Floor type 0 parameters.
struct Floor0 {
  ubyte order;
  ushort rate, bark_map_size;
  ubyte amplitude_bits, amplitude_offset, number_of_books;
  ubyte[16] book_list; // varies
}
519 
/// Floor type 1 parameters; arrays tagged "varies" are fixed-size stand-ins
/// for variable-sized data (see the @NOTE above).
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list; // varies
  ubyte[16] class_dimensions, class_subclasses, class_masterbooks; // all three vary
  short[8][16] subclass_books; // varies; 16 rows of 8 entries
  ushort[31*8+2] Xlist; // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors; // one pair per Xlist slot
  ubyte floor1_multiplier, rangebits;
  int values;
}
534 
/// Storage shared by the two floor parameter layouts.
union Floor {
  Floor0 floor0; // floor type 0 view
  Floor1 floor1; // floor type 1 view
}
539 
/// Residue configuration.
struct Residue {
  uint begin, end, part_size;
  ubyte classifications, classbook;
  ubyte** classdata;
  // C original: int16 (*residue_books)[8];
  short[8]* residue_books;
}
549 
/// Per-channel entry of a `Mapping`.
struct MappingChannel {
  ubyte magnitude, angle, mux;
}
555 
/// Channel mapping configuration.
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan;
  ubyte submaps;
  ubyte[15] submap_floor, submap_residue; // both vary
}
563 
/// Mode configuration.
struct Mode {
  ubyte blockflag, mapping;
  ushort windowtype, transformtype;
}
570 
/// State of one in-progress page CRC check.
struct CRCscan {
  /// expected crc if match
  uint goal_crc;
  /// bytes left in packet
  int bytes_left;
  /// running crc
  uint crc_so_far;
  /// bytes processed in _current_ chunk
  int bytes_done;
  /// granule pos encoded in page
  uint sample_loc;
}
578 
/// Page extent and the last sample decoded from that page.
struct ProbedPage {
  uint page_start, page_end, last_decoded_sample;
}
583 
// Stores `e` as the decoder's current error and returns 0.
// The conditional re-assignment has no additional effect; it only exists so
// that a debugger breakpoint can be placed on "interesting" errors.
private int error (VorbisDecoder f, STBVorbisError e) {
  f.error = e;
  immutable bool worthBreaking = !f.eof && e != STBVorbisError.need_more_data;
  if (worthBreaking) f.error = e; // breakpoint for debugging
  return 0;
}
591 
592 // these functions are used for allocating temporary memory
593 // while decoding. if you can afford the stack space, use
594 // alloca(); otherwise, provide a temp buffer and it will
595 // allocate out of those.
// Returns whatever `f.alloc.tempSave` reports, for a later `temp_alloc_restore`.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return f.alloc.tempSave(f);
}
// Hands a value obtained from `temp_alloc_save` back to `f.alloc.tempRestore`.
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  f.alloc.tempRestore(p, f);
}
// Deliberately empty: nothing is released here.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc {
}
599 /*
600 T* temp_alloc(T) (VorbisDecoder f, uint count) nothrow @nogc {
601   auto res = f.alloc.alloc(count*T.sizeof, f);
602   return cast(T*)res;
603 }
604 */
605 
606 /+
607 enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
608 
609 // has to be a mixin, due to `alloca`
610 template temp_alloc(string size) {
611   enum temp_alloc = q{(f.alloc.alloc_buffer ? setup_temp_malloc(f, (${size})) : alloca(${size}))}.cmacroFixVars!("size")(size);
612 }
613 
614 // has to be a mixin, due to `alloca`
615 template temp_block_array(string count, string size) {
616   enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
617     .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
618 }
619 +/
// Builds the source text of an expression sized for `count` pointers plus
// `count` blocks of `size` bytes (the layout `make_block_array` fills in).
// NOTE(review): presumably `cmacroFixVars` substitutes the ${...} placeholders;
// it is defined elsewhere -- confirm.
template array_size_required(string count, string size) {
  enum array_size_required = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
}
621 
// Builds the source text of an `alloca` call for `size` bytes.
enum temp_alloc(string size) = q{alloca(${size})}.cmacroFixVars!("size")(size);
625 
// Builds the source text of a `make_block_array` call whose memory argument
// is the `temp_alloc` text for `array_size_required!(count, size)`.
enum temp_block_array(string count, string size) =
  q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
630 
631 /*
632 T** temp_block_array(T) (VorbisDecoder f, uint count, uint size) {
633   size *= T.sizeof;
634   auto mem = f.alloc.alloc(count*(void*).sizeof+size, f);
635   if (mem !is null) make_block_array(mem, count, size);
636   return cast(T**)mem;
637 }
638 */
639 
// Turns one sufficiently large memory block into an array of `count` pointers
// followed by `count` sub-blocks of `size` bytes each; pointer `i` is set to
// the start of sub-block `i`. Returns `mem` (the start of the pointer array).
private void* make_block_array (void* mem, int count, int size) {
  auto slots = cast(void**)mem;
  auto chunk = cast(char*)(slots+count); // sub-blocks begin right after the pointers
  for (int idx = 0; idx < count; ++idx, chunk += size) slots[idx] = chunk;
  return slots;
}
650 
// Allocates zero-filled setup memory for `sz` elements of type `T` through
// `f.alloc.alloc`.
//
// Params:
//   f  = decoder whose allocator is used
//   sz = number of `T` elements requested
//
// Returns: pointer to the zeroed storage, or null when the allocator fails or
// when the byte count (including the 8-byte pad) does not fit in a `uint`.
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  // refuse requests whose byte size would wrap around in 32 bits
  enum uint maxCount = (uint.max-8)/cast(uint)T.sizeof;
  if (sz > maxCount) return null;
  sz *= cast(uint)T.sizeof;
  /*
  f.setup_memory_required += sz;
  if (f.alloc.alloc_buffer) {
    void* p = cast(char*)f.alloc.alloc_buffer+f.setup_offset;
    if (f.setup_offset+sz > f.temp_offset) return null;
    f.setup_offset += sz;
    return cast(T*)p;
  }
  */
  auto res = f.alloc.alloc(sz+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  if (res !is null) {
    import core.stdc.string : memset;
    memset(res, 0, sz+8); // clear the payload and the pad
  }
  return cast(T*)res;
}
669 
// Releases setup memory through `f.alloc.free`; a null `p` is ignored.
private void setup_free (VorbisDecoder f, void* p) {
  //if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack
  if (p is null) return;
  f.alloc.free(p, f);
}
674 
// Allocates `sz` bytes (plus an 8-byte pad) of zero-filled temporary setup
// memory through `f.alloc.allocTemp`.
//
// Returns: pointer to the zeroed storage, or null when the allocator fails or
// when `sz+8` does not fit in a `uint`.
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  if (sz > uint.max-8) return null; // padded size would wrap around
  auto res = f.alloc.allocTemp(sz+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  if (res !is null) {
    import core.stdc.string : memset;
    memset(res, 0, sz+8); // clear the payload and the pad
  }
  return res;
}
683 
// Releases temporary setup memory through `f.alloc.freeTemp`; a null `p` is
// ignored. A `sz` of 0 is reported to the allocator as 1 (before padding).
// NOTE(review): `setup_temp_malloc(f, 0)` requests 8 bytes while this path
// reports 9 -- confirm the allocator tolerates the difference.
private void setup_temp_free (VorbisDecoder f, void* p, uint sz) {
  if (p is null) return;
  immutable uint padded = (sz ? sz : 1)+8; // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  f.alloc.freeTemp(p, padded, f);
}
687 
/// CRC32 lookup table, one entry per byte value; filled by the module constructor.
immutable uint[256] crc_table;

shared static this () {
  enum CRC32_POLY = 0x04c11db7; // from spec
  foreach (uint idx; 0..256) {
    uint reg = idx<<24;
    // eight shift steps; the polynomial is mixed in whenever the top bit falls out
    foreach (immutable step; 0..8) {
      immutable bool carry = (reg&0x8000_0000U) != 0;
      reg <<= 1;
      if (carry) reg ^= CRC32_POLY;
    }
    crc_table[idx] = reg;
  }
}
698 
// feed one more byte into a running CRC value and return the new value
uint crc32_update (uint crc, ubyte b) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // table slot is selected by the top CRC byte mixed with the incoming byte
  immutable uint slot = (crc>>24)^b;
  return crc_table[slot]^(crc<<8);
}
703 
// used in setup, and for huffman that doesn't go fast path
// mirrors all 32 bits of `n` (bit 0 <-> bit 31, bit 1 <-> bit 30, ...)
private uint bit_reverse (uint n) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // swap neighbours in ever larger groups: bits, pairs, nibbles, bytes, halves
  n = ((n>>1)&0x55555555)|((n&0x55555555)<<1);
  n = ((n>>2)&0x33333333)|((n&0x33333333)<<2);
  n = ((n>>4)&0x0F0F0F0F)|((n&0x0F0F0F0F)<<4);
  n = ((n>>8)&0x00FF00FF)|((n&0x00FF00FF)<<8);
  return (n<<16)|(n>>16);
}
713 
// x squared
private float square (float x) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  immutable float squared = x*x;
  return squared;
}
718 
// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
// as required by the specification. fast(?) implementation from stb.h
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
// `log2_4[v]` is the answer for the 4-bit value `v`
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4];
private int ilog (int n) {
  //static if (__VERSION__ > 2067) pragma(inline, true);
  if (n < 0) return 0; // signed n returns 0
  // pick how far to shift so that what remains fits the 4-bit table;
  // same comparison tree as before: 2 compares if n < 16, 3 compares otherwise
  int shift;
  if (n < (1<<14)) shift = (n < (1<<4) ? 0 : n < (1<<9) ? 5 : 10);
  else if (n < (1<<24)) shift = (n < (1<<19) ? 15 : 20);
  else shift = (n < (1<<29) ? 25 : 30);
  return shift+log2_4[n>>shift];
}
739 
740 
741 // code length assigned to a value with no huffman encoding
742 enum NO_CODE = 255;
743 
744 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
745 //
746 // these functions are only called at setup, and only a few times per file
// unpack a 32-bit packed float: 21-bit mantissa, 10-bit exponent (biased by 788), sign in the top bit
private float float32_unpack (uint x) {
  import core.math : ldexp;
  //static if (__VERSION__ > 2067) pragma(inline, true);
  // from the specification
  immutable uint magnitude = x&0x1fffff;
  immutable uint biased = (x>>21)&0x3ff;
  immutable bool negative = (x&0x80000000) != 0;
  double res = cast(double)magnitude;
  if (negative) res = -res;
  return cast(float)ldexp(cast(float)res, biased-788);
}
757 
758 // zlib & jpeg huffman tables assume that the output symbols
759 // can either be arbitrarily arranged, or have monotonically
760 // increasing frequencies--they rely on the lengths being sorted;
761 // this makes for a very simple generation algorithm.
762 // vorbis allows a huffman table with non-sorted lengths. This
763 // requires a more sophisticated construction, since symbols in
764 // order do not map to huffman codes "in order".
// record one assigned codeword.
// sparse books store codeword, length and symbol at slot `count`;
// non-sparse books store only the codeword, at slot `symbol`.
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
774 
// assign a huffman codeword to every entry of `len[0..n]` whose length is not NO_CODE,
// storing the results through `add_entry`.
// returns true on success (including "no usable entries at all"), and false when the
// lengths don't describe a valid tree or a length is too big for the bookkeeping array.
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  int i, k, m = 0;
  // available[d] is the next free leaf at depth d, or 0 for "none".
  // D zero-initializes static arrays, so no explicit clearing is necessary.
  uint[32] available;

  // find the first entry
  for (k = 0; k < n; ++k) if (len[k] < NO_CODE) break;
  if (k == n) { assert(c.sorted_entries == 0); return true; }
  // depths of 32 and more don't fit into `available`; refuse instead of indexing past its end
  if (len[k] >= available.length) return false;
  // add to the list
  add_entry(c, 0, k, m++, len[k], values);
  // add all available leaves
  for (i = 1; i <= len[k]; ++i) available[i] = 1U<<(32-i);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (i = k+1; i < n; ++i) {
    uint res;
    int z = len[i];
    if (z == NO_CODE) continue;
    // same guard as for the first entry
    if (z >= cast(int)available.length) return false;
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    res = available[z];
    assert(z >= 0 && z < 32);
    available[z] = 0;
    ubyte xxx = len[i];
    add_entry(c,
      bit_reverse(res),
      i,
      m++,
      xxx, // dmd bug: it reads 4 bytes without temp
      values);
    // propagate availability up the tree
    if (z != len[i]) {
      assert(len[i] >= 0 && len[i] < 32);
      for (int y = len[i]; y > z; --y) {
        assert(available[y] == 0);
        available[y] = res+(1U<<(32-y));
      }
    }
  }
  return true;
}
826 
// accelerated huffman table allows fast O(1) match of all symbols
// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
private void compute_accelerated_huffman (Codebook* c) {
  alias Slot = typeof(c.fast_huffman[0]);
  // -1 marks "no short code ends here"
  c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE] = -1;
  auto len = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (len > 32767) len = 32767; // largest possible value we can encode!
  }
  foreach (uint i; 0..len) {
    immutable bits = c.codeword_lengths[i];
    if (bits > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue; // too long for the fast table
    // set table entries for all bit combinations in the higher bits
    immutable uint stride = 1<<bits;
    uint z = (c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]);
    for (; z < FAST_HUFFMAN_TABLE_SIZE; z += stride) c.fast_huffman.ptr[z] = cast(Slot)i; //k8
  }
}
847 
// qsort-style three-way comparison of two `uint`s: -1, 0 or 1
extern(C) int uint32_compare (const void* p, const void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
853 
// should a codeword of length `len` go into the sorted (binary search) table?
// sparse books: always; otherwise only real codes longer than STB_VORBIS_FAST_HUFFMAN_LENGTH
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
860 
// if the fast table above doesn't work, we want to binary
// search them... need to reverse the bits
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  // build a list of all the entries
  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
  // this is kind of a frivolous optimization--I don't see any performance improvement,
  // but it's like 4 extra lines of code, so.
  if (c.sparse) {
    foreach (uint i; 0..c.sorted_entries) c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
  } else {
    int filled = 0;
    foreach (uint i; 0..c.entries) {
      if (!include_in_sort(c, lengths[i])) continue;
      c.sorted_codewords[filled++] = bit_reverse(c.codewords[i]);
    }
    assert(filled == c.sorted_entries);
  }

  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  // sentinel right behind the last real codeword
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  immutable total = (c.sparse ? c.sorted_entries : c.entries);
  // now we need to indicate how they correspond; we could either
  //   #1: sort a different data structure that says who they correspond to
  //   #2: for each sorted entry, search the original list to find who corresponds
  //   #3: for each original entry, find the sorted entry
  // #1 requires extra storage, #2 is slow, #3 can use binary search!
  foreach (uint i; 0..total) {
    immutable huff_len = (c.sparse ? lengths[values[i]] : lengths[i]);
    if (!include_in_sort(c, huff_len)) continue;
    immutable uint code = bit_reverse(c.codewords[i]);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    assert(c.sorted_codewords[lo] == code);
    if (c.sparse) {
      c.sorted_values[lo] = values[i];
      c.codeword_lengths[lo] = huff_len;
    } else {
      c.sorted_values[lo] = i;
    }
  }
}
910 
// only run while parsing the header (3 times)
// true (1) if the six bytes at `data` spell "vorbis", false (0) otherwise
private int vorbis_validate (const(void)* data) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  static immutable string sig = "vorbis";
  auto bytes = cast(const(char)*)data;
  foreach (idx, ch; sig) {
    if (bytes[idx] != ch) return false;
  }
  return true;
}
917 
// called from setup only, once per code book
// (formula implied by specification)
// result `root` satisfies root^dim <= entries < (root+1)^dim (checked by the asserts)
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  // first guess: the dim-th root of `entries`, evaluated through exp/log
  int root = cast(int)lrintf(floor(exp(cast(float)log(cast(float)entries)/dim)));
  // the guess can come out one too small, so probe the next integer
  if (lrintf(floor(pow(cast(float)root+1, dim))) <= entries) root += 1; // (int) cast for MinGW warning; floor() to avoid _ftol() when non-CRT
  assert(pow(cast(float)root+1, dim) > entries);
  assert(lrintf(floor(pow(cast(float)root, dim))) <= entries); // (int), floor() as above
  return root;
}
929 
// called twice per file
// fills the tables for block size `n`: `A` and `B` receive n/2 floats each (n/4 pairs), `C` receives n/4 floats (n/8 pairs)
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int quarter = n>>2, eighth = n>>3;
  foreach (int k; 0..quarter) {
    immutable int k2 = k*2; // each step writes one pair
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (int k; 0..eighth) {
    immutable int k2 = k*2;
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
946 
// fill `window[0..n/2]` with the window curve for block size `n`
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int n2 = n>>1;
  for (int i = 0; i < n2; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/n2*0.5*PI)));
  }
}
952 
// fill `rev[0..n/8]` with the bit-reversed index table for block size `n`
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int shift = 32-ld+3;
  immutable int count = n>>3;
  for (int i = 0; i < count; ++i) rev[i] = cast(ushort)((bit_reverse(i)>>shift)<<2); //k8
}
958 
// allocate and precompute the per-blocksize tables in slot `b` for block size `n`:
// twiddle factors (A, B, C), window and bit-reverse table.
// returns true on success, or the result of `error(..., outofmem)` when an allocation fails.
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1, quarter = n>>2, eighth = n>>3;

  // twiddle factors: all three are requested before any of them is checked
  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  immutable bool twiddleOk = (f.A[b] !is null && f.B[b] !is null && f.C[b] !is null);
  if (!twiddleOk) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  // window
  f.window[b] = setup_malloc!float(f, half);
  if (f.window[b] is null) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  // bit-reverse table
  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (f.bit_reverse[b] is null) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
974 
// among x[0..n], find the index of the largest value below x[n] (stored to *plow)
// and the index of the smallest value above x[n] (stored to *phigh).
// an output is left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  assert(n >= 0 && n <= ushort.max);
  int bestLow = -1;
  int bestHigh = 65536;
  for (int i = 0; i < n; ++i) {
    if (x[i] > bestLow && x[i] < x[n]) {
      *plow = cast(ushort)i;
      bestLow = x[i];
    }
    if (x[i] < bestHigh && x[i] > x[n]) {
      *phigh = cast(ushort)i;
      bestHigh = x[i];
    }
  }
}
984 
// this has been repurposed so y is now the original index instead of y
struct Point {
  ushort x;
  ushort y;
}

// qsort-style three-way comparison of two `Point`s by their `x` field only: -1, 0 or 1
extern(C) int point_compare (const void *p, const void *q) {
  immutable int lx = (cast(const(Point)*)p).x;
  immutable int rx = (cast(const(Point)*)q).x;
  if (lx < rx) return -1;
  return (lx > rx ? 1 : 0);
}
995 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
996 
// ///////////////////////////////////////////////////////////////////// //
// read one byte from the decoder's input.
// returns 0 (and sets `f.eof`) when the byte can't be read; returns 0 without
// touching the input when `f.eof` is already set.
private ubyte get8 (VorbisDecoder f) {
  // must start out defined: this value is returned as-is when eof is already set
  ubyte b = 0;
  if (!f.eof) {
    // a failed read may have left garbage in `b`, so reset it
    if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  }
  return b;
}
1005 
// read a 32-bit value (least significant byte first) from the decoder's input.
// returns 0 when `f.eof` is already set; on little-endian targets a short read sets eof and returns 0.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  uint x = 0;
  version(LittleEndian) {
    // memory layout matches the stream layout, so read all four bytes at once
    if (f.rawRead((&x)[0..1]) != x.sizeof) { f.eof = true; x = 0; }
  } else {
    // assemble byte by byte, lowest byte first
    for (int shift = 0; shift < 32; shift += 8) x |= cast(uint)get8(f)<<shift;
  }
  return x;
}
1020 
// read exactly `n` bytes into `data`.
// false if eof was already set, `n` is negative, or the read came up short (which also sets eof).
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n == 0) return true; // nothing to read
  immutable bool complete = (f.rawRead(data[0..n]) == n);
  if (!complete) f.eof = true;
  return complete;
}
1027 
// skip `n` input bytes; does nothing at eof or when `n` isn't positive
private void skip (VorbisDecoder f, int n) {
  if (!f.eof && n > 0) f.rawSkip(n);
}
1032 
// seek the input to `loc` and clear eof; offsets of 0x80000000 and above are
// refused: no seek is done and eof is set instead
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  f.eof = false;
  if (loc < 0x80000000) {
    f.rawSeek(loc);
    return;
  }
  f.eof = true;
}
1039 
1040 
immutable char[4] ogg_page_header = "OggS"; //[ 0x4f, 0x67, 0x67, 0x53 ];

// read four bytes and check that they are the "OggS" capture pattern;
// false when the bytes can't be read or don't match
private bool capture_pattern (VorbisDecoder f) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  char[4] sign = void; // filled by getn before it is looked at
  return (getn(f, sign.ptr, 4) && sign[] == ogg_page_header[]);
}
1049 
// bits of the page header flag byte
enum PAGEFLAG_continued_packet = 1;
enum PAGEFLAG_first_page = 2;
enum PAGEFLAG_last_page = 4;

// parse a page header whose capture pattern has already been consumed, and load its segment table.
// returns true on success, or the result of `error(...)` otherwise.
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position, read as two 32-bit halves
  immutable uint loc0 = get32(f);
  immutable uint loc1 = get32(f);
  // @TODO: validate loc0, loc1 as valid positions?
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  f.last_page = get32(f);
  // CRC32
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);
  // assume we _don't_ know any the sample position of any segments
  f.end_seg_with_known_loc = -2;
  if (loc0 != ~0U || loc1 != ~0U) {
    // determine which packet is the last one that will complete:
    // walk back over trailing full (255 byte) segments
    int last = f.segment_count-1;
    while (last >= 0 && f.segments.ptr[last] >= 255) --last;
    // 'last' is now the index of the _last_ segment of a packet that ends
    if (last >= 0) {
      f.end_seg_with_known_loc = last;
      f.known_loc_for_packet = loc0;
    }
  }
  if (f.first_decode) {
    // page length: 27 fixed bytes, the segment table and all segment payloads
    int len = 0;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    len += 27+f.segment_count;
    ProbedPage p;
    p.page_start = f.first_audio_page_offset;
    p.page_end = p.page_start+len;
    p.last_decoded_sample = loc0;
    f.p_first = p;
  }
  f.next_seg = 0;
  return true;
}
1101 
// read a complete page header: capture pattern first, then the rest
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
1106 
// get ready to read a new packet: load pages until a segment is available, then reset the per-packet read state.
// a page with the "continued packet" flag is an error here.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  return true;
}
1119 
// like start_packet, but running out of input exactly at a page boundary
// yields a plain `false` instead of an error
private int maybe_start_packet (VorbisDecoder f) {
  if (f.next_seg == -1) {
    immutable ubyte first = get8(f);
    if (f.eof) return false; // EOF at page boundary is not an error!
    // match "OggS" one byte at a time; nothing more is read after the first mismatch
    if (first != 0x4f || get8(f) != 0x67 || get8(f) != 0x67 || get8(f) != 0x53) {
      return error(f, STBVorbisError.missing_capture_pattern);
    }
    if (!start_page_no_capturepattern(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) {
      // set up enough state that we can read this packet if we want,
      // e.g. during recovery
      f.last_seg = false;
      f.bytes_in_seg = 0;
      return error(f, STBVorbisError.continued_packet_flag_invalid);
    }
  }
  return start_packet(f);
}
1139 
// advance to the next segment of the current packet and return its length.
// returns 0 when the packet's last segment was already consumed or the next page
// can't be started; a following page without the "continued packet" flag is an error.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) { f.last_seg = true; return 0; }
    if ((f.page_flag&PAGEFLAG_continued_packet) == 0) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  auto len = f.segments.ptr[f.next_seg];
  ++f.next_seg;
  // a segment shorter than 255 bytes terminates the packet
  if (len < 255) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  // page exhausted: the next call has to load a new page
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = len;
  return len;
}
1157 
enum EOP = (-1); // returned by the packet readers at end of packet
enum INVALID_BITS = (-1); // stored in valid_bits once the bit reader hits end of packet

// read the next byte of the current packet, moving on to the next segment when needed;
// returns EOP when the packet has no more bytes
private int get8_packet_raw (VorbisDecoder f) {
  if (f.bytes_in_seg == 0) {  // CLANG!
    // segment drained: stop if it was the last one, or if no further segment can be entered
    if (f.last_seg || !next_segment(f)) return EOP;
  }
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  --f.bytes_in_seg;
  ++f.packet_bytes;
  return get8(f);
}
1171 
// read the next packet byte (or EOP) and drop any buffered bits
private int get8_packet (VorbisDecoder f) {
  immutable int b = get8_packet_raw(f);
  f.valid_bits = 0;
  return b;
}
1177 
// read a 32-bit value (lowest byte first) from the current packet;
// returns EOP as soon as any of the four bytes is missing
private uint get32_packet (VorbisDecoder f) {
  uint x = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    immutable uint b = get8_packet(f);
    if (b == EOP) return EOP;
    x += b<<shift;
  }
  return x;
}
1189 
// discard whatever is left of the current packet
private void flush_packet (VorbisDecoder f) {
  for (;;) {
    if (get8_packet_raw(f) == EOP) break;
  }
}
1193 
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
// read `n` bits from the packet, lowest bit first.
// returns 0 once the end of the packet was hit (valid_bits is then INVALID_BITS).
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case
      immutable uint low = get_bits_main(f, 24);
      immutable uint high = get_bits_main(f, n-24);
      return low+(high<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // refill the accumulator a byte at a time (at least one byte is needed here)
    do {
      immutable uint b = get8_packet_raw(f);
      if (b == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += b<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint res = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return res;
}
1223 
// chooses minimal possible integer type
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  // smallest unsigned type that holds `n` bits
  static if (n <= 8) alias Res = ubyte;
  else static if (n <= 16) alias Res = ushort;
  else static if (n <= 32) alias Res = uint;
  else alias Res = ulong;
  return cast(Res)get_bits_main(f, n);
}
1232 
// chooses minimal possible integer type, assume no overflow
// (reads `n` bits, adds `add`, then narrows the sum)
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  // smallest unsigned type that holds `n` bits
  static if (n <= 8) alias Res = ubyte;
  else static if (n <= 16) alias Res = ushort;
  else static if (n <= 32) alias Res = uint;
  else alias Res = ulong;
  return cast(Res)(get_bits_main(f, n)+add);
}
1241 
// @OPTIMIZE: primary accumulator for huffman
// expand the buffer to as many bits as possible without reading off end of packet
// it might be nice to allow f.valid_bits and f.acc to be stored in registers,
// e.g. cache them locally and decode locally
//private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
// (code to be mixed in where a decoder named `f` is in scope)
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    for (;;) {
      if (f.last_seg && !f.bytes_in_seg) break;
      immutable int phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
      if (f.valid_bits > 24) break;
    }
  }
};
1260 
// named packet type values
enum VorbisPacket : int {
  id      = 1,
  comment = 3,
  setup   = 5,
}
1266 
// slow-path decode of one codebook symbol from the bit accumulator.
// returns the decoded index (sorted index for sparse books, symbol otherwise), or -1 on failure.
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;
  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  bool useBinarySearch;
  if (c.entries > 8) useBinarySearch = (c.sorted_codewords !is null);
  else useBinarySearch = (c.codewords is null);

  if (useBinarySearch) {
    immutable uint code = bit_reverse(f.acc);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      if (c.sorted_codewords[lo+half] <= code) {
        lo += half;
        span -= half;
      } else {
        span = half;
      }
    }
    // lo is now the sorted index
    if (!c.sparse) lo = c.sorted_values[lo];
    // lo is now sorted index if sparse, or symbol otherwise
    immutable int len = c.codeword_lengths[lo];
    if (f.valid_bits < len) {
      // not enough bits left for this code
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= len;
    f.valid_bits -= len;
    return lo;
  }

  // if small, linear search
  debug(stb_vorbis) assert(!c.sparse);
  foreach (uint i; 0..c.entries) {
    immutable int bits = c.codeword_lengths[i];
    if (bits == NO_CODE) continue;
    if (c.codewords[i] != (f.acc&((1<<bits)-1))) continue;
    if (f.valid_bits < bits) {
      // matched, but not enough bits left for this code
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= bits;
    f.valid_bits -= bits;
    return i;
  }
  error(f, STBVorbisError.invalid_stream);
  f.valid_bits = 0;
  return -1;
}
1318 
1319 
// code generator: decode one symbol of codebook `c` into variable `var`, trying the
// fast lookup table first and using codebook_decode_scalar_raw when the table has no entry.
// `var` ends up negative on failure. expects a decoder named `f` in scope.
enum DECODE_RAW(string var, string c) = q{
  if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
  // fast huffman table lookup
  ${i} = f.acc&FAST_HUFFMAN_TABLE_MASK;
  ${i} = ${c}.fast_huffman.ptr[${i}];
  if (${i} < 0) {
    ${i} = codebook_decode_scalar_raw(f, ${c});
  } else {
    auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
    f.acc >>= ${__temp_prefix__}n;
    f.valid_bits -= ${__temp_prefix__}n;
    if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
  }
}.cmacroFixVars!("i", "c")(var, c);
1336 
// code generator: DECODE_RAW, followed by mapping the result through `sorted_values` for sparse codebooks
template DECODE(string var, string c) {
  enum DECODE = q{
    ${DECODE_RAW}
    if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
  }.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
}
1341 
1342 
// decoder generator used by the vector decoders below; which one depends on STB_VORBIS_DIVIDES_IN_CODEBOOK
version(STB_VORBIS_DIVIDES_IN_CODEBOOK) alias DECODE_VQ = DECODE;
else alias DECODE_VQ = DECODE_RAW;
1348 
1349 
1350 
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
// (each of these produces source text to be mixed in)
template CODEBOOK_ELEMENT(string c, string off) {
  enum CODEBOOK_ELEMENT = "("~c~".multiplicands["~off~"])";
}
template CODEBOOK_ELEMENT_FAST(string c, string off) {
  enum CODEBOOK_ELEMENT_FAST = "("~c~".multiplicands["~off~"])";
}
template CODEBOOK_ELEMENT_BASE(string c) {
  enum CODEBOOK_ELEMENT_BASE = "(0)";
}
1356 
1357 
// decode the next symbol of codebook `c` for vector use.
// returns the symbol, or a negative value on failure; running off the end of
// the packet is the only failure that is not reported through `error`.
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return -1;
  }
  int z = -1;
  mixin(DECODE_VQ!("z", "c"));
  debug(stb_vorbis) if (c.sparse) assert(z < c.sorted_entries);
  if (z >= 0) return z;
  // check for EOP
  if (!f.bytes_in_seg && f.last_seg) return z;
  error(f, STBVorbisError.invalid_stream);
  return z;
}
1373 
// decode one vector from codebook `c` and add it onto `output[0..len]`
// (`len` is clipped to the codebook's dimension count). false if no symbol could be decoded.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (c.dimensions < len) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int div = 1;
      for (int i = 0; i < len; ++i, div *= c.lookup_values) {
        immutable int off = (z/div)%c.lookup_values;
        immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i] += val;
        if (c.sequence_p) last = val+c.minimum_value;
      }
      return true;
    }
  }

  // the vector for symbol z starts at z*dimensions
  z *= c.dimensions;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  foreach (immutable i; 0..len) {
    immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i] += val;
    // sequence books feed each value (plus the minimum) into the next one
    if (c.sequence_p) last = val+c.minimum_value;
  }

  return true;
}
1409 
// decode one vector from codebook `c` and add its elements onto `output[0]`, `output[step]`,
// `output[2*step]`, ... (`len` is clipped to the codebook's dimension count).
// false if no symbol could be decoded.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (c.dimensions < len) len = c.dimensions;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      int div = 1;
      for (int i = 0; i < len; ++i, div *= c.lookup_values) {
        immutable int off = (z/div)%c.lookup_values;
        immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i*step] += val;
        if (c.sequence_p) last = val;
      }
      return true;
    }
  }

  // the vector for symbol z starts at z*dimensions
  z *= c.dimensions;
  for (int i = 0; i < len; ++i) {
    immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i*step] += val;
    if (c.sequence_p) last = val;
  }

  return true;
}
1439 
// decode vectors from codebook `c` until `total_decode` values were produced, adding them
// round-robin onto the `ch` channel buffers in `outputs` (null buffers are skipped, but still
// take part in the rotation).
// `*c_inter_p` / `*p_inter_p` are the channel and the per-channel position to start at; both
// are updated on success. `len` is the number of values per channel buffer.
// returns true on success, false on end of packet, or the result of `error(...)` for a bad stream.
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective)
    // (what is left is the buffer length minus that offset; subtracting c_inter
    // with the wrong sign would overshoot the end by 2*c_inter values)
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          // outputs holds plain float pointers, so index them directly
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    // the vector for symbol z starts at z*dimensions
    z *= c.dimensions;
    if (c.sequence_p) {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1501 
//private int predict_point (int x, int x0, int x1, int y0, int y1)
// code generator: store into `dest` the value at `x` of the line through (x0,y0)-(x1,y1),
// with the integer division applied to the magnitude of the slope
enum predict_point(string dest, string x, string x0, string x1, string y0, string y1) = q{{
  //import std.math : abs;
  immutable int dy = ${y1}-${y0};
  immutable int adx = ${x1}-${x0};
  // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  immutable int err = /*abs(dy)*/(dy < 0 ? -dy : dy)*(${x}-${x0});
  immutable int off = err/adx;
  /*return*/
  if (dy < 0) ${dest} = ${y0}-off; else ${dest} = ${y0}+off;
}}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
1512 
// Lookup table with 256 multipliers, block-copied from the specification;
// do not "clean up" or recompute the literals.
// The values rise monotonically from about 1.06e-07 at index 0 to exactly 1.0
// at index 255. The draw_line macro indexes this table with its integer `y`
// value and feeds the result to LINE_OP.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
];
1580 
1581 
1582 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1583 // note that you must produce bit-identical output to decode correctly;
1584 // this specific sequence of operations is specified in the spec (it's
1585 // drawing integer-quantized frequency-space lines that the encoder
1586 // expects to be exactly the same)
1587 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1588 // have to divide in the setup? sigh.
// LINE_OP!(a, b) produces the source text of a single statement that combines
// the lvalue expression `a` with the value expression `b`:
//   - with STB_VORBIS_NO_DEFER_FLOOR:  "a = b;"   (plain store)
//   - otherwise:                       "a *= b;"  (multiply in place)
// Only the operator differs between the two builds, so it is selected once
// here and spliced into the statement text below.
version(STB_VORBIS_NO_DEFER_FLOOR) {
  private enum string LINE_OP_OPERATOR = " = ";
} else {
  private enum string LINE_OP_OPERATOR = " *= ";
}
enum LINE_OP(string a, string b) = a~LINE_OP_OPERATOR~b~";";
1594 
// Optional precomputed quotient table, only compiled in when
// STB_VORBIS_DIVIDE_TABLE is set. draw_line consults it as
// integer_divide_table[ady][adx] when ady < DIVTAB_NUMER and
// adx < DIVTAB_DENOM, instead of performing an integer division.
// NOTE(review): the code that fills the table is not in this part of the
// file; a module-level variable without `__gshared`/`shared` is thread-local
// in D, so confirm the table is initialised on every thread that decodes.
version(STB_VORBIS_DIVIDE_TABLE) {
  enum DIVTAB_NUMER = 32; // number of rows    (first index, the numerator)
  enum DIVTAB_DENOM = 64; // number of columns (second index, the denominator)
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table; // 2KB
}
1600 
// nobranch abs trick
// ABS!"v" yields the text of the expression ((v + s) ^ s) with s = v>>31.
// For a 32-bit signed `v` the shift gives 0 for non-negative values and -1
// for negative ones, so the expression is `v` or `-v` respectively.
// The argument text is pasted three times, so it must be free of side
// effects. Like any two's-complement abs, int.min maps to itself.
enum ABS(string v) = q{(((${v})+((${v})>>31))^((${v})>>31))}.cmacroFixVars!"v"(v);
1603 
// String-mixin replacement for the (force-inlined) C helper
//   void draw_line (float* output, int x0, int y0, int x1, int y1, int n)
// It is a macro rather than a function because the original relied on
// forced inlining, which the dmd inliner does not deliver.
//
// Use as `mixin(draw_line!("output", "x0", "y0", "x1", "y1", "n"));`.
// The expansion is a block statement that walks x from x0 up to (but not
// including) min(x1, n) and, for every x, applies LINE_OP to output[x] with
// inverse_db_table[y], where y follows the integer line from (x0,y0)
// towards (x1,y1):
//   - `base` is the truncated slope dy/adx and is added to y on every step;
//   - `err` accumulates the remainder (`ady` after removing |base|*adx) and,
//     whenever it reaches adx, y advances by `sy` (base+1 or base-1,
//     following the sign of dy) instead of `base`.
//
// Things a caller must know:
//   - x1 must differ from x0: the slope is computed as dy/adx.
//   - the `x1` argument is ASSIGNED (clamped to n) inside the expansion, so
//     it has to be an lvalue and the caller's variable is modified.
//   - y is used unchecked as an index into inverse_db_table.
//   - NOTE(review): ${__temp_prefix__} is presumably replaced by
//     cmacroFixVars with a prefix that keeps the temporaries from clashing
//     with caller names -- confirm in the macro helper.
//   - the commented-out tail is the older variant of the loop without the
//     `x < x1` guard; it is kept for reference only.
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${x1} > ${n}) ${x1} = ${n};
  if (${__temp_prefix__}x < ${x1}) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    }
  }
  /*
  mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
    ${__temp_prefix__}err += ${__temp_prefix__}ady;
    if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
      ${__temp_prefix__}err -= ${__temp_prefix__}adx;
      ${__temp_prefix__}y += ${__temp_prefix__}sy;
    } else {
      ${__temp_prefix__}y += ${__temp_prefix__}base;
    }
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  }
  */
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1662 
// Decode one residue partition of `n` values for a single channel.
//
//   f      - decoder state, handed through to the codebook decoders
//   book   - codebook used for this partition
//   target - channel buffer; decoding starts at target[offset]
//   offset - index of the first value of the partition inside `target`
//   n      - number of values in the partition
//   rtype  - residue type; 0 selects the strided layout, any other value
//            the sequential layout
//
// Returns nonzero (true) on success and 0 (false) as soon as one codebook
// decode fails.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    // sequential layout: every decoded vector fills the next
    // `book.dimensions` consecutive slots
    float* dst = target+offset;
    int filled = 0;
    while (filled < n) {
      if (!codebook_decode(f, book, dst, n-filled)) return false;
      filled += book.dimensions;
      dst += book.dimensions;
    }
    return true;
  }

  // strided layout: vector number k starts at slot k and its components are
  // `step` slots apart
  immutable int step = n/book.dimensions;
  for (int k = 0; k < step; ++k) {
    if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) return false;
  }
  return true;
}
1676 
// Decode the residue vectors for one group of channels into `residue_buffers`.
//
//   f               - decoder state (codebooks, residue configurations and the
//                     temporary allocator)
//   residue_buffers - one buffer per channel; for every channel that is not
//                     masked out, the first `n` floats are zeroed before
//                     decoding starts
//   ch              - number of channels handled by this call
//   n               - number of floats per channel buffer
//   rn              - index of the residue configuration to use
//                     (f.residue_config / f.residue_types)
//   do_not_decode   - `ch` flags; a nonzero entry means "skip this channel"
//
// Decoding is best effort: when the packet runs out (EOP) or a codebook
// decode fails, the function jumps to `done` and returns with whatever has
// been decoded so far. Nothing is reported to the caller.
//
// Up to 8 passes are made over the partitions. On pass 0 a classification
// word is read for every group of `classwords` partitions; on every pass the
// per-partition classification selects the codebook for that pass
// (r.residue_books[class][pass], negative meaning "nothing in this pass").
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions;
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size; // number of partitions to decode
  uint temp_alloc_point = temp_alloc_save(f);
  // per-channel scratch holding the classification of every partition
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  // residue type 2 with several channels: the channels are decoded as one
  // interleaved vector; (c_inter, p_inter) is the (channel, position) cursor
  if (rtype == 2 && ch != 1) {
    int j = void;
    // nothing to do when every channel is masked out
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      if (ch == 2) {
        // two channels: split the interleaved index with a mask and a shift
        //stb_prof(13);
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = (z&1), p_inter = z>>1;
          if (pass == 0) {
            Codebook *cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              // `classifications[0]` is a raw int*, so it is indexed directly
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          //stb_prof(5);
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(20); // accounts for X time
              version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              } else {
                // saves 1%
                //if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r.part_size)) goto done; // according to C source
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              }
              //stb_prof(7);
            } else {
              // no codebook for this class in this pass: move the cursor
              // past the partition without decoding anything
              zz += r.part_size;
              c_inter = zz&1;
              p_inter = zz>>1;
            }
          }
          //stb_prof(8);
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else if (ch == 1) {
        // NOTE: the enclosing condition already requires ch != 1, so this
        // branch cannot be taken; it is kept to mirror the C source
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = 0, p_inter = z;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = 0;
              p_inter = zz;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else {
        // general channel count: split the interleaved index with % and /
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = z%ch, p_inter = z/ch;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  // every other case: each channel has its own classification words and is
  // decoded on its own through residue_decode
  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j][i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j][pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  // common exit: release the classification scratch and rewind the
  // temporary allocator to where it was on entry
  //stb_prof(0);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1893 
1894 
1895 // the following were split out into separate functions while optimizing;
1896 // they could be pushed back up but eh. __forceinline showed no change;
1897 // they're probably already being inlined.
// Butterfly loop used for iteration 0 of IMDCT step 3.
//
//   n     - loop size; n>>2 groups are processed (expected to be a multiple
//           of 4, checked in debug builds only)
//   e     - base of the working buffer
//   i_off - index of the highest element of the upper run
//   k_off - signed distance from the upper run to the lower run
//   A     - twiddle table; one (A[0], A[1]) pair is consumed per butterfly
//           and the pointer then advances by 8 floats
//
// Each group handles 8 consecutive floats of both runs, walking downwards,
// as four butterflies: the upper pair receives the sum, the lower pair
// receives the difference rotated by the current twiddle pair.
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  float* upper = e+i_off;
  float* lower = upper+k_off;
  debug(stb_vorbis) assert((n&3) == 0);
  for (int groups = n>>2; groups > 0; --groups) {
    foreach (immutable pair; 0..4) {
      immutable int re = -2*pair; // offsets 0, -2, -4, -6
      immutable int im = re-1;    // offsets -1, -3, -5, -7
      immutable float diffRe = upper[re]-lower[re];
      immutable float diffIm = upper[im]-lower[im];
      upper[re] += lower[re];
      upper[im] += lower[im];
      lower[re] = diffRe*A[0]-diffIm*A[1];
      lower[im] = diffIm*A[0]+diffRe*A[1];
      A += 8;
    }
    upper -= 8;
    lower -= 8;
  }
}
1939 
// Butterfly loop for the step 3 iterations where the twiddle factor changes
// on every butterfly.
//
//   lim   - loop size; lim>>2 groups are processed
//   e     - base of the working buffer
//   d0    - index of the highest element of the upper run
//   k_off - signed distance from the upper run to the lower run
//   A     - twiddle table; one (A[0], A[1]) pair is used per butterfly
//   k1    - number of floats the twiddle pointer advances after each butterfly
//
// Each group handles 8 consecutive floats of both runs, walking downwards:
// the upper pair receives the sum, the lower pair the rotated difference.
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* upper = e+d0;
  float* lower = upper+k_off;
  for (int groups = lim>>2; groups > 0; --groups) {
    for (int re = 0; re > -8; re -= 2) {
      immutable int im = re-1;
      immutable float diffRe = upper[re]-lower[re];
      immutable float diffIm = upper[im]-lower[im];
      upper[re] += lower[re];
      upper[im] += lower[im];
      lower[re] = diffRe*A[0]-diffIm*A[1];
      lower[im] = diffIm*A[0]+diffRe*A[1];
      A += k1;
    }
    upper -= 8;
    lower -= 8;
  }
}
1985 
// Butterfly loop for the step 3 iterations where the same four twiddle pairs
// are reused for every group.
//
//   n     - number of groups to process
//   e     - base of the working buffer
//   i_off - index of the highest element of the first upper run
//   k_off - signed distance from the upper run to the lower run
//   A     - twiddle table; pair t is read from A[a_off*t] and A[a_off*t+1]
//   a_off - distance, in floats, between the four twiddle pairs
//   k0    - number of floats both runs move down after each group
//
// The twiddles are loaded once up front; each group then performs four
// butterflies on 8 consecutive floats of both runs.
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  float[4] twRe = void, twIm = void;
  foreach (immutable t; 0..4) {
    twRe.ptr[t] = A[a_off*t];
    twIm.ptr[t] = A[a_off*t+1];
  }
  float* upper = e+i_off;
  float* lower = upper+k_off;
  for (int groups = n; groups > 0; --groups) {
    foreach (immutable t; 0..4) {
      immutable int re = -2*t;
      immutable int im = re-1;
      immutable float diffRe = upper[re]-lower[re];
      immutable float diffIm = upper[im]-lower[im];
      upper[re] += lower[re];
      upper[im] += lower[im];
      lower[re] = diffRe*twRe.ptr[t]-diffIm*twIm.ptr[t];
      lower[im] = diffIm*twRe.ptr[t]+diffRe*twIm.ptr[t];
    }
    upper -= k0;
    lower -= k0;
  }
}
2031 
// String-mixin replacement for the (force-inlined) C helper
//   void iter_54 (float* z)
//
// Use as `mixin(iter_54!"ptrExpr");`. The expansion is a block statement
// that evaluates `ptrExpr` once into a local pointer and then rewrites the
// eight floats z[0], z[-1], ... z[-7] in place using additions and
// subtractions only (no multiplications). The trailing comment on each store
// spells out which inputs the new value is built from.
// Statement order matters: several inputs are read again after neighbouring
// elements have already been overwritten (e.g. z[-3] and z[-7] are read for
// k33 and later for y3), so do not reorder the lines.
enum iter_54(string z) = q{{
  auto ${__temp_prefix__}z = (${z});
  float ${__temp_prefix__}k00, ${__temp_prefix__}k11, ${__temp_prefix__}k22, ${__temp_prefix__}k33;
  float ${__temp_prefix__}y0, ${__temp_prefix__}y1, ${__temp_prefix__}y2, ${__temp_prefix__}y3;

  ${__temp_prefix__}k00 = ${__temp_prefix__}z[ 0]-${__temp_prefix__}z[-4];
  ${__temp_prefix__}y0  = ${__temp_prefix__}z[ 0]+${__temp_prefix__}z[-4];
  ${__temp_prefix__}y2  = ${__temp_prefix__}z[-2]+${__temp_prefix__}z[-6];
  ${__temp_prefix__}k22 = ${__temp_prefix__}z[-2]-${__temp_prefix__}z[-6];

  ${__temp_prefix__}z[-0] = ${__temp_prefix__}y0+${__temp_prefix__}y2;   // z0+z4+z2+z6
  ${__temp_prefix__}z[-2] = ${__temp_prefix__}y0-${__temp_prefix__}y2;   // z0+z4-z2-z6

  // done with ${__temp_prefix__}y0, ${__temp_prefix__}y2

  ${__temp_prefix__}k33 = ${__temp_prefix__}z[-3]-${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-4] = ${__temp_prefix__}k00+${__temp_prefix__}k33; // z0-z4+z3-z7
  ${__temp_prefix__}z[-6] = ${__temp_prefix__}k00-${__temp_prefix__}k33; // z0-z4-z3+z7

  // done with ${__temp_prefix__}k33

  ${__temp_prefix__}k11 = ${__temp_prefix__}z[-1]-${__temp_prefix__}z[-5];
  ${__temp_prefix__}y1  = ${__temp_prefix__}z[-1]+${__temp_prefix__}z[-5];
  ${__temp_prefix__}y3  = ${__temp_prefix__}z[-3]+${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-1] = ${__temp_prefix__}y1+${__temp_prefix__}y3;   // z1+z5+z3+z7
  ${__temp_prefix__}z[-3] = ${__temp_prefix__}y1-${__temp_prefix__}y3;   // z1+z5-z3-z7
  ${__temp_prefix__}z[-5] = ${__temp_prefix__}k11-${__temp_prefix__}k22; // z1-z5+z2-z6
  ${__temp_prefix__}z[-7] = ${__temp_prefix__}k11+${__temp_prefix__}k22; // z1-z5-z2+z6
}}.cmacroFixVars!"z"(z);
2065 
// Last three step 3 iterations of the IMDCT, fused into a single pass.
//
//   n      - number of 16-float groups to process
//   e      - base of the working buffer
//   i_off  - index of the highest element of the first group
//   A      - twiddle table; only A[base_n>>3] is read
//   base_n - transform size the twiddle index is derived from
//
// Every group covers z[0] .. z[-15]. The first stage combines the upper and
// lower halves of the group (needing at most a multiplication by the single
// twiddle value); the two iter_54 expansions then finish each 8-float half.
private void imdct_step3_inner_s_loop_ld654 (int n, float* e, int i_off, float* A, int base_n) {
  immutable float tw = A[base_n>>3];
  float* z = e+i_off; // must keep this name: the iter_54 mixins refer to it
  float* stop = z-16*n;
  for (; z > stop; z -= 16) {
    float dRe, dIm;

    // pair 0: the difference is stored unchanged
    dRe = z[0]-z[-8];
    dIm = z[-1]-z[-9];
    z[0] += z[-8];
    z[-1] += z[-9];
    z[-8] = dRe;
    z[-9] = dIm;

    // pair 1: the difference is mixed and scaled by the twiddle
    dRe = z[-2]-z[-10];
    dIm = z[-3]-z[-11];
    z[-2] += z[-10];
    z[-3] += z[-11];
    z[-10] = (dRe+dIm)*tw;
    z[-11] = (dIm-dRe)*tw;

    // pair 2: the real difference is taken in reverse to avoid a unary
    // negation, and the two results are stored swapped
    dRe = z[-12]-z[-4];
    dIm = z[-5]-z[-13];
    z[-4] += z[-12];
    z[-5] += z[-13];
    z[-12] = dIm;
    z[-13] = dRe;

    // pair 3: reversed real difference again, then mixed and scaled
    dRe = z[-14]-z[-6];
    dIm = z[-7]-z[-15];
    z[-6] += z[-14];
    z[-7] += z[-15];
    z[-14] = (dRe+dIm)*tw;
    z[-15] = (dRe-dIm)*tw;

    mixin(iter_54!"z");
    mixin(iter_54!"z-8");
  }
}
2106 
// Inverse MDCT of one block, performed in `buffer`.
//
//   buffer    - reads buffer[0 .. n/2] as input and writes the result to
//               buffer[0 .. n]
//   n         - transform size
//               (NOTE(review): presumably a power of two large enough for the
//               fixed n>>5 loop counts below -- confirm against the caller)
//   f         - decoder state; supplies the precomputed tables f.A, f.B, f.C
//               and f.bit_reverse as well as the temporary allocator
//   blocktype - index selecting which set of precomputed tables to use
//
// A scratch buffer of n/2 floats (buf2) is taken from the temporary
// allocator and released again before returning.
private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
  import core.stdc.stdlib : alloca;

  int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
  int ld;
  // @OPTIMIZE: reduce register pressure by using fewer variables?
  int save_point = temp_alloc_save(f);
  float *buf2;
  buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
  float *u = null, v = null;
  // twiddle factors
  float *A = f.A.ptr[blocktype];

  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
  // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.

  // kernel from paper


  // merged:
  //   copy and reflect spectral data
  //   step 0

  // note that it turns out that the items added together during
  // this step are, in fact, being added to themselves (as reflected
  // by step 0). inexplicable inefficiency! this became obvious
  // once I combined the passes.

  // so there's a missing 'times 2' here (for adding X to itself).
  // this propagates through linearly to the end, where the numbers
  // are 1/2 too small, and need to be compensated for.

  {
    float* d, e, AA, e_stop;
    d = &buf2[n2-2];
    AA = A;
    e = &buffer[0];
    e_stop = &buffer[n2];
    // first half: walk the input upwards (4 floats per step) while filling
    // buf2 from its top downwards
    while (e != e_stop) {
      d[1] = (e[0]*AA[0]-e[2]*AA[1]);
      d[0] = (e[0]*AA[1]+e[2]*AA[0]);
      d -= 2;
      AA += 2;
      e += 4;
    }
    // second half: walk the input back downwards with negated samples until
    // buf2 is completely filled
    e = &buffer[n2-3];
    while (d >= buf2) {
      d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
      d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
      d -= 2;
      AA += 2;
      e -= 4;
    }
  }

  // now we use symbolic names for these, so that we can
  // possibly swap their meaning as we change which operations
  // are in place

  u = buffer;
  v = buf2;

  // step 2    (paper output is w, now u)
  // this could be in place, but the data ends up in the wrong
  // place... _somebody_'s got to swap it, so this is nominated
  {
    float* AA = &A[n2-8];
    float* d0, d1, e0, e1;
    e0 = &v[n4];
    e1 = &v[0];
    d0 = &u[n4];
    d1 = &u[0];
    // reads v, writes u; the twiddle pointer walks backwards through A
    while (AA >= A) {
      float v40_20, v41_21;

      v41_21 = e0[1]-e1[1];
      v40_20 = e0[0]-e1[0];
      d0[1]  = e0[1]+e1[1];
      d0[0]  = e0[0]+e1[0];
      d1[1]  = v41_21*AA[4]-v40_20*AA[5];
      d1[0]  = v40_20*AA[4]+v41_21*AA[5];

      v41_21 = e0[3]-e1[3];
      v40_20 = e0[2]-e1[2];
      d0[3]  = e0[3]+e1[3];
      d0[2]  = e0[2]+e1[2];
      d1[3]  = v41_21*AA[0]-v40_20*AA[1];
      d1[2]  = v40_20*AA[0]+v41_21*AA[1];

      AA -= 8;

      d0 += 4;
      d1 += 4;
      e0 += 4;
      e1 += 4;
    }
  }

  // step 3
  ld = ilog(n)-1; // ilog is off-by-one from normal definitions

  // optimized step 3:

  // the original step3 loop can be nested r inside s or s inside r;
  // it's written originally as s inside r, but this is dumb when r
  // iterates many times, and s few. So I have two copies of it and
  // switch between them halfway.

  // this is iteration 0 of step 3
  imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
  imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);

  // this is iteration 1 of step 3
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);

  // remaining iterations, first half: "r" variant (twiddle changes per
  // butterfly), called once for each of the `lim` sub-blocks
  l = 2;
  for (; l < (ld-3)>>1; ++l) {
    int k0 = n>>(l+2), k0_2 = k0>>1;
    int lim = 1<<(l+1);
    foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
  }

  // remaining iterations, second half: "s" variant (the same four twiddles
  // are reused across all `lim` sub-blocks), called once per twiddle group
  for (; l < ld-6; ++l) {
    int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
    int rlim = n>>(l+6);
    int lim = 1<<(l+1);
    int i_off;
    float *A0 = A;
    i_off = n2-1;
    foreach (immutable _; 0..rlim) {
      imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
      A0 += k1*4;
      i_off -= 8;
    }
  }

  // iterations with count:
  //   ld-6,-5,-4 all interleaved together
  //       the big win comes from getting rid of needless flops
  //         due to the constants on pass 5 & 4 being all 1 and 0;
  //       combining them to be simultaneous to improve cache made little difference
  imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);

  // output is u

  // step 4, 5, and 6
  // cannot be in-place because of step 5
  {
    ushort *bitrev = f.bit_reverse.ptr[blocktype];
    // weirdly, I'd have thought reading sequentially and writing
    // erratically would have been better than vice-versa, but in
    // fact that's not what my testing showed. (That is, with
    // j = bitreverse(i), do you read i and write j, or read j and write i.)
    float *d0 = &v[n4-4];
    float *d1 = &v[n2-4];
    int k4;
    // each table entry names a group of four floats in u; the group is split
    // between the two output runs d1 and d0, which are filled downwards
    while (d0 >= v) {
      k4 = bitrev[0];
      d1[3] = u[k4+0];
      d1[2] = u[k4+1];
      d0[3] = u[k4+2];
      d0[2] = u[k4+3];

      k4 = bitrev[1];
      d1[1] = u[k4+0];
      d1[0] = u[k4+1];
      d0[1] = u[k4+2];
      d0[0] = u[k4+3];

      d0 -= 4;
      d1 -= 4;
      bitrev += 2;
    }
  }
  // (paper output is u, now v)


  // data must be in buf2
  debug(stb_vorbis) assert(v == buf2);

  // step 7   (paper output is v, now v)
  // this is now in place
  {
    float a02, a11, b0, b1, b2, b3;
    float* C = f.C.ptr[blocktype];
    float* d, e;
    d = v;
    e = v+n2-4;
    // d walks up from the start of v and e walks down from its end; they
    // are processed together until the two pointers meet
    while (d < e) {
      a02 = d[0]-e[2];
      a11 = d[1]+e[3];

      b0 = C[1]*a02+C[0]*a11;
      b1 = C[1]*a11-C[0]*a02;

      b2 = d[0]+e[ 2];
      b3 = d[1]-e[ 3];

      d[0] = b2+b0;
      d[1] = b3+b1;
      e[2] = b2-b0;
      e[3] = b1-b3;

      a02 = d[2]-e[0];
      a11 = d[3]+e[1];

      b0 = C[3]*a02+C[2]*a11;
      b1 = C[3]*a11-C[2]*a02;

      b2 = d[2]+e[ 0];
      b3 = d[3]-e[ 1];

      d[2] = b2+b0;
      d[3] = b3+b1;
      e[0] = b2-b0;
      e[1] = b1-b3;

      C += 4;
      d += 4;
      e -= 4;
    }
  }

  // data must be in buf2


  // step 8+decode   (paper output is X, now buffer)
  // this generates pairs of data a la 8 and pushes them directly through
  // the decode kernel (pushing rather than pulling) to avoid having
  // to make another pass later

  // this cannot POSSIBLY be in place, so we refer to the buffers directly
  {
    float p0, p1, p2, p3;
    float* d0, d1, d2, d3;
    float* B = f.B.ptr[blocktype]+n2-8;
    float* e = buf2+n2-8;
    // four output cursors: d0 and d2 run upwards from buffer[0] and
    // buffer[n2], d1 and d3 run downwards from buffer[n2-4] and buffer[n-4];
    // d1 receives the negated d0 values, d3 receives the same values as d2
    d0 = &buffer[0];
    d1 = &buffer[n2-4];
    d2 = &buffer[n2];
    d3 = &buffer[n-4];
    while (e >= v) {
      p3 =  e[6]*B[7]-e[7]*B[6];
      p2 = -e[6]*B[6]-e[7]*B[7];

      d0[0] =   p3;
      d1[3] =  -p3;
      d2[0] =   p2;
      d3[3] =   p2;

      p1 =  e[4]*B[5]-e[5]*B[4];
      p0 = -e[4]*B[4]-e[5]*B[5];

      d0[1] =   p1;
      d1[2] = - p1;
      d2[1] =   p0;
      d3[2] =   p0;

      p3 =  e[2]*B[3]-e[3]*B[2];
      p2 = -e[2]*B[2]-e[3]*B[3];

      d0[2] =   p3;
      d1[1] = - p3;
      d2[2] =   p2;
      d3[1] =   p2;

      p1 =  e[0]*B[1]-e[1]*B[0];
      p0 = -e[0]*B[0]-e[1]*B[1];

      d0[3] =   p1;
      d1[0] = - p1;
      d2[3] =   p0;
      d3[0] =   p0;

      B -= 8;
      e -= 8;
      d0 += 4;
      d2 += 4;
      d1 -= 4;
      d3 -= 4;
    }
  }

  // release the scratch buffer and rewind the temporary allocator
  temp_free(f, buf2);
  temp_alloc_restore(f, save_point);
}
2396 
// Selects the precomputed window table for a half-block length.
//
// `len` is half of a block size: it is doubled and compared against the
// decoder's two block sizes. Returns `f.window.ptr[0]` when the doubled
// length equals `f.blocksize_0`, `f.window.ptr[1]` when it equals
// `f.blocksize_1`; any other length trips `assert(0)`.
private float *get_window (VorbisDecoder f, int len) {
  immutable int full = len<<1;
  // the short block size is tested first, so it wins if both sizes are equal
  if (full != f.blocksize_0 && full != f.blocksize_1) assert(0);
  return f.window.ptr[full == f.blocksize_0 ? 0 : 1];
}
2403 
// Element type of the `finalY` array accepted by do_floor():
// `int` when built with STB_VORBIS_NO_DEFER_FLOOR, `short` otherwise.
version(STB_VORBIS_NO_DEFER_FLOOR)
  alias YTYPE = int;
else
  alias YTYPE = short;
2409 
// Renders the floor curve of channel `i` into `target[0 .. n/2]`.
//
// The floor is looked up through the channel's submap (`map.chan[i].mux`).
// Floor type 0 is rejected: the function returns whatever
// `error(f, STBVorbisError.invalid_stream)` returns. Otherwise the floor1
// points are visited in `sorted_order`; each used point is joined to the
// previous used point with `draw_line`, and the remainder of the half-block
// is filled with the last Y value via `LINE_OP`.
//
// Which points count as "used" depends on the build:
//   * STB_VORBIS_NO_DEFER_FLOOR: points whose `step2_flag` entry is non-zero;
//   * otherwise: points whose `finalY` entry is non-negative
//     (`step2_flag` is not read in that build).
//
// Returns `true` on success.
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  int s = map.chan[i].mux;
  int floor = map.submap_floor.ptr[s];
  if (f.floor_types.ptr[floor] == 0) return error(f, STBVorbisError.invalid_stream);

  int n2 = n>>1;
  Floor1* g = &f.floor_config[floor].floor1;
  // (lx, ly) is the most recent used point; the first point is always used
  int lx = 0;
  int ly = finalY[0]*g.floor1_multiplier;
  foreach (immutable q; 1..g.values) {
    int j = g.sorted_order.ptr[q];
    // skip points that did not contribute to the curve
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      if (!step2_flag[j]) continue;
    } else {
      if (finalY[j] < 0) continue;
    }
    int hx = g.Xlist.ptr[j];
    int hy = finalY[j]*g.floor1_multiplier;
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }
  // optimization of: draw_line(target, lx, ly, n, ly, n2);
  // (the loop body never runs when lx >= n2)
  for (int j = lx; j < n2; ++j) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  return true;
}
2440 
2441 // The meaning of "left" and "right"
2442 //
2443 // For a given frame:
2444 //     we compute samples from 0..n
2445 //     window_center is n/2
2446 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2447 //     all of the samples from left_end to right_start can be output without mixing; however,
2448 //        this interval is 0-length except when transitioning between short and long frames
2449 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2450 //        which we don't have, so those get saved in a buffer
2451 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2452 //        has to be the same as frame N+1's left_end-left_start (which they are by
2453 //        construction)
2454 
// Starts decoding the next audio packet: reads its mode and computes the
// window boundaries for the frame.
//
// Packets whose first bit is set are drained and skipped. For the first
// packet whose first bit is clear, the mode index is read and stored into
// `*mode`, and the four window positions are written:
//   *p_left_start / *p_left_end    - region mixed with the previous frame
//   *p_right_start / *p_right_end  - region saved for the next frame
//
// Returns `false` when the decoder is at EOF, when no packet could be
// started, or when the mode index is EOP or not below `f.mode_count`;
// returns `true` otherwise.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_end = 0;
  f.channel_buffer_start = 0;

  // find a packet whose type bit is 0
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    if (get_bits!1(f) == 0) break;
    /+if (f.push_mode) return error(f, STBVorbisError.bad_packet_type);+/
    // wrong packet type: consume it entirely and try the next one
    while (get8_packet(f) != EOP) {}
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  immutable int modeIndex = get_bits_main(f, ilog(f.mode_count-1));
  if (modeIndex == EOP || modeIndex >= f.mode_count) return false;
  *mode = modeIndex;
  Mode* m = f.mode_config.ptr+modeIndex;

  // short blocks carry no prev/next window flags
  int n = f.blocksize_0;
  int prev = 0, next = 0;
  if (m.blockflag) {
    n = f.blocksize_1;
    prev = get_bits!1(f);
    next = get_bits!1(f);
  }

  // WINDOWING
  immutable int window_center = n>>1;
  // a long block next to a short one uses a short-sized slope on that side
  immutable bool shortLeft = (m.blockflag && !prev);
  immutable bool shortRight = (m.blockflag && !next);

  *p_left_start = (shortLeft ? (n-f.blocksize_0)>>2 : 0);
  *p_left_end   = (shortLeft ? (n+f.blocksize_0)>>2 : window_center);
  *p_right_start = (shortRight ? (n*3-f.blocksize_0)>>2 : window_center);
  *p_right_end   = (shortRight ? (n*3+f.blocksize_0)>>2 : n);
  return true;
}
2504 
// Decodes the body of an audio packet after vorbis_decode_initial() has
// read its mode and window boundaries.
//
// Stages, in order: per-channel floor1 decode, residue decode, inverse
// channel coupling, floor application, inverse MDCT, packet flush, and
// finally the sample-position bookkeeping (`f.current_loc`).
//
// Params:
//   f           = decoder state
//   len         = out: number of samples of this frame to use; normally
//                 `right_end`, shorter when a final page truncates the frame
//   m           = mode of this packet (selects block size and mapping)
//   left_start  = start of the left window region
//   left_end    = end of the left window region (not used by this function)
//   right_start = start of the right window region
//   right_end   = end of the right window region
//   p_left      = out: receives the adjusted `left_start` when deferred
//                 discard samples are consumed by this frame
//
// Returns `true` on success, or the result of
// `error(f, STBVorbisError.invalid_stream)` when a channel uses floor type 0.
private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
  // fixed: the module name was misspelled `core.stdc..string`, which does not parse
  import core.stdc.string : memcpy, memset;

  Mapping* map;
  int n, n2;
  int[256] zero_channel;        // channels with no floor; may be re-enabled by coupling below
  int[256] really_zero_channel; // snapshot taken before coupling re-enables channels

  // WINDOWING
  n = f.blocksize.ptr[m.blockflag];
  map = &f.mapping[m.mapping];

  // FLOORS
  n2 = n>>1;

  //stb_prof(1);
  foreach (immutable i; 0..f.vrchannels) {
    int s = map.chan[i].mux, floor;
    zero_channel[i] = false;
    floor = map.submap_floor.ptr[s];
    if (f.floor_types.ptr[floor] == 0) {
      return error(f, STBVorbisError.invalid_stream);
    } else {
      Floor1* g = &f.floor_config[floor].floor1;
      // one flag bit per channel: set means floor data follows
      if (get_bits!1(f)) {
        short* finalY;
        ubyte[256] step2_flag = void;
        immutable int[4] range_list = [ 256, 128, 86, 64 ];
        int range = range_list[g.floor1_multiplier-1];
        int offset = 2;
        finalY = f.finalY.ptr[i];
        // the first two Y values are stored directly
        finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
        finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8
        // the remaining Y values are codebook-coded, partition by partition
        foreach (immutable j; 0..g.partitions) {
          int pclass = g.partition_class_list.ptr[j];
          int cdim = g.class_dimensions.ptr[pclass];
          int cbits = g.class_subclasses.ptr[pclass];
          int csub = (1<<cbits)-1;
          int cval = 0;
          if (cbits) {
            Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
            mixin(DECODE!("cval", "cc"));
          }
          foreach (immutable k; 0..cdim) {
            // the low `cbits` bits of cval select the subclass book for this value
            int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
            cval = cval>>cbits;
            if (book >= 0) {
              int temp;
              Codebook *cc = f.codebooks+book;
              mixin(DECODE!("temp", "cc"));
              finalY[offset++] = cast(short)temp; //k8
            } else {
              finalY[offset++] = 0;
            }
          }
        }
        if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
        // turn the decoded values into absolute Y positions using the
        // prediction from each point's two neighbors
        step2_flag[0] = step2_flag[1] = 1;
        foreach (immutable j; 2..g.values) {
          int low = g.neighbors.ptr[j].ptr[0];
          int high = g.neighbors.ptr[j].ptr[1];
          //neighbors(g.Xlist, j, &low, &high);
          int pred = void;
          mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
          int val = finalY[j];
          int highroom = range-pred;
          int lowroom = pred;
          auto room = (highroom < lowroom ? highroom : lowroom)*2;
          if (val) {
            step2_flag[low] = step2_flag[high] = 1;
            step2_flag[j] = 1;
            if (val >= room) {
              finalY[j] = cast(short)(highroom > lowroom ? val-lowroom+pred : pred-val+highroom-1); //k8
            } else {
              finalY[j] = cast(short)(val&1 ? pred-((val+1)>>1) : pred+(val>>1)); //k8
            }
          } else {
            // a zero value means the point lies exactly on the prediction
            step2_flag[j] = 0;
            finalY[j] = cast(short)pred; //k8
          }
        }

        version(STB_VORBIS_NO_DEFER_FLOOR) {
          do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
        } else {
          // defer final floor computation until _after_ residue;
          // unused points are marked with -1 so do_floor() can skip them
          foreach (immutable j; 0..g.values) if (!step2_flag[j]) finalY[j] = -1;
        }
      } else {
  error:
        zero_channel[i] = true;
      }
      // So we just defer everything else to later
      // at this point we've decoded the floor into buffer
    }
  }
  //stb_prof(0);
  // at this point we've decoded all floors

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // re-enable coupled channels if necessary:
  // if either channel of a coupled pair has a floor, both must be residue-decoded
  memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof*f.vrchannels);
  foreach (immutable i; 0..map.coupling_steps) {
    if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
      zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = false;
    }
  }

  // RESIDUE DECODE
  foreach (immutable i; 0..map.submaps) {
    float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
    ubyte[256] do_not_decode = void;
    int ch = 0;
    // gather the channels that belong to submap i
    foreach (immutable j; 0..f.vrchannels) {
      if (map.chan[j].mux == i) {
        if (zero_channel[j]) {
          do_not_decode[ch] = true;
          residue_buffers.ptr[ch] = null;
        } else {
          do_not_decode[ch] = false;
          residue_buffers.ptr[ch] = f.channel_buffers.ptr[j];
        }
        ++ch;
      }
    }
    int r = map.submap_residue.ptr[i];
    decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

   // INVERSE COUPLING
  //stb_prof(14);
  // coupling steps are undone in reverse order
  foreach_reverse (immutable i; 0..map.coupling_steps) {
    int n2n = n>>1;
    float* mm = f.channel_buffers.ptr[map.chan[i].magnitude];
    float* a = f.channel_buffers.ptr[map.chan[i].angle];
    foreach (immutable j; 0..n2n) {
      float a2, m2;
      if (mm[j] > 0) {
        if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]-a[j]; } else { a2 = mm[j]; m2 = mm[j]+a[j]; }
      } else {
        if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]+a[j]; } else { a2 = mm[j]; m2 = mm[j]-a[j]; }
      }
      mm[j] = m2;
      a[j] = a2;
    }
  }

  // finish decoding the floors
  version(STB_VORBIS_NO_DEFER_FLOOR) {
    foreach (immutable i; 0..f.vrchannels) {
      if (really_zero_channel[i]) {
        memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      } else {
        foreach (immutable j; 0..n2) f.channel_buffers.ptr[i].ptr[j] *= f.floor_buffers.ptr[i].ptr[j];
      }
    }
  } else {
    //stb_prof(15);
    foreach (immutable i; 0..f.vrchannels) {
      if (really_zero_channel[i]) {
        // channel had no floor of its own: its spectrum is silence
        memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      } else {
        do_floor(f, map, i, n, f.channel_buffers.ptr[i], f.finalY.ptr[i], null);
      }
    }
  }

  // INVERSE MDCT
  //stb_prof(16);
  foreach (immutable i; 0..f.vrchannels) inverse_mdct(f.channel_buffers.ptr[i], n, f, m.blockflag);
  //stb_prof(0);

  // this shouldn't be necessary, unless we exited on an error
  // and want to flush to get to the next packet
  flush_packet(f);

  if (f.first_decode) {
    // assume we start so first non-discarded sample is sample 0
    // this isn't to spec, but spec would require us to read ahead
    // and decode the size of all current frames--could be done,
    // but presumably it's not a commonly used feature
    f.current_loc = -n2; // start of first frame is positioned for discard
    // we might have to discard samples "from" the next frame too,
    // if we're lapping a large block then a small at the start?
    f.discard_samples_deferred = n-right_end;
    f.current_loc_valid = true;
    f.first_decode = false;
  } else if (f.discard_samples_deferred) {
    if (f.discard_samples_deferred >= right_start-left_start) {
      // the whole output region of this frame is discarded
      f.discard_samples_deferred -= (right_start-left_start);
      left_start = right_start;
      *p_left = left_start;
    } else {
      // only the leading part of this frame is discarded
      left_start += f.discard_samples_deferred;
      *p_left = left_start;
      f.discard_samples_deferred = 0;
    }
  } else if (f.previous_length == 0 && f.current_loc_valid) {
    // we're recovering from a seek... that means we're going to discard
    // the samples from this packet even though we know our position from
    // the last page header, so we need to update the position based on
    // the discarded samples here
    // but wait, the code below is going to add this in itself even
    // on a discard, so we don't need to do it here...
  }

  // check if we have ogg information about the sample # for this packet
  if (f.last_seg_which == f.end_seg_with_known_loc) {
    // if we have a valid current loc, and this is final:
    if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
      uint current_end = f.known_loc_for_packet-(n-right_end);
      // then let's infer the size of the (probably) short final frame
      if (current_end < f.current_loc+right_end) {
        if (current_end < f.current_loc+(right_end-left_start)) {
          // negative truncation, that's impossible!
          *len = 0;
        } else {
          *len = current_end-f.current_loc;
        }
        *len += left_start;
        if (*len > right_end) *len = right_end; // this should never happen
        f.current_loc += *len;
        return true;
      }
    }
    // otherwise, just set our sample loc
    // guess that the ogg granule pos refers to the _middle_ of the
    // last frame?
    // set f.current_loc to the position of left_start
    f.current_loc = f.known_loc_for_packet-(n2-left_start);
    f.current_loc_valid = true;
  }
  if (f.current_loc_valid) f.current_loc += (right_start-left_start);

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  *len = right_end;  // ignore samples after the window goes to 0
  return true;
}
2747 
// Decodes one complete audio packet.
//
// Runs vorbis_decode_initial() to obtain the mode and window boundaries,
// then hands them to vorbis_decode_packet_rest(). `*p_left` and `*p_right`
// receive the left and right window start positions; `*len` is filled in by
// vorbis_decode_packet_rest().
//
// Returns 0 if the initial stage fails, otherwise the result of
// vorbis_decode_packet_rest().
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int modeIndex, leftEnd, rightEnd;
  immutable headerOk = vorbis_decode_initial(f, p_left, &leftEnd, p_right, &rightEnd, &modeIndex);
  if (!headerOk) return 0;
  auto modePtr = f.mode_config.ptr+modeIndex;
  return vorbis_decode_packet_rest(f, len, modePtr, *p_left, leftEnd, *p_right, rightEnd, p_left);
}
2753 
// Overlap-adds the current frame with the saved tail of the previous frame
// and saves this frame's tail for the next call.
//
// `left` is where this frame's rising window starts, and therefore where
// mixing with the previous tail begins. `right` is where the falling window
// starts: samples from `right` up to `len` are copied into
// `f.previous_window`, and the returned data ends there.
//
// Returns the number of output samples (`right-left`, with `right` clamped
// to `len`), also added to `f.samples_output`. Returns 0 when there was no
// previous frame to mix with.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  // remembered now: f.previous_length is overwritten further down
  immutable prev = f.previous_length;

  // mixin from previous window
  if (prev) {
    int n = prev;
    float *w = get_window(f, n);
    foreach (immutable ch; 0..f.vrchannels) {
      auto cur = f.channel_buffers.ptr[ch];
      auto old = f.previous_window.ptr[ch];
      // the current frame fades in with w[j]; the previous tail fades out
      // with the mirrored window value
      foreach (immutable j; 0..n) {
        cur[left+j] = cur[left+j]*w[j]+old[j]*w[n-1-j];
      }
    }
  }

  // last half of this data becomes previous window
  f.previous_length = len-right;

  // @OPTIMIZE: this copy could be avoided by double-buffering the output
  // (swapping previous_window with channel_buffers), at the price of a
  // previous_window twice as large
  foreach (immutable ch; 0..f.vrchannels) {
    auto src = f.channel_buffers.ptr[ch];
    auto dst = f.previous_window.ptr[ch];
    uint j = 0;
    while (right+j < len) {
      dst[j] = src[right+j];
      ++j;
    }
  }

  // there was no previous packet, so this data isn't valid...
  // (strictly only the would-have-overlapped part is invalid, but this
  // seems to be what the spec requires)
  if (!prev) return 0;

  // truncate a short frame
  immutable int outEnd = (len < right ? len : right);
  immutable int produced = outEnd-left;

  f.samples_output += produced;

  return produced;
}
2805 
// Decodes one packet and runs it through vorbis_finish_frame(), ignoring the
// sample count that call returns.
//
// Returns `true` if a packet was decoded, `false` otherwise.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int len, left, right;
  if (!vorbis_decode_packet(f, &len, &left, &right)) return false;
  vorbis_finish_frame(f, len, left, right);
  return true;
}
2814 
2815 /+ k8: i don't need that, so it's dead
2816 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2817   import core.stdc.string : memcmp;
2818 
2819   // make sure that we have the packet available before continuing...
2820   // this requires a full ogg parse, but we know we can fetch from f.stream
2821 
2822   // instead of coding this out explicitly, we could save the current read state,
2823   // read the next packet with get8() until end-of-packet, check f.eof, then
2824   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2825   // of state to restore (primarily the page segment table)
2826 
2827   int s = f.next_seg, first = true;
2828   ubyte *p = f.stream;
2829 
2830   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2831     for (; s < f.segment_count; ++s) {
2832       p += f.segments[s];
2833       if (f.segments[s] < 255) break; // stop at first short segment
2834     }
2835     // either this continues, or it ends it...
2836     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2837     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2838     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2839     first = false;
2840   }
2841   while (s == -1) {
2842     ubyte* q = void;
2843     int n = void;
2844     // check that we have the page header ready
2845     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2846     // validate the page
2847     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2848     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2849     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2850       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2851       // if no previous length, we're resynching, so we can come in on a continued-packet,
2852       // which we'll just drop
2853     } else {
2854       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2855     }
2856     n = p[26]; // segment counts
2857     q = p+27; // q points to segment table
2858     p = q+n; // advance past header
2859     // make sure we've read the segment table
2860     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2861     for (s = 0; s < n; ++s) {
2862       p += q[s];
2863       if (q[s] < 255) break;
2864     }
2865     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2866     if (s == n) s = -1; // set 'crosses page' flag
2867     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2868     first = false;
2869   }
2870   return true;
2871 }
2872 +/
2873 
2874 private int start_decoder (VorbisDecoder f) {
2875   import core.stdc..string : memcpy, memset;
2876 
2877   ubyte[6] header;
2878   ubyte x, y;
2879   int len, max_submaps = 0;
2880   int longest_floorlist = 0;
2881 
2882   // first page, first packet
2883 
2884   if (!start_page(f)) return false;
2885   // validate page flag
2886   if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
2887   if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
2888   if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
2889   // check for expected packet length
2890   if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
2891   if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
2892   // read packet
2893   // check packet header
2894   if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
2895   if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
2896   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
2897   // vorbis_version
2898   if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
2899   f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
2900   if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
2901   f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
2902   get32(f); // bitrate_maximum
2903   get32(f); // bitrate_nominal
2904   get32(f); // bitrate_minimum
2905   x = get8(f);
2906   {
2907     int log0 = x&15;
2908     int log1 = x>>4;
2909     f.blocksize_0 = 1<<log0;
2910     f.blocksize_1 = 1<<log1;
2911     if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
2912     if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
2913     if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
2914   }
2915 
2916   // framing_flag
2917   x = get8(f);
2918   if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);
2919 
2920   // second packet! (comments)
2921   if (!start_page(f)) return false;
2922 
2923   // read comments
2924   if (!start_packet(f)) return false;
2925 
2926   if (f.read_comments) {
2927     /+if (f.push_mode) {
2928       if (!is_whole_packet_present(f, true)) {
2929         // convert error in ogg header to write type
2930         if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2931         return false;
2932       }
2933     }+/
2934     if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
2935     foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2936     if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2937 
2938     // skip vendor id
2939     uint vidsize = get32_packet(f);
2940     //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
2941     if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
2942     while (vidsize--) get8_packet(f);
2943 
2944     // read comments section
2945     uint cmtcount = get32_packet(f);
2946     if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
2947     if (cmtcount > 0) {
2948       uint cmtsize = 32768; // this should be enough for everyone
2949       f.comment_data = setup_malloc!ubyte(f, cmtsize);
2950       if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
2951       auto cmtpos = 0;
2952       auto d = f.comment_data;
2953       while (cmtcount--) {
2954         uint linelen = get32_packet(f);
2955         //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
2956         if (linelen == EOP || linelen > ushort.max-2) break;
2957         if (linelen == 0) { continue; }
2958         if (cmtpos+2+linelen > cmtsize) break;
2959         cmtpos += linelen+2;
2960         *d++ = (linelen+2)&0xff;
2961         *d++ = ((linelen+2)>>8)&0xff;
2962         while (linelen--) {
2963           auto b = get8_packet(f);
2964           if (b == EOP) return error(f, STBVorbisError.outofmem);
2965           *d++ = cast(ubyte)b;
2966         }
2967         //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
2968         f.comment_size = cmtpos;
2969       }
2970     }
2971     flush_packet(f);
2972     f.comment_rewind();
2973   } else {
2974     // skip comments
2975     do {
2976       len = next_segment(f);
2977       skip(f, len);
2978       f.bytes_in_seg = 0;
2979     } while (len);
2980   }
2981 
2982   // third packet!
2983   if (!start_packet(f)) return false;
2984 
2985   /+if (f.push_mode) {
2986     if (!is_whole_packet_present(f, true)) {
2987       // convert error in ogg header to write type
2988       if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2989       return false;
2990     }
2991   }+/
2992 
2993   if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
2994   foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2995   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2996 
2997   // codebooks
2998   f.codebook_count = get_bits!8(f)+1;
2999   f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
3000   static assert((*f.codebooks).sizeof == Codebook.sizeof);
3001   if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
3002   memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
3003   foreach (immutable i; 0..f.codebook_count) {
3004     uint* values;
3005     int ordered, sorted_count;
3006     int total = 0;
3007     ubyte* lengths;
3008     Codebook* c = f.codebooks+i;
3009     x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
3010     x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
3011     x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
3012     x = get_bits!8(f);
3013     c.dimensions = (get_bits!8(f)<<8)+x;
3014     x = get_bits!8(f);
3015     y = get_bits!8(f);
3016     c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
3017     ordered = get_bits!1(f);
3018     c.sparse = (ordered ? 0 : get_bits!1(f));
3019 
3020     if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);
3021 
3022     if (c.sparse) {
3023       lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
3024     } else {
3025       lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3026     }
3027 
3028     if (lengths is null) return error(f, STBVorbisError.outofmem);
3029 
3030     if (ordered) {
3031       int current_entry = 0;
3032       int current_length = get_bits_add_no!5(f, 1);
3033       while (current_entry < c.entries) {
3034         int limit = c.entries-current_entry;
3035         int n = get_bits_main(f, ilog(limit));
3036         if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
3037         memset(lengths+current_entry, current_length, n);
3038         current_entry += n;
3039         ++current_length;
3040       }
3041     } else {
3042       foreach (immutable j; 0..c.entries) {
3043         int present = (c.sparse ? get_bits!1(f) : 1);
3044         if (present) {
3045           lengths[j] = get_bits_add_no!5(f, 1);
3046           ++total;
3047           if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
3048         } else {
3049           lengths[j] = NO_CODE;
3050         }
3051       }
3052     }
3053 
3054     if (c.sparse && total >= c.entries>>2) {
3055       // convert sparse items to non-sparse!
3056       if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
3057       c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3058       if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
3059       memcpy(c.codeword_lengths, lengths, c.entries);
3060       setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
3061       lengths = c.codeword_lengths;
3062       c.sparse = 0;
3063     }
3064 
3065     // compute the size of the sorted tables
3066     if (c.sparse) {
3067       sorted_count = total;
3068     } else {
3069       sorted_count = 0;
3070       version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
3071         foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
3072       }
3073     }
3074 
3075     c.sorted_entries = sorted_count;
3076     values = null;
3077 
3078     if (!c.sparse) {
3079       c.codewords = setup_malloc!uint(f, c.entries);
3080       if (!c.codewords) return error(f, STBVorbisError.outofmem);
3081     } else {
3082       if (c.sorted_entries) {
3083         c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
3084         if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
3085         c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3086         if (!c.codewords) return error(f, STBVorbisError.outofmem);
3087         values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
3088         if (!values) return error(f, STBVorbisError.outofmem);
3089       }
3090       uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
3091       if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
3092     }
3093 
3094     if (!compute_codewords(c, lengths, c.entries, values)) {
3095       if (c.sparse) setup_temp_free(f, values, 0);
3096       return error(f, STBVorbisError.invalid_setup);
3097     }
3098 
3099     if (c.sorted_entries) {
3100       // allocate an extra slot for sentinels
3101       c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
3102       if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
3103       // allocate an extra slot at the front so that c.sorted_values[-1] is defined
3104       // so that we can catch that case without an extra if
3105       c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
3106       if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
3107       ++c.sorted_values;
3108       c.sorted_values[-1] = -1;
3109       compute_sorted_huffman(c, lengths, values);
3110     }
3111 
3112     if (c.sparse) {
3113       setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
3114       setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3115       setup_temp_free(f, lengths, c.entries);
3116       c.codewords = null;
3117     }
3118 
3119     compute_accelerated_huffman(c);
3120 
3121     c.lookup_type = get_bits!4(f);
3122     if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
3123     if (c.lookup_type > 0) {
3124       ushort* mults;
3125       c.minimum_value = float32_unpack(get_bits!32(f));
3126       c.delta_value = float32_unpack(get_bits!32(f));
3127       c.value_bits = get_bits_add_no!4(f, 1);
3128       c.sequence_p = get_bits!1(f);
3129       if (c.lookup_type == 1) {
3130         c.lookup_values = lookup1_values(c.entries, c.dimensions);
3131       } else {
3132         c.lookup_values = c.entries*c.dimensions;
3133       }
3134       if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
3135       mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
3136       if (mults is null) return error(f, STBVorbisError.outofmem);
3137       foreach (immutable j; 0..cast(int)c.lookup_values) {
3138         int q = get_bits_main(f, c.value_bits);
3139         if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
3140         mults[j] = cast(ushort)q; //k8
3141       }
3142 
3143       version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
3144         if (c.lookup_type == 1) {
3145           int sparse = c.sparse; //len
3146           float last = 0;
3147           // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3148           if (sparse) {
3149             if (c.sorted_entries == 0) goto skip;
3150             c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
3151           } else {
3152             c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
3153           }
3154           if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3155           foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
3156             uint z = (sparse ? c.sorted_values[j] : j);
3157             uint div = 1;
3158             foreach (immutable k; 0..c.dimensions) {
3159               int off = (z/div)%c.lookup_values;
3160               float val = mults[off];
3161               val = val*c.delta_value+c.minimum_value+last;
3162               c.multiplicands[j*c.dimensions+k] = val;
3163               if (c.sequence_p) last = val;
3164               if (k+1 < c.dimensions) {
3165                  if (div > uint.max/cast(uint)c.lookup_values) {
3166                     setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3167                     return error(f, STBVorbisError.invalid_setup);
3168                  }
3169                  div *= c.lookup_values;
3170               }
3171             }
3172           }
3173           c.lookup_type = 2;
3174           goto skip;
3175         }
3176         //else
3177       }
3178       {
3179         float last = 0;
3180         c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
3181         if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3182         foreach (immutable j; 0..cast(int)c.lookup_values) {
3183           float val = mults[j]*c.delta_value+c.minimum_value+last;
3184           c.multiplicands[j] = val;
3185           if (c.sequence_p) last = val;
3186         }
3187       }
3188      //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
3189      skip: // this is versioned out in C
3190       setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3191     }
3192   }
3193 
3194   // time domain transfers (notused)
3195   x = get_bits_add_no!6(f, 1);
3196   foreach (immutable i; 0..x) {
3197     auto z = get_bits!16(f);
3198     if (z != 0) return error(f, STBVorbisError.invalid_setup);
3199   }
3200 
3201   // Floors
3202   f.floor_count = get_bits_add_no!6(f, 1);
3203   f.floor_config = setup_malloc!Floor(f, f.floor_count);
3204   if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
3205   foreach (immutable i; 0..f.floor_count) {
3206     f.floor_types[i] = get_bits!16(f);
3207     if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
3208     if (f.floor_types[i] == 0) {
3209       Floor0* g = &f.floor_config[i].floor0;
3210       g.order = get_bits!8(f);
3211       g.rate = get_bits!16(f);
3212       g.bark_map_size = get_bits!16(f);
3213       g.amplitude_bits = get_bits!6(f);
3214       g.amplitude_offset = get_bits!8(f);
3215       g.number_of_books = get_bits_add_no!4(f, 1);
3216       foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
3217       return error(f, STBVorbisError.feature_not_supported);
3218     } else {
3219       Point[31*8+2] p;
3220       Floor1 *g = &f.floor_config[i].floor1;
3221       int max_class = -1;
3222       g.partitions = get_bits!5(f);
3223       foreach (immutable j; 0..g.partitions) {
3224         g.partition_class_list[j] = get_bits!4(f);
3225         if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
3226       }
3227       foreach (immutable j; 0..max_class+1) {
3228         g.class_dimensions[j] = get_bits_add_no!3(f, 1);
3229         g.class_subclasses[j] = get_bits!2(f);
3230         if (g.class_subclasses[j]) {
3231           g.class_masterbooks[j] = get_bits!8(f);
3232           if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3233         }
3234         foreach (immutable k; 0..1<<g.class_subclasses[j]) {
3235           g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
3236           if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3237         }
3238       }
3239       g.floor1_multiplier = get_bits_add_no!2(f, 1);
3240       g.rangebits = get_bits!4(f);
3241       g.Xlist[0] = 0;
3242       g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
3243       g.values = 2;
3244       foreach (immutable j; 0..g.partitions) {
3245         int c = g.partition_class_list[j];
3246         foreach (immutable k; 0..g.class_dimensions[c]) {
3247           g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
3248           ++g.values;
3249         }
3250       }
3251       assert(g.values <= ushort.max);
3252       // precompute the sorting
3253       foreach (ushort j; 0..cast(ushort)g.values) {
3254         p[j].x = g.Xlist[j];
3255         p[j].y = j;
3256       }
3257       qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3258       foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
3259       // precompute the neighbors
3260       foreach (uint j; 2..g.values) {
3261         ushort low = void, hi = void;
3262         neighbors(g.Xlist.ptr, j, &low, &hi);
3263         assert(low <= ubyte.max);
3264         assert(hi <= ubyte.max);
3265         g.neighbors[j].ptr[0] = cast(ubyte)low;
3266         g.neighbors[j].ptr[1] = cast(ubyte)hi;
3267       }
3268       if (g.values > longest_floorlist) longest_floorlist = g.values;
3269     }
3270   }
3271 
3272   // Residue
3273   f.residue_count = get_bits_add_no!6(f, 1);
3274   f.residue_config = setup_malloc!Residue(f, f.residue_count);
3275   if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
3276   memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
3277   foreach (immutable i; 0..f.residue_count) {
3278     ubyte[64] residue_cascade;
3279     Residue* r = f.residue_config+i;
3280     f.residue_types[i] = get_bits!16(f);
3281     if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
3282     r.begin = get_bits!24(f);
3283     r.end = get_bits!24(f);
3284     if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
3285     r.part_size = get_bits_add_no!24(f, 1);
3286     r.classifications = get_bits_add_no!6(f, 1);
3287     r.classbook = get_bits!8(f);
3288     if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3289     foreach (immutable j; 0..r.classifications) {
3290       ubyte high_bits = 0;
3291       ubyte low_bits = get_bits!3(f);
3292       if (get_bits!1(f)) high_bits = get_bits!5(f);
3293       assert(high_bits*8+low_bits <= ubyte.max);
3294       residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
3295     }
3296     static assert(r.residue_books[0].sizeof == 16);
3297     r.residue_books = setup_malloc!(short[8])(f, r.classifications);
3298     if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
3299     foreach (immutable j; 0..r.classifications) {
3300       foreach (immutable k; 0..8) {
3301         if (residue_cascade[j]&(1<<k)) {
3302           r.residue_books[j].ptr[k] = get_bits!8(f);
3303           if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3304         } else {
3305           r.residue_books[j].ptr[k] = -1;
3306         }
3307       }
3308     }
3309     // precompute the classifications[] array to avoid inner-loop mod/divide
3310     // call it 'classdata' since we already have r.classifications
3311     r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
3312     if (!r.classdata) return error(f, STBVorbisError.outofmem);
3313     memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
3314     foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
3315       int classwords = f.codebooks[r.classbook].dimensions;
3316       int temp = j;
3317       r.classdata[j] = setup_malloc!ubyte(f, classwords);
3318       if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
3319       foreach_reverse (immutable k; 0..classwords) {
3320         assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
3321         r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
3322         temp /= r.classifications;
3323       }
3324     }
3325   }
3326 
3327   f.mapping_count = get_bits_add_no!6(f, 1);
3328   f.mapping = setup_malloc!Mapping(f, f.mapping_count);
3329   if (f.mapping is null) return error(f, STBVorbisError.outofmem);
3330   memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
3331   foreach (immutable i; 0..f.mapping_count) {
3332     Mapping* m = f.mapping+i;
3333     int mapping_type = get_bits!16(f);
3334     if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
3335     m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
3336     if (m.chan is null) return error(f, STBVorbisError.outofmem);
3337     m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
3338     if (m.submaps > max_submaps) max_submaps = m.submaps;
3339     if (get_bits!1(f)) {
3340       m.coupling_steps = get_bits_add_no!8(f, 1);
3341       foreach (immutable k; 0..m.coupling_steps) {
3342         m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3343         m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3344         if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3345         if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3346         if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
3347       }
3348     } else {
3349       m.coupling_steps = 0;
3350     }
3351 
3352     // reserved field
3353     if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
3354     if (m.submaps > 1) {
3355       foreach (immutable j; 0..f.vrchannels) {
3356         m.chan[j].mux = get_bits!4(f);
3357         if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
3358       }
3359     } else {
3360       // @SPECIFICATION: this case is missing from the spec
3361       foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
3362     }
3363     foreach (immutable j; 0..m.submaps) {
3364       get_bits!8(f); // discard
3365       m.submap_floor[j] = get_bits!8(f);
3366       m.submap_residue[j] = get_bits!8(f);
3367       if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
3368       if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
3369     }
3370   }
3371 
3372   // Modes
3373   f.mode_count = get_bits_add_no!6(f, 1);
3374   foreach (immutable i; 0..f.mode_count) {
3375     Mode* m = f.mode_config.ptr+i;
3376     m.blockflag = get_bits!1(f);
3377     m.windowtype = get_bits!16(f);
3378     m.transformtype = get_bits!16(f);
3379     m.mapping = get_bits!8(f);
3380     if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
3381     if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
3382     if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
3383   }
3384 
3385   flush_packet(f);
3386 
3387   f.previous_length = 0;
3388 
3389   foreach (immutable i; 0..f.vrchannels) {
3390     f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
3391     f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3392     f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
3393     if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3394     version(STB_VORBIS_NO_DEFER_FLOOR) {
3395       f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3396       if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3397     }
3398   }
3399 
3400   if (!init_blocksize(f, 0, f.blocksize_0)) return false;
3401   if (!init_blocksize(f, 1, f.blocksize_1)) return false;
3402   f.blocksize.ptr[0] = f.blocksize_0;
3403   f.blocksize.ptr[1] = f.blocksize_1;
3404 
3405   version(STB_VORBIS_DIVIDE_TABLE) {
3406     if (integer_divide_table[1].ptr[1] == 0) {
3407       foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
3408     }
3409   }
3410 
3411   // compute how much temporary memory is needed
3412 
3413   // 1.
3414   {
3415     uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
3416     uint classify_mem;
3417     int max_part_read = 0;
3418     foreach (immutable i; 0..f.residue_count) {
3419       Residue* r = f.residue_config+i;
3420       int n_read = r.end-r.begin;
3421       int part_read = n_read/r.part_size;
3422       if (part_read > max_part_read) max_part_read = part_read;
3423     }
3424     version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
3425       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
3426     } else {
3427       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
3428     }
3429     f.temp_memory_required = classify_mem;
3430     if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
3431   }
3432 
3433   f.first_decode = true;
3434 
3435   /+
3436   if (f.alloc.alloc_buffer) {
3437     debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3438     // check if there's enough temp memory so we don't error later
3439     if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
3440   }
3441   +/
3442 
3443   f.first_audio_page_offset = f.fileOffset();
3444 
3445   return true;
3446 }
3447 
3448 /+
3449 private int vorbis_search_for_page_pushdata (VorbisDecoder f, ubyte* data, int data_len) {
3450   import core.stdc.string : memcmp;
3451 
3452   foreach (immutable i; 0..f.page_crc_tests) f.scan.ptr[i].bytes_done = 0;
3453 
3454   // if we have room for more scans, search for them first, because
3455   // they may cause us to stop early if their header is incomplete
3456   if (f.page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3457     if (data_len < 4) return 0;
3458     data_len -= 3; // need to look for 4-byte sequence, so don't miss one that straddles a boundary
3459     foreach (immutable i; 0..data_len) {
3460       if (data[i] == 0x4f) {
3461         if (memcmp(data+i, ogg_page_header.ptr, 4) == 0) {
3462           // make sure we have the whole page header
3463           if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3464             // only read up to this page start, so hopefully we'll
3465             // have the whole page header start next time
3466             data_len = i;
3467             break;
3468           }
3469           // ok, we have it all; compute the length of the page
3470           auto len = 27+data[i+26];
3471           foreach (immutable j; 0..data[i+26]) len += data[i+27+j];
3472           // scan everything up to the embedded crc (which we must 0)
3473           uint crc = 0;
3474           foreach (immutable j; 0..22) crc = crc32_update(crc, data[i+j]);
3475           // now process 4 0-bytes
3476           foreach (immutable j; 22..26) crc = crc32_update(crc, 0);
3477           // len is the total number of bytes we need to scan
3478           auto n = f.page_crc_tests++;
3479           f.scan.ptr[n].bytes_left = len-/*j*/26;
3480           f.scan.ptr[n].crc_so_far = crc;
3481           f.scan.ptr[n].goal_crc = data[i+22]+(data[i+23]<<8)+(data[i+24]<<16)+(data[i+25]<<24);
3482           // if the last frame on a page is continued to the next, then
3483           // we can't recover the sample_loc immediately
3484           if (data[i+27+data[i+26]-1] == 255) {
3485             f.scan.ptr[n].sample_loc = ~0;
3486           } else {
3487             f.scan.ptr[n].sample_loc = data[i+6]+(data[i+7]<<8)+(data[i+8]<<16)+(data[i+9]<<24);
3488           }
3489           f.scan.ptr[n].bytes_done = i+26/*j*/;
3490           if (f.page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) break;
3491           // keep going if we still have room for more
3492         }
3493       }
3494     }
3495   }
3496 
3497   for (uint i = 0; i < f.page_crc_tests; ) {
3498     int nn = f.scan.ptr[i].bytes_done;
3499     int m = f.scan.ptr[i].bytes_left;
3500     if (m > data_len-nn) m = data_len-nn;
3501     // m is the bytes to scan in the current chunk
3502     uint crc = f.scan.ptr[i].crc_so_far;
3503     foreach (immutable j; 0..m) crc = crc32_update(crc, data[nn+j]);
3504     f.scan.ptr[i].bytes_left -= m;
3505     f.scan.ptr[i].crc_so_far = crc;
3506     if (f.scan.ptr[i].bytes_left == 0) {
3507       // does it match?
3508       if (f.scan.ptr[i].crc_so_far == f.scan.ptr[i].goal_crc) {
3509         // Houston, we have page
3510         data_len = nn+m; // consumption amount is wherever that scan ended
3511         f.page_crc_tests = -1; // drop out of page scan mode
3512         f.previous_length = 0; // decode-but-don't-output one frame
3513         f.next_seg = -1;       // start a new page
3514         f.current_loc = f.scan.ptr[i].sample_loc; // set the current sample location to the amount we'd have decoded had we decoded this page
3515         f.current_loc_valid = f.current_loc != ~0U;
3516         return data_len;
3517       }
3518       // delete entry
3519       f.scan.ptr[i] = f.scan.ptr[--f.page_crc_tests];
3520     } else {
3521       ++i;
3522     }
3523   }
3524 
3525   return data_len;
3526 }
3527 +/
3528 
// Scan forward from the current read position for the next Ogg page whose
// checksum verifies.
//
// Returns 1 when such a page is found; the read position is then put back on
// the first byte of that page. Returns 0 when the input is exhausted (or the
// candidate lies beyond `f.stream_len`) before a valid page turns up.
//
//   end  - if non-null, receives the file offset just past the found page
//   last - if non-null, receives 1 when header flag bit 0x04 is set, else 0
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  while (!f.eof) {
    // 0x4f is 'O', the first byte of the capture pattern
    if (get8(f) != 0x4f) continue;

    // offset of the byte right after the candidate 'O'
    immutable uint retry_loc = f.fileOffset;
    // check if we're off the end of a file_section stream
    if (retry_loc-25 > f.stream_len) return 0;

    // match the remaining three bytes of the capture pattern; a mismatching
    // byte is consumed, exactly one read per comparison
    int matched = 1;
    while (matched < 4 && get8(f) == ogg_page_header[matched]) ++matched;
    if (f.eof) return 0;

    if (matched == 4) {
      ubyte[27] header;
      header[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
      foreach (ref b; header[4..27]) b = get8(f);
      if (f.eof) return 0;

      // byte 4 must be zero for the candidate to be considered at all
      if (header[4] == 0) {
        // the stored checksum is little-endian in bytes 22..25; it is
        // computed over the page with those four bytes zeroed
        immutable uint goal = header[22]+(header[23]<<8)+(header[24]<<16)+(header[25]<<24);
        header[22..26] = 0;

        uint crc = 0;
        foreach (immutable b; header) crc = crc32_update(crc, b);

        // header[26] lacing values follow; their sum is the payload size
        uint len = 0;
        foreach (immutable _; 0..header[26]) {
          immutable s = get8(f);
          crc = crc32_update(crc, s);
          len += s;
        }
        if (len && f.eof) return 0;
        foreach (immutable _; 0..len) crc = crc32_update(crc, get8(f));

        // finished parsing probable page
        if (crc == goal) {
          // no further validation (last-page flag or a following capture
          // pattern) is done on purpose: files with garbage between pages
          // still decode this way
          if (end) *end = f.fileOffset;
          if (last) *last = (header[5]&0x04 ? 1 : 0);
          set_file_offset(f, retry_loc-1);
          return 1;
        }
      }
    }

    // not a valid page, so rewind and look for the next candidate
    set_file_offset(f, retry_loc);
  }
  return 0;
}
3583 
// all-ones 32-bit sample position
enum SAMPLE_unknown = uint.max;
3585 
3586 // seeking is implemented with a binary search, which narrows down the range to
3587 // 64K, before using a linear search (because finding the synchronization
3588 // pattern can be expensive, and the chance we'd find the end page again is
3589 // relatively high for small ranges)
3590 //
3591 // two initial interpolation-style probes are used at the start of the search
3592 // to try to bound either side of the binary search sensibly, while still
3593 // working in O(log n) time if they fail.
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  // Reads the page header at the current file position and fills `z` with
  // the page's start offset, end offset and the 32-bit sample position stored
  // in header bytes 6..9. The file position is put back to the page start on
  // success. Returns 1 on success, 0 when the bytes read do not begin with
  // "OggS".
  ubyte[27] hdr;
  ubyte[255] segtable;

  // remember where this page begins
  z.page_start = f.fileOffset;

  // fixed-size part of the header, then the capture pattern check
  getn(f, hdr.ptr, 27);
  if (cast(const(char)[])hdr[0..4] != "OggS") return 0;

  // hdr[26] is the number of lacing values that follow
  getn(f, segtable.ptr, hdr[26]);

  // payload size is the sum of all lacing values
  uint payload = 0;
  foreach (immutable lace; segtable[0..hdr[26]]) payload += lace;

  // fixed header + lacing table + payload
  z.page_end = z.page_start+27+hdr[26]+payload;

  // little-endian 32-bit value in bytes 6..9
  z.last_decoded_sample = hdr[6]+(hdr[7]<<8)+(hdr[8]<<16)+(hdr[9]<<24);

  // leave the stream where we found it
  set_file_offset(f, z.page_start);
  return 1;
}
3620 
// Rarely used: position the stream on the page preceding `limit_offset`,
// i.e. the page that starts before `limit_offset` and ends at or after it.
// Used while hunting for the start of a packet. Returns 1 with the stream
// positioned on that page, 0 if no such page was found.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  enum uint BACKTRACK = 65536; // how far before the limit the scan begins

  // begin scanning 64K before the limit, but never before the first audio page
  uint scan_from = f.first_audio_page_offset;
  if (limit_offset >= BACKTRACK && limit_offset-BACKTRACK >= f.first_audio_page_offset) {
    scan_from = limit_offset-BACKTRACK;
  }
  set_file_offset(f, scan_from);

  uint page_end;
  while (vorbis_find_page(f, &page_end, null)) {
    // vorbis_find_page leaves the stream at the start of the page it found
    if (page_end >= limit_offset && f.fileOffset < limit_offset) return 1;
    // this page ends too early; continue from just past it
    set_file_offset(f, page_end);
  }

  return 0;
}
3641 
// Coarse seek: locate a page from which decoding can start for the requested
// sample and begin decoding there. On success (return 1) current_loc_valid is
// true and current_loc is less than or equal to `sample_number` (the closer
// the better). On failure an error is recorded on `f` and its result is
// returned.
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  ProbedPage left, right, mid;
  double offset, bytes_per_sample;
  int probe = 0;

  // common failure exit: try to restore the file to a valid state first
  int fail () {
    f.seekStart;
    return error(f, STBVorbisError.seek_failed);
  }

  // find the last page and validate the target sample
  immutable uint stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  immutable uint padding = ((f.blocksize_1-f.blocksize_0)>>2);
  sample_number = (sample_number < padding ? 0 : sample_number-padding);

  // left bound: first page that carries a sample position
  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) return fail();
  }

  // right bound: the last page
  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // a target inside the first page simply restarts the stream
  if (sample_number <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  // narrow [left, right] until the two pages are adjacent
  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    immutable uint delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe >= 2) {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      } else {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(sample_number-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side): overshoot by twice
          // the estimated miss, with the miss forced to at least 8000 bytes
          double miss = (cast(double)sample_number-mid.last_decoded_sample)*bytes_per_sample;
          if (miss >= 0) {
            if (miss < 8000) miss = 8000;
          } else if (miss > -8000) {
            miss = -8000;
          }
          offset += miss*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      }

      if (!vorbis_find_page(f, null, null)) return fail();
    }

    // read page info, stepping over pages that carry no sample position
    if (!get_seek_page_info(f, &mid)) return fail();
    while (mid.last_decoded_sample == ~0U) {
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
      if (!get_seek_page_info(f, &mid)) return fail();
    }

    // if we've just found the last page again then we're in a tricky file,
    // and we're close enough.
    if (mid.page_start == right.page_start) break;

    if (sample_number < mid.last_decoded_sample) right = mid; else left = mid;

    ++probe;
  }

  // seek back to start of the last packet
  int page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  int end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  int first_seg;
  for (;;) {
    // walk back over segments of length 255 to find where the packet starts
    first_seg = end_pos;
    while (first_seg > 0 && f.segments.ptr[first_seg-1] == 255) --first_seg;
    if (first_seg > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) return fail();
    page_start = f.fileOffset;
    if (!start_page(f)) return fail();
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = first_seg;

  // skip the payload of every segment that precedes the packet
  foreach (immutable seg; 0..first_seg) skip(f, f.segments.ptr[seg]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;
}
3772 
// Like vorbis_decode_initial, except that the bytes it consumed are handed
// back afterwards, so the stream does not advance. Returns 0 if
// vorbis_decode_initial fails, 1 otherwise.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // either 1 or 2 bytes were read, figure out which so we can rewind:
  // one bit, plus the mode number, plus two more bits when the mode's
  // blockflag is set
  immutable int header_bits = 1+ilog(f.mode_count-1)+(f.mode_config.ptr[*mode].blockflag ? 2 : 0);
  immutable int rewind = (header_bits+7)/8;

  // give the bytes back to the current segment/packet and move the stream back
  f.bytes_in_seg += rewind;
  f.packet_bytes -= rewind;
  skip(f, -rewind);

  // step the segment cursor back by one; -1 maps to the page's last segment
  f.next_seg = (f.next_seg == -1 ? f.segment_count-1 : f.next_seg-1);
  f.valid_bits = 0;

  return 1;
}
3790 
// ////////////////////////////////////////////////////////////////////////// //
// utility and supporting functions for getting s16 samples

// bit flags used in the channel_position table
enum : int {
  PLAYBACK_MONO  = 0x01,
  PLAYBACK_LEFT  = 0x02,
  PLAYBACK_RIGHT = 0x04,
}

// shorthand flag combinations used in the table below
enum : int {
  L = PLAYBACK_LEFT|PLAYBACK_MONO,
  C = PLAYBACK_LEFT|PLAYBACK_RIGHT|PLAYBACK_MONO,
  R = PLAYBACK_RIGHT|PLAYBACK_MONO,
}

// channel_position[channel_count][channel] is the flag set of one source
// channel; unused trailing slots are zero
immutable byte[6][7] channel_position = [
  [ 0, 0, 0, 0, 0, 0 ],
  [ C, 0, 0, 0, 0, 0 ],
  [ L, R, 0, 0, 0, 0 ],
  [ L, C, R, 0, 0, 0 ],
  [ L, R, L, R, 0, 0 ],
  [ L, C, R, L, R, 0 ],
  [ L, C, R, L, R, C ],
];
3810 
3811 
// Code-generating helpers for float -> scaled int conversion. Both variants
// expose the same two names:
//   declfcvar!name                 - source text declaring the scratch variable
//   FAST_SCALED_FLOAT_TO_INT!(x,s) - source text that declares `int v` holding
//                                    the converted value of expression `x`
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // lrintf-based variant: no scratch variable is needed, so the declaration
  // expands to an empty block
  template declfcvar(string name) {
    enum declfcvar = "{}";
  }
  // only a shift of "15" is accepted here (the scale factor is 32768)
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    static assert(s == "15");
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // float and int views of the same four bytes
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);

  // declares an uninitialized float_conv under the given name; the conversion
  // text below refers to a variable literally named `temp`
  template declfcvar(string name) {
    enum declfcvar = "float_conv "~name~" = void;";
  }

  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  template MAGIC(string SHIFT) {
    enum MAGIC = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  }
  template ADDEND(string SHIFT) {
    enum ADDEND = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  }
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    enum FAST_SCALED_FLOAT_TO_INT = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
      .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
  }
}
3834 
// Converts `len` floats from `src` to s16 in `dest`, clamping to [-32768, 32767].
private void copy_samples (short* dest, float* src, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  for (int idx = 0; idx < len; ++idx, ++src, ++dest) {
    // the mixin declares `int v` with the scaled value of *src
    mixin(FAST_SCALED_FLOAT_TO_INT!("*src", "15"));
    // one unsigned compare detects both under- and overflow of the s16 range
    if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
    *dest = cast(short)v; //k8
  }
}
3845 
// Mixes every source channel whose channel_position entry matches `mask` into
// one s16 output channel.
//   mask     -- PLAYBACK_* bit(s) selecting which source channels contribute
//   output   -- destination, receives `len` samples
//   num_c    -- number of source channels (indexes channel_position)
//   data     -- per-channel float buffers, read from `d_offset`
//   len      -- number of samples; values <= 0 produce no output
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer;
  //check_endianness();
  mixin(declfcvar!"temp");
  // signed offset: an unsigned counter compared against a negative `len` would never terminate
  int o = 0;
  while (o < len) {
    // size of this chunk: a full buffer, or whatever is left at the tail
    immutable int n = (len-o < BUFFER_SIZE ? len-o : BUFFER_SIZE);
    buffer[] = 0;
    foreach (immutable j; 0..num_c) {
      if (channel_position[num_c].ptr[j]&mask) foreach (immutable i; 0..n) buffer.ptr[i] += data[j][d_offset+o+i];
    }
    foreach (immutable i; 0..n) {
      // the mixin declares `int v` with the scaled value of buffer[i]
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // clamp to the s16 range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o+i] = cast(short)v; //k8
    }
    o += n;
  }
}
3866 
// Downmixes `num_c` source channels to interleaved stereo s16.
//   output   -- destination, receives 2*len shorts (L, R, L, R, ...)
//   num_c    -- number of source channels (indexes channel_position)
//   data     -- per-channel float buffers, read from `d_offset`
//   len      -- number of sample frames; values <= 0 produce no output
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  enum FRAMES_PER_CHUNK = BUFFER_SIZE>>1; // the buffer holds interleaved L/R pairs
  float[BUFFER_SIZE] buffer;
  //check_endianness();
  mixin(declfcvar!"temp");
  // o is the offset in the source data; signed so a negative `len` cannot loop forever
  int o = 0;
  while (o < len) {
    // frames in this chunk: a full buffer, or whatever is left at the tail
    immutable int n = (len-o < FRAMES_PER_CHUNK ? len-o : FRAMES_PER_CHUNK);
    // o2 is the offset in the output data
    immutable int o2 = o<<1;
    buffer[] = 0;
    foreach (immutable j; 0..num_c) {
      immutable int m = channel_position[num_c].ptr[j]&(PLAYBACK_LEFT|PLAYBACK_RIGHT);
      if (m == (PLAYBACK_LEFT|PLAYBACK_RIGHT)) {
        // a channel flagged for both sides goes to both outputs
        foreach (immutable i; 0..n) {
          buffer.ptr[i*2+0] += data[j][d_offset+o+i];
          buffer.ptr[i*2+1] += data[j][d_offset+o+i];
        }
      } else if (m == PLAYBACK_LEFT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+0] += data[j][d_offset+o+i];
      } else if (m == PLAYBACK_RIGHT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+1] += data[j][d_offset+o+i];
      }
    }
    foreach (immutable i; 0..n<<1) {
      // the mixin declares `int v` with the scaled value of buffer[i]
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // clamp to the s16 range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o2+i] = cast(short)v; //k8
    }
    o += n;
  }
}
3901 
// Converts decoded float channels into separate (planar) s16 buffers.
//   buf_c/buffer/b_offset -- number of output channels, their buffers, write offset
//   data_c/data/d_offset  -- number of source channels, their buffers, read offset
//   samples               -- samples per channel; values <= 0 do nothing
// When the channel counts differ and the output is mono/stereo (with at most
// 6 sources), the sources are downmixed; otherwise channels are copied 1:1
// and any surplus output channels are zero-filled.
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  // nothing to convert; also keeps a negative count from becoming a huge fill size
  if (samples <= 0) return;

  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    // per output channel: which PLAYBACK_* bit selects its sources
    immutable int[2][3] channel_selector = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
    foreach (immutable i; 0..buf_c) compute_samples(channel_selector[buf_c].ptr[i], buffer[i]+b_offset, data_c, data, d_offset, samples);
  } else {
    immutable int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable i; 0..limit) copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
    // output channels with no matching source get silence
    foreach (immutable i; limit..buf_c) (buffer[i]+b_offset)[0..samples] = 0;
  }
}
3914 
// Converts decoded float channels into one interleaved s16 buffer.
//   buf_c/buffer          -- number of output channels and the interleaved destination
//   data_c/data/d_offset  -- number of source channels, their buffers, read offset
//   len                   -- number of sample frames; values <= 0 do nothing
// When the channel counts differ and the output is mono/stereo (with at most
// 6 sources), the sources are downmixed; otherwise channels are copied 1:1
// and any surplus output channels are zero-filled.
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  if (len <= 0) return;
  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    if (buf_c == 2) {
      // one call fills the whole interleaved buffer; repeating it per output channel only redid the work
      compute_stereo_samples(buffer, data_c, data, d_offset, len);
    } else if (buf_c == 1) {
      // mono interleaved is the same layout as a single planar channel;
      // the stereo path here would write 2*len shorts into a len-sized buffer
      compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
    }
  } else {
    //check_endianness();
    mixin(declfcvar!"temp");
    immutable int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable j; 0..len) {
      foreach (immutable i; 0..limit) {
        float f = data[i][d_offset+j];
        // the mixin declares `int v` with the scaled value of f
        mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));//data[i][d_offset+j], 15);
        // clamp to the s16 range
        if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
        *buffer++ = cast(short)v; //k8
      }
      // output channels with no matching source get silence
      foreach (immutable i; limit..buf_c) *buffer++ = 0;
    }
  }
}
3934 } // @nogc
3935 
3936 
3937 public class VorbisDecoder {
3938   // return # of bytes read, 0 on eof, -1 on error
3939   // if called with `buf is null`, do `close()`
3940   alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3941 
3942   //TODO
3943   static struct Allocator {
3944   static nothrow @nogc: // because
3945     void* alloc (uint sz, VorbisDecoder vb) {
3946       import core.stdc.stdlib : malloc;
3947       return malloc(sz);
3948     }
3949     void free (void* p, VorbisDecoder vb) {
3950       import core.stdc.stdlib : free;
3951       free(p);
3952     }
3953     void* allocTemp (uint sz, VorbisDecoder vb) {
3954       import core.stdc.stdlib : malloc;
3955       return malloc(sz);
3956     }
3957     void freeTemp (void* p, uint sz, VorbisDecoder vb) {
3958       import core.stdc.stdlib : free;
3959       free(p);
3960     }
3961     uint tempSave (VorbisDecoder vb) { return 0; }
3962     void tempRestore (uint pos, VorbisDecoder vb) {}
3963   }
3964 
3965 nothrow @nogc:
3966 private:
  bool isOpened;             // true while a stream is attached; the raw* helpers do nothing otherwise
  readCB stmread;            // read callback; called with a null buffer to request close
  uint stlastofs = uint.max; // offset the FILE-backed reader last seeked to (uint.max: none yet)
  uint stst;                 // absolute offset where the stream starts
  uint stpos;                // absolute offset of the next read
  uint stend;                // absolute offset where the stream ends; reads are clipped to it
  bool stclose;              // when set, the FILE-backed reader fclose()s `stfl` on close
  FILE* stfl;                // underlying C file for the FILE-backed reader
3975 
3976 private:
3977   //ubyte* stream;
3978   //ubyte* stream_start;
3979   //ubyte* stream_end;
3980   //uint stream_len;
3981 
3982   /+bool push_mode;+/
3983 
3984   uint first_audio_page_offset;
3985 
3986   ProbedPage p_first, p_last;
3987 
3988   // memory management
3989   Allocator alloc;
3990   int setup_offset;
3991   int temp_offset;
3992 
3993   // run-time results
3994   bool eof = true;
3995   STBVorbisError error;
3996 
3997   // header info
3998   int[2] blocksize;
3999   int blocksize_0, blocksize_1;
4000   int codebook_count;
4001   Codebook* codebooks;
4002   int floor_count;
4003   ushort[64] floor_types; // varies
4004   Floor* floor_config;
4005   int residue_count;
4006   ushort[64] residue_types; // varies
4007   Residue* residue_config;
4008   int mapping_count;
4009   Mapping* mapping;
4010   int mode_count;
4011   Mode[64] mode_config;  // varies
4012 
4013   uint total_samples;
4014 
4015   // decode buffer
4016   float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
4017   float*[STB_VORBIS_MAX_CHANNELS] outputs;
4018 
4019   float*[STB_VORBIS_MAX_CHANNELS] previous_window;
4020   int previous_length;
4021 
4022   version(STB_VORBIS_NO_DEFER_FLOOR) {
4023     float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
4024   } else {
4025     short*[STB_VORBIS_MAX_CHANNELS] finalY;
4026   }
4027 
4028   uint current_loc; // sample location of next frame to decode
4029   int current_loc_valid;
4030 
4031   // per-blocksize precomputed data
4032 
4033   // twiddle factors
4034   float*[2] A, B, C;
4035   float*[2] window;
4036   ushort*[2] bit_reverse;
4037 
4038   // current page/packet/segment streaming info
4039   uint serial; // stream serial number for verification
4040   int last_page;
4041   int segment_count;
4042   ubyte[255] segments;
4043   ubyte page_flag;
4044   ubyte bytes_in_seg;
4045   ubyte first_decode;
4046   int next_seg;
4047   int last_seg;  // flag that we're on the last segment
4048   int last_seg_which; // what was the segment number of the last seg?
4049   uint acc;
4050   int valid_bits;
4051   int packet_bytes;
4052   int end_seg_with_known_loc;
4053   uint known_loc_for_packet;
4054   int discard_samples_deferred;
4055   uint samples_output;
4056 
4057   // push mode scanning
4058   /+
4059   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
4060   CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
4061   +/
4062 
4063   // sample-access
4064   int channel_buffer_start;
4065   int channel_buffer_end;
4066 
4067 private: // k8: 'cause i'm evil
4068   // user-accessible info
4069   uint sample_rate;
4070   int vrchannels;
4071 
4072   uint setup_memory_required;
4073   uint temp_memory_required;
4074   uint setup_temp_memory_required;
4075 
4076   bool read_comments;
4077   ubyte* comment_data;
4078   uint comment_size;
4079 
4080   // functions to get comment data
4081   uint comment_data_pos;
4082 
4083 private:
4084   int rawRead (void[] buf) {
4085     static if (__VERSION__ > 2067) pragma(inline, true);
4086     if (isOpened && buf.length > 0 && stpos < stend) {
4087       if (stend-stpos < buf.length) buf = buf[0..stend-stpos];
4088       auto rd = stmread(buf, stpos, this);
4089       if (rd > 0) stpos += rd;
4090       return rd;
4091     }
4092     return 0;
4093   }
4094   void rawSkip (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened && n > 0) { if ((stpos += n) > stend) stpos = stend; } }
4095   void rawSeek (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { stpos = stst+(n < 0 ? 0 : n); if (stpos > stend) stpos = stend; } }
4096   void rawClose () { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { isOpened = false; stmread(null, 0, this); } }
4097 
4098 final:
4099 private:
4100   void doInit () {
4101     import core.stdc..string : memset;
4102     /*
4103     if (z) {
4104       alloc = *z;
4105       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
4106       temp_offset = alloc.alloc_buffer_length_in_bytes;
4107     }
4108     */
4109     eof = false;
4110     error = STBVorbisError.no_error;
4111     /+stream = null;+/
4112     codebooks = null;
4113     /+page_crc_tests = -1;+/
4114   }
4115 
4116   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
4117     if (buf !is null) {
4118       //{ import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
4119       if (vb.stlastofs != ofs) {
4120         import core.stdc.stdio : fseek, SEEK_SET;
4121         vb.stlastofs = ofs;
4122         fseek(vb.stfl, ofs, SEEK_SET);
4123       }
4124       import core.stdc.stdio : fread;
4125       return cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
4126     } else {
4127       if (vb.stclose) {
4128         import core.stdc.stdio : fclose;
4129         if (vb.stfl !is null) fclose(vb.stfl);
4130       }
4131       vb.stfl = null;
4132       return 0;
4133     }
4134   }
4135 
4136 public:
4137   this () {}
4138   ~this () { close(); }
4139 
4140   this (int asize, readCB rcb) { assert(rcb !is null); stend = (asize > 0 ? asize : 0); stmread = rcb; }
4141   this (FILE* fl, bool doclose=true) { open(fl, doclose); }
4142   this (const(char)[] filename) { open(filename); }
4143 
4144   @property bool closed () { return !isOpened; }
4145 
4146   void open (FILE *fl, bool doclose=true) {
4147     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
4148     close();
4149     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
4150     stclose = doclose;
4151     stst = stpos = cast(uint)ftell(fl);
4152     fseek(fl, 0, SEEK_END);
4153     stend = cast(uint)ftell(fl);
4154     stlastofs = stlastofs.max;
4155     stclose = false;
4156     stfl = fl;
4157     import std.functional : toDelegate;
4158     stmread = toDelegate(&stflRead);
4159     isOpened = true;
4160     eof = false;
4161     read_comments = true;
4162     if (start_decoder(this)) {
4163       vorbis_pump_first_frame(this);
4164       return;
4165     }
4166     auto err = error;
4167     close();
4168     error = err;
4169   }
4170 
4171   void open (const(char)[] filename) {
4172     import core.stdc.stdio : fopen;
4173     import std.internal.cstring; // sorry
4174     close();
4175     FILE* fl = fopen(filename.tempCString, "rb");
4176     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
4177     open(fl, true);
4178   }
4179 
4180   /+
4181   void openPushdata(void* data, int data_len, // the memory available for decoding
4182                     int* data_used)           // only defined on success
4183   {
4184     close();
4185     eof = false;
4186     stream = cast(ubyte*)data;
4187     stream_end = stream+data_len;
4188     push_mode = true;
4189     if (!start_decoder(this)) {
4190       auto err = error;
4191       if (eof) err = STBVorbisError.need_more_data; else close();
4192       error = err;
4193       return;
4194     }
4195     *data_used = stream-(cast(ubyte*)data);
4196     error = STBVorbisError.no_error;
4197   }
4198   +/
4199 
4200   void close () {
4201     import core.stdc..string : memset;
4202 
4203     setup_free(this, this.comment_data);
4204     if (this.residue_config) {
4205       foreach (immutable i; 0..this.residue_count) {
4206         Residue* r = this.residue_config+i;
4207         if (r.classdata) {
4208           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
4209           setup_free(this, r.classdata);
4210         }
4211         setup_free(this, r.residue_books);
4212       }
4213     }
4214 
4215     if (this.codebooks) {
4216       foreach (immutable i; 0..this.codebook_count) {
4217         Codebook* c = this.codebooks+i;
4218         setup_free(this, c.codeword_lengths);
4219         setup_free(this, c.multiplicands);
4220         setup_free(this, c.codewords);
4221         setup_free(this, c.sorted_codewords);
4222         // c.sorted_values[-1] is the first entry in the array
4223         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
4224       }
4225       setup_free(this, this.codebooks);
4226     }
4227     setup_free(this, this.floor_config);
4228     setup_free(this, this.residue_config);
4229     if (this.mapping) {
4230       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
4231       setup_free(this, this.mapping);
4232     }
4233     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
4234       setup_free(this, this.channel_buffers.ptr[i]);
4235       setup_free(this, this.previous_window.ptr[i]);
4236       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
4237       setup_free(this, this.finalY.ptr[i]);
4238     }
4239     foreach (immutable i; 0..2) {
4240       setup_free(this, this.A.ptr[i]);
4241       setup_free(this, this.B.ptr[i]);
4242       setup_free(this, this.C.ptr[i]);
4243       setup_free(this, this.window.ptr[i]);
4244       setup_free(this, this.bit_reverse.ptr[i]);
4245     }
4246 
4247     rawClose();
4248     isOpened = false;
4249     stmread = null;
4250     stlastofs = uint.max;
4251     stst = 0;
4252     stpos = 0;
4253     stend = 0;
4254     stclose = false;
4255     stfl = null;
4256 
4257     sample_rate = 0;
4258     vrchannels = 0;
4259 
4260     setup_memory_required = 0;
4261     temp_memory_required = 0;
4262     setup_temp_memory_required = 0;
4263 
4264     read_comments = 0;
4265     comment_data = null;
4266     comment_size = 0;
4267 
4268     comment_data_pos = 0;
4269 
4270     /+
4271     stream = null;
4272     stream_start = null;
4273     stream_end = null;
4274     +/
4275 
4276     //stream_len = 0;
4277 
4278     /+push_mode = false;+/
4279 
4280     first_audio_page_offset = 0;
4281 
4282     p_first = p_first.init;
4283     p_last = p_last.init;
4284 
4285     setup_offset = 0;
4286     temp_offset = 0;
4287 
4288     eof = true;
4289     error = STBVorbisError.no_error;
4290 
4291     blocksize[] = 0;
4292     blocksize_0 = 0;
4293     blocksize_1 = 0;
4294     codebook_count = 0;
4295     codebooks = null;
4296     floor_count = 0;
4297     floor_types[] = 0;
4298     floor_config = null;
4299     residue_count = 0;
4300     residue_types[] = 0;
4301     residue_config = null;
4302     mapping_count = 0;
4303     mapping = null;
4304     mode_count = 0;
4305     mode_config[] = Mode.init;
4306 
4307     total_samples = 0;
4308 
4309     channel_buffers[] = null;
4310     outputs[] = null;
4311 
4312     previous_window[] = null;
4313     previous_length = 0;
4314 
4315     version(STB_VORBIS_NO_DEFER_FLOOR) {
4316       floor_buffers[] = null;
4317     } else {
4318       finalY[] = null;
4319     }
4320 
4321     current_loc = 0;
4322     current_loc_valid = 0;
4323 
4324     A[] = null;
4325     B[] = null;
4326     C[] = null;
4327     window[] = null;
4328     bit_reverse = null;
4329 
4330     serial = 0;
4331     last_page = 0;
4332     segment_count = 0;
4333     segments[] = 0;
4334     page_flag = 0;
4335     bytes_in_seg = 0;
4336     first_decode = 0;
4337     next_seg = 0;
4338     last_seg = 0;
4339     last_seg_which = 0;
4340     acc = 0;
4341     valid_bits = 0;
4342     packet_bytes = 0;
4343     end_seg_with_known_loc = 0;
4344     known_loc_for_packet = 0;
4345     discard_samples_deferred = 0;
4346     samples_output = 0;
4347 
4348     /+
4349     page_crc_tests = -1;
4350     scan[] = CRCscan.init;
4351     +/
4352 
4353     channel_buffer_start = 0;
4354     channel_buffer_end = 0;
4355   }
4356 
4357   @property const pure {
4358     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4359 
4360     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4361     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4362     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4363 
4364     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4365     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4366     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4367   }
4368 
4369   // will clear last error
4370   @property int lastError () {
4371     int e = error;
4372     error = STBVorbisError.no_error;
4373     return e;
4374   }
4375 
4376   // PUSHDATA API
4377   /+
4378   void flushPushdata () {
4379     if (push_mode) {
4380       previous_length = 0;
4381       page_crc_tests = 0;
4382       discard_samples_deferred = 0;
4383       current_loc_valid = false;
4384       first_decode = false;
4385       samples_output = 0;
4386       channel_buffer_start = 0;
4387       channel_buffer_end = 0;
4388     }
4389   }
4390 
4391   // return value: number of bytes we used
4392   int decodeFramePushdata(
4393            void* data, int data_len, // the memory available for decoding
4394            int* channels,            // place to write number of float* buffers
4395            float*** output,          // place to write float** array of float* buffers
4396            int* samples              // place to write number of output samples
4397        )
4398   {
4399     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4400 
4401     if (this.page_crc_tests >= 0) {
4402       *samples = 0;
4403       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4404     }
4405 
4406     this.stream = cast(ubyte*)data;
4407     this.stream_end = this.stream+data_len;
4408     this.error = STBVorbisError.no_error;
4409 
4410     // check that we have the entire packet in memory
4411     if (!is_whole_packet_present(this, false)) {
4412       *samples = 0;
4413       return 0;
4414     }
4415 
4416     int len, left, right;
4417 
4418     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4419       // save the actual error we encountered
4420       STBVorbisError error = this.error;
4421       if (error == STBVorbisError.bad_packet_type) {
4422         // flush and resynch
4423         this.error = STBVorbisError.no_error;
4424         while (get8_packet(this) != EOP) if (this.eof) break;
4425         *samples = 0;
4426         return this.stream-data;
4427       }
4428       if (error == STBVorbisError.continued_packet_flag_invalid) {
4429         if (this.previous_length == 0) {
4430           // we may be resynching, in which case it's ok to hit one
4431           // of these; just discard the packet
4432           this.error = STBVorbisError.no_error;
4433           while (get8_packet(this) != EOP) if (this.eof) break;
4434           *samples = 0;
4435           return this.stream-data;
4436         }
4437       }
4438       // if we get an error while parsing, what to do?
4439       // well, it DEFINITELY won't work to continue from where we are!
4440       flushPushdata();
4441       // restore the error that actually made us bail
4442       this.error = error;
4443       *samples = 0;
4444       return 1;
4445     }
4446 
4447     // success!
4448     len = vorbis_finish_frame(this, len, left, right);
4449     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4450 
4451     if (channels) *channels = this.vrchannels;
4452     *samples = len;
4453     *output = this.outputs.ptr;
4454     return this.stream-data;
4455   }
4456   +/
4457 
4458   public uint fileOffset () {
4459     if (/+push_mode ||+/ !isOpened) return 0;
4460     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4461     return (stpos > stst ? stpos-stst : 0);
4462   }
4463 
4464   public uint stream_len () { return stend-stst; }
4465 
4466   // DATA-PULLING API
4467   public int seekFrame (uint sample_number) {
4468     uint max_frame_samples;
4469 
4470     /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/
4471 
4472     // fast page-level search
4473     if (!seek_to_sample_coarse(this, sample_number)) return 0;
4474 
4475     assert(this.current_loc_valid);
4476     assert(this.current_loc <= sample_number);
4477 
4478     // linear search for the relevant packet
4479     max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
4480     while (this.current_loc < sample_number) {
4481       int left_start, left_end, right_start, right_end, mode, frame_samples;
4482       if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
4483       // calculate the number of samples returned by the next frame
4484       frame_samples = right_start-left_start;
4485       if (this.current_loc+frame_samples > sample_number) {
4486         return 1; // the next frame will contain the sample
4487       } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
4488         // there's a chance the frame after this could contain the sample
4489         vorbis_pump_first_frame(this);
4490       } else {
4491         // this frame is too early to be relevant
4492         this.current_loc += frame_samples;
4493         this.previous_length = 0;
4494         maybe_start_packet(this);
4495         flush_packet(this);
4496       }
4497     }
4498     // the next frame will start with the sample
4499     assert(this.current_loc == sample_number);
4500     return 1;
4501   }
4502 
4503   public int seek (uint sample_number) {
4504     if (!seekFrame(sample_number)) return 0;
4505     if (sample_number != this.current_loc) {
4506       int n;
4507       uint frame_start = this.current_loc;
4508       getFrameFloat(&n, null);
4509       assert(sample_number > frame_start);
4510       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4511       this.channel_buffer_start += (sample_number-frame_start);
4512     }
4513     return 1;
4514   }
4515 
4516   public bool seekStart () {
4517     /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
4518     set_file_offset(this, first_audio_page_offset);
4519     previous_length = 0;
4520     first_decode = true;
4521     next_seg = -1;
4522     return vorbis_pump_first_frame(this);
4523   }
4524 
  // Returns the total number of samples in the stream, or 0 when it cannot be
  // determined. The value is computed once by locating the last page and
  // reading its granule position, then cached in `total_samples`; the read
  // position is restored before returning.
  public uint streamLengthInSamples () {
    uint restore_offset, previous_safe;
    uint end, last_page_loc;

    /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
    // total_samples == 0 means "not computed yet"
    if (!this.total_samples) {
      uint last;
      uint lo, hi;
      char[6] header;

      // first, store the current decode position so we can restore it
      restore_offset = fileOffset;

      // now we want to seek back 64K from the end (the last page must
      // be at most a little less than 64K, but let's allow a little slop)
      if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
        previous_safe = this.stream_len-65536;
      } else {
        previous_safe = this.first_audio_page_offset;
      }

      set_file_offset(this, previous_safe);
      // previous_safe is now our candidate 'earliest known place that seeking
      // to will lead to the final page'

      if (!vorbis_find_page(this, &end, &last)) {
        // if we can't find a page, we're hosed!
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = 0xffffffff;
        goto done;
      }

      // check if there are more pages
      last_page_loc = fileOffset;

      // stop when the last_page flag is set, not when we reach eof;
      // this allows us to stop short of a 'file_section' end without
      // explicitly checking the length of the section
      while (!last) {
        set_file_offset(this, end);
        if (!vorbis_find_page(this, &end, &last)) {
          // the last page we found didn't have the 'last page' flag set. whoops!
          break;
        }
        previous_safe = last_page_loc+1;
        last_page_loc = fileOffset;
      }

      set_file_offset(this, last_page_loc);

      // parse the header
      getn(this, cast(ubyte*)header, 6);
      // extract the absolute granule position
      lo = get32(this);
      hi = get32(this);
      // an all-ones granule position is treated as "unknown"
      if (lo == 0xffffffff && hi == 0xffffffff) {
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = SAMPLE_unknown;
        goto done;
      }
      // sample positions are 32-bit here, so anything larger is clamped
      if (hi) lo = 0xfffffffe; // saturate
      this.total_samples = lo;

      // remember the last page for later use
      this.p_last.page_start = last_page_loc;
      this.p_last.page_end = end;
      this.p_last.last_decoded_sample = lo;

     done:
      // put the read position back where the caller left it
      set_file_offset(this, restore_offset);
    }
    return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
  }
4597 
4598   public float streamLengthInSeconds () {
4599     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4600   }
4601 
4602   public int getFrameFloat (int* channels, float*** output) {
4603     int len, right, left;
4604     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4605 
4606     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4607       channel_buffer_start = channel_buffer_end = 0;
4608       return 0;
4609     }
4610 
4611     len = vorbis_finish_frame(this, len, left, right);
4612     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4613 
4614     channel_buffer_start = left;
4615     channel_buffer_end = left+len;
4616 
4617     if (channels) *channels = this.vrchannels;
4618     if (output) *output = this.outputs.ptr;
4619     return len;
4620   }
4621 
4622   /+
4623   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4624     VorbisDecoder this;
4625     stb_vorbis_ctx p = void;
4626     if (data is null) return null;
4627     vorbis_init(&p, alloc);
4628     p.stream = cast(ubyte*)data;
4629     p.stream_end = cast(ubyte*)data+len;
4630     p.stream_start = cast(ubyte*)p.stream;
4631     p.stream_len = len;
4632     p.push_mode = false;
4633     if (start_decoder(&p)) {
4634       this = vorbis_alloc(&p);
4635       if (this) {
4636         *this = p;
4637         vorbis_pump_first_frame(this);
4638         return this;
4639       }
4640     }
4641     if (error) *error = p.error;
4642     vorbis_deinit(&p);
4643     return null;
4644   }
4645   +/
4646 
4647   // s16 samples API
4648   int getFrameShort (int num_c, short** buffer, int num_samples) {
4649     float** output;
4650     int len = getFrameFloat(null, &output);
4651     if (len > num_samples) len = num_samples;
4652     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4653     return len;
4654   }
4655 
4656   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4657     float** output;
4658     int len;
4659     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4660     len = getFrameFloat(null, &output);
4661     if (len) {
4662       if (len*num_c > num_shorts) len = num_shorts/num_c;
4663       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4664     }
4665     return len;
4666   }
4667 
4668   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4669     float** outputs;
4670     int len = num_shorts/channels;
4671     int n = 0;
4672     int z = this.vrchannels;
4673     if (z > channels) z = channels;
4674     while (n < len) {
4675       int k = channel_buffer_end-channel_buffer_start;
4676       if (n+k >= len) k = len-n;
4677       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4678       buffer += k*channels;
4679       n += k;
4680       channel_buffer_start += k;
4681       if (n == len) break;
4682       if (!getFrameFloat(null, &outputs)) break;
4683     }
4684     return n;
4685   }
4686 
4687   int getSamplesShort (int channels, short** buffer, int len) {
4688     float** outputs;
4689     int n = 0;
4690     int z = this.vrchannels;
4691     if (z > channels) z = channels;
4692     while (n < len) {
4693       int k = channel_buffer_end-channel_buffer_start;
4694       if (n+k >= len) k = len-n;
4695       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4696       n += k;
4697       channel_buffer_start += k;
4698       if (n == len) break;
4699       if (!getFrameFloat(null, &outputs)) break;
4700     }
4701     return n;
4702   }
4703 
4704   /+
4705   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4706     import core.stdc.stdlib : malloc, realloc;
4707 
4708     int data_len, offset, total, limit, error;
4709     short* data;
4710     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4711     if (v is null) return -1;
4712     limit = v.vrchannels*4096;
4713     *channels = v.vrchannels;
4714     if (sample_rate) *sample_rate = v.sample_rate;
4715     offset = data_len = 0;
4716     total = limit;
4717     data = cast(short*)malloc(total*(*data).sizeof);
4718     if (data is null) {
4719       stb_vorbis_close(v);
4720       return -2;
4721     }
4722     for (;;) {
4723       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4724       if (n == 0) break;
4725       data_len += n;
4726       offset += n*v.vrchannels;
4727       if (offset+limit > total) {
4728         short *data2;
4729         total *= 2;
4730         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4731         if (data2 is null) {
4732           import core.stdc.stdlib : free;
4733           free(data);
4734           stb_vorbis_close(v);
4735           return -2;
4736         }
4737         data = data2;
4738       }
4739     }
4740     *output = data;
4741     stb_vorbis_close(v);
4742     return data_len;
4743   }
4744 
4745   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4746     import core.stdc.stdlib : malloc, realloc;
4747 
4748     int data_len, offset, total, limit, error;
4749     short* data;
4750     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4751     if (v is null) return -1;
4752     limit = v.vrchannels*4096;
4753     *channels = v.vrchannels;
4754     if (sample_rate) *sample_rate = v.sample_rate;
4755     offset = data_len = 0;
4756     total = limit;
4757     data = cast(short*)malloc(total*(*data).sizeof);
4758     if (data is null) {
4759       stb_vorbis_close(v);
4760       return -2;
4761     }
4762     for (;;) {
4763       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4764       if (n == 0) break;
4765       data_len += n;
4766       offset += n*v.vrchannels;
4767       if (offset+limit > total) {
4768         short *data2;
4769         total *= 2;
4770         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4771         if (data2 is null) {
4772           import core.stdc.stdlib : free;
4773           free(data);
4774           stb_vorbis_close(v);
4775           return -2;
4776         }
4777         data = data2;
4778       }
4779     }
4780     *output = data;
4781     stb_vorbis_close(v);
4782     return data_len;
4783   }
4784 
4785   public int stb_vorbis_get_samples_float_interleaved (VorbisDecoder this, int channels, float* buffer, int num_floats) {
4786     float** outputs;
4787     int len = num_floats/channels;
4788     int n = 0;
4789     int z = this.vrchannels;
4790     if (z > channels) z = channels;
4791     while (n < len) {
4792       int k = this.channel_buffer_end-this.channel_buffer_start;
4793       if (n+k >= len) k = len-n;
4794       foreach (immutable j; 0..k) {
4795         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4796         foreach (immutable i; z..channels) *buffer++ = 0;
4797       }
4798       n += k;
4799       this.channel_buffer_start += k;
4800       if (n == len) break;
4801       if (!stb_vorbis_get_frame_float(this, null, &outputs)) break;
4802     }
4803     return n;
4804   }
4805   +/
4806 
4807   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4808     import core.stdc..string : memcpy, memset;
4809     float** outputs;
4810     int n = 0;
4811     int z = vrchannels;
4812     if (z > achans) z = achans;
4813     while (n < num_samples) {
4814       int k = channel_buffer_end-channel_buffer_start;
4815       if (n+k >= num_samples) k = num_samples-n;
4816       if (k) {
4817         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4818         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4819       }
4820       n += k;
4821       channel_buffer_start += k;
4822       if (n == num_samples) break;
4823       if (!getFrameFloat(null, &outputs)) break;
4824     }
4825     return n;
4826   }
4827 
4828 private: // k8: 'cause i'm evil
4829   private enum cmt_len_size = 2;
4830   nothrow /*@trusted*/ @nogc {
4831     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4832 
4833     // 0: error
4834     // includes length itself
4835     private uint comment_get_line_len () const pure {
4836       if (comment_data_pos >= comment_size) return 0;
4837       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4838       uint len = comment_data[comment_data_pos];
4839       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4840       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4841     }
4842 
4843     public bool comment_rewind () {
4844       comment_data_pos = 0;
4845       for (;;) {
4846         auto len = comment_get_line_len();
4847         if (!len) { comment_data_pos = comment_size; return false; }
4848         if (len != cmt_len_size) return true;
4849         comment_data_pos += len;
4850       }
4851     }
4852 
4853     // true: has something to read after skip
4854     public bool comment_skip () {
4855       comment_data_pos += comment_get_line_len();
4856       for (;;) {
4857         auto len = comment_get_line_len();
4858         if (!len) { comment_data_pos = comment_size; return false; }
4859         if (len != cmt_len_size) break;
4860         comment_data_pos += len;
4861       }
4862       return true;
4863     }
4864 
4865     public const(char)[] comment_line () {
4866       auto len = comment_get_line_len();
4867       if (len < cmt_len_size) return null;
4868       if (len == cmt_len_size) return "";
4869       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4870     }
4871 
4872     public const(char)[] comment_name () {
4873       auto line = comment_line();
4874       if (line.length == 0) return line;
4875       uint epos = 0;
4876       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4877       return (epos < line.length ? line[0..epos] : "");
4878     }
4879 
4880     public const(char)[] comment_value () {
4881       auto line = comment_line();
4882       if (line.length == 0) return line;
4883       uint epos = 0;
4884       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4885       return (epos < line.length ? line[epos+1..$] : line);
4886     }
4887   }
4888 }
4889 
4890 
4891 // ////////////////////////////////////////////////////////////////////////// //
4892 private:
4893 // cool helper to translate C defines
template cmacroFixVars(T...) {
  /**
   * 64-bit implementation of fasthash
   *
   * Params:
   *   buf =  data buffer
   *   len =  number of bytes of `buf` to hash
   *   seed = the seed
   *
   * Returns:
   *   32-bit or 64-bit hash, depending on the width of `size_t`
   */
  size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
    enum ulong mulBlock = 0x880355f21e6d1965UL;
    enum ulong mulMix = 0x2127599bf4325c37UL;

    // scrambles one 64-bit word
    static ulong mix (ulong v) pure nothrow @safe @nogc {
      v ^= v>>23;
      v *= mulMix;
      v ^= v>>47;
      return v;
    }

    // assembles eight bytes into one word, lowest byte first
    static ulong load8 (const(ubyte)* p) pure nothrow @nogc {
      ulong w = 0;
      foreach (immutable i; 0..8) w |= cast(ulong)p[i]<<(8*i);
      return w;
    }

    auto bytes = cast(const(ubyte)*)buf;
    ulong h = seed;

    // whole eight-byte words
    for (size_t blocks = len/8; blocks != 0; --blocks) {
      ulong word;
      version(HasUnalignedOps) {
        if (__ctfe) {
          word = load8(bytes);
        } else {
          // NOTE(review): native-order load; equals load8 only on little-endian targets -- confirm
          word = *cast(ulong*)bytes;
        }
      } else {
        word = load8(bytes);
      }
      bytes += 8;
      h ^= mix(word);
      h *= mulBlock;
    }

    h ^= len*mulBlock;

    // remaining 0..7 bytes; the mixing step below runs even when there are none
    immutable size_t rest = len&7;
    ulong tail = 0;
    foreach (immutable i; 0..rest) tail ^= cast(ulong)bytes[i]<<(8*i);
    h ^= mix(tail);
    h *= mulBlock;

    h = mix(h);
    static if (size_t.sizeof == 4) {
      // 32-bit hash
      // the following trick converts the 64-bit hashcode to Fermat
      // residue, which shall retain information from both the higher
      // and lower parts of hashcode.
      return cast(size_t)(h-(h>>32));
    } else {
      return h;
    }
  }

  /**
   * Expand `${name}` placeholders in `s`.
   *
   * Each template argument in `T` is a placeholder name; the `names` argument
   * at the same index is the text it is replaced with. The special
   * placeholder `${__temp_prefix__}` is replaced with a prefix derived from
   * the hash of the (trimmed) macro text, identical for every occurrence
   * within one call. Leading blank lines and trailing whitespace of `s` are
   * dropped; the indentation of the first non-blank line is kept.
   *
   * Params:
   *   s =     macro text
   *   names = replacement texts, one per entry of `T`
   *
   * Returns:
   *   the expanded text
   */
  string cmacroFixVars (string s, string[] names...) {
    assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");
    enum tmpPfxName = "__temp_prefix__";
    enum hexDigits = "0123456789abcdef";

    // trim trailing spaces
    size_t keep = s.length;
    while (keep > 0 && s[keep-1] <= ' ') --keep;
    s = s[0..keep];

    // skip empty lines (for pretty printing): start at the line that holds
    // the first visible character
    size_t cur = 0;
    foreach (immutable idx, char ch; s) {
      if (ch > ' ') break;
      if (ch == '\n') cur = idx+1;
    }

    string res;
    string tmppfx;

    // true when `name` immediately followed by '}' starts at `cur`
    bool atVar (string name) {
      return (s.length-cur >= name.length+1 && s[cur+name.length] == '}' && s[cur..cur+name.length] == name);
    }

    while (cur+2 < s.length) {
      // look for the next "${"; the scan gives up once fewer than three
      // characters remain
      size_t open = cur;
      while (open+2 < s.length && !(s[open] == '$' && s[open+1] == '{')) ++open;
      if (open > cur) {
        // too short for a placeholder: the tail is appended after the loop
        if (s.length-open < 3) break;
        res ~= s[cur..open];
        cur = open;
      }
      assert(s[cur] == '$' && s[cur+1] == '{');
      cur += 2;

      bool found = false;
      if (atVar(tmpPfxName)) {
        if (tmppfx.length == 0) {
          // generate temporary prefix
          auto hash = hashOf(s.ptr, s.length);
          tmppfx = "_temp_macro_var_";
          // one hex digit per nibble, lowest nibble first
          foreach (immutable _; 0..size_t.sizeof*2) {
            tmppfx ~= hexDigits[hash&0x0f];
            hash >>= 4;
          }
          tmppfx ~= "_";
        }
        res ~= tmppfx;
        cur += tmpPfxName.length+1;
        found = true;
      } else {
        foreach (immutable nidx, string oname; T) {
          static assert(oname.length > 0);
          if (atVar(oname)) {
            res ~= names[nidx];
            cur += oname.length+1;
            found = true;
            break;
          }
        }
      }
      assert(found, "unknown variable in macro");
    }

    if (cur < s.length) res ~= s[cur..$];
    return res;
  }
}
5030 
5031 // ////////////////////////////////////////////////////////////////////////// //
5032 /* Version history
5033     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5034     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5035                            avoid discarding last frame of audio data
5036     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5037                            some more crash fixes when out of memory or with corrupt files
5038     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5039                            some crash fixes when out of memory or with corrupt files
5040     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5041     1.04    - 2014/08/27 - fix missing const-correct case in API
5042     1.03    - 2014/08/07 - Warning fixes
5043     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5044     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5045     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5046                            (API change) report sample rate for decode-full-file funcs
5047     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5048     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5049     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5050     0.99993 - remove assert that fired on legal files with empty tables
5051     0.99992 - rewind-to-start
5052     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5053     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5054     0.9998 - add a full-decode function with a memory source
5055     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5056     0.9996 - query length of vorbis stream in samples/seconds
5057     0.9995 - bugfix to another optimization that only happened in certain files
5058     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5059     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5060     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5061     0.9991 - performance improvement of IMDCT
5062     0.999 - (should have been 0.9990) performance improvement of IMDCT
5063     0.998 - no-CRT support from Casey Muratori
5064     0.997 - bugfixes for bugs found by Terje Mathisen
5065     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5066     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5067     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5068     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5069     0.992 - fixes for MinGW warning
5070     0.991 - turn fast-float-conversion on by default
5071     0.990 - fix push-mode seek recovery if you seek into the headers
5072     0.98b - fix to bad release of 0.98
5073     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5074     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5075     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5076     0.95 - clamping code for 16-bit functions
5077     0.94 - not publically released
5078     0.93 - fixed all-zero-floor case (was decoding garbage)
5079     0.92 - fixed a memory leak
5080     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5081     0.90 - first public release
5082 */
5083 
5084 /*
5085 ------------------------------------------------------------------------------
5086 This software is available under 2 licenses -- choose whichever you prefer.
5087 ------------------------------------------------------------------------------
5088 ALTERNATIVE A - MIT License
5089 Copyright (c) 2017 Sean Barrett
5090 Permission is hereby granted, free of charge, to any person obtaining a copy of
5091 this software and associated documentation files (the "Software"), to deal in
5092 the Software without restriction, including without limitation the rights to
5093 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5094 of the Software, and to permit persons to whom the Software is furnished to do
5095 so, subject to the following conditions:
5096 The above copyright notice and this permission notice shall be included in all
5097 copies or substantial portions of the Software.
5098 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5099 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5100 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5101 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5102 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5103 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5104 SOFTWARE.
5105 ------------------------------------------------------------------------------
5106 ALTERNATIVE B - Public Domain (www.unlicense.org)
5107 This is free and unencumbered software released into the public domain.
5108 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5109 software, either in source code form or as a compiled binary, for any purpose,
5110 commercial or non-commercial, and by any means.
5111 In jurisdictions that recognize copyright laws, the author or authors of this
5112 software dedicate any and all copyright interest in the software to the public
5113 domain. We make this dedication for the benefit of the public at large and to
5114 the detriment of our heirs and successors. We intend this dedication to be an
5115 overt act of relinquishment in perpetuity of all present and future rights to
5116 this software under copyright law.
5117 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5118 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5119 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5120 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5121 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5122 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5123 ------------------------------------------------------------------------------
5124 */