1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 // stolen by adam and module renamed.
57 module arsd.vorbis;
58 
59 import core.stdc.stdio : FILE;
60 
// Windows-only stand-in for C99 lrintf(): converts `f` to an integer by
// rounding according to the current floating-point rounding mode
// (round-to-nearest by default), as the C function does. The previous
// plain cast truncated toward zero instead (2.7f became 2, not 3).
version(Windows)
	extern(C) int lrintf(float f) {
		import core.math : rndtol; // rounds using the current rounding mode
		return cast(int) rndtol(f);
	}
63 
64 nothrow /*@trusted*/:
65 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
66 // import it here, as druntime has no `@nogc` on it (for a reason)
67 private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(in void*, in void*) compar);
68 
69 
70 //////////////////////////////////////////////////////////////////////////////
71 //
72 //  HEADER BEGINS HERE
73 //
74 
75 ///////////   THREAD SAFETY
76 
// Individual VorbisDecoder handles are not thread-safe; you cannot decode from
// a single handle on multiple threads at the same time. However, you can have
// multiple VorbisDecoder handles and decode from them independently in multiple threads.
80 
81 
82 ///////////   MEMORY ALLOCATION
83 
84 // normally stb_vorbis uses malloc() to allocate memory at startup,
85 // and alloca() to allocate temporary memory during a frame on the
86 // stack. (Memory consumption will depend on the amount of setup
87 // data in the file and how you set the compile flags for speed
88 // vs. size. In my test files the maximal-size usage is ~150KB.)
89 //
90 // You can modify the wrapper functions in the source (setup_malloc,
91 // setup_temp_malloc, temp_malloc) to change this behavior, or you
92 // can use a simpler allocation model: you pass in a buffer from
93 // which stb_vorbis will allocate _all_ its memory (including the
94 // temp memory). "open" may fail with a VORBIS_outofmem if you
95 // do not pass in enough data; there is no way to determine how
96 // much you do need except to succeed (at which point you can
97 // query get_info to find the exact amount required. yes I know
98 // this is lame).
99 //
100 // If you pass in a non-null buffer of the type below, allocation
101 // will occur from it as described above. Otherwise just pass null
102 // to use malloc()/alloca()
103 
// Describes a caller-supplied memory buffer. Per the MEMORY ALLOCATION notes
// above, passing a non-null buffer of this type makes the decoder allocate
// from it; passing null selects malloc()/alloca() instead.
public struct stb_vorbis_alloc {
  ubyte* alloc_buffer; // start of the caller-owned buffer
  int alloc_buffer_length_in_bytes; // size of that buffer, in bytes
}
108 
109 
110 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
111 
112 /*
113 public struct stb_vorbis_info {
114   uint sample_rate;
115   int channels;
116 
117   uint setup_memory_required;
118   uint setup_temp_memory_required;
119   uint temp_memory_required;
120 
121   int max_frame_size;
122 }
123 */
124 
125 
126 /* ************************************************************************** *
127 // get general information about the file
128 stb_vorbis_info stb_vorbis_get_info (VorbisDecoder f);
129 
130 // get the last error detected (clears it, too)
131 int stb_vorbis_get_error (VorbisDecoder f);
132 
133 // close an ogg vorbis file and free all memory in use
134 void stb_vorbis_close (VorbisDecoder f);
135 
136 // this function returns the offset (in samples) from the beginning of the
137 // file that will be returned by the next decode, if it is known, or -1
138 // otherwise. after a flush_pushdata() call, this may take a while before
139 // it becomes valid again.
140 // NOT WORKING YET after a seek with PULLDATA API
141 int stb_vorbis_get_sample_offset (VorbisDecoder f);
142 
143 // returns the current seek point within the file, or offset from the beginning
144 // of the memory buffer. In pushdata mode it returns 0.
145 uint stb_vorbis_get_file_offset (VorbisDecoder f);
146 
147 
148 ///////////   PUSHDATA API
149 
150 // this API allows you to get blocks of data from any source and hand
151 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
152 // you how much it used, and you have to give it the rest next time;
153 // and stb_vorbis may not have enough data to work with and you will
154 // need to give it the same data again PLUS more. Note that the Vorbis
155 // specification does not bound the size of an individual frame.
156 
157 // create a vorbis decoder by passing in the initial data block containing
//    the ogg&vorbis headers (you don't need to parse them, just provide
//    the first N bytes of the file--you're told if it's not enough, see below)
// on success, returns a VorbisDecoder, does not set error, returns the amount of
//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
// on failure, returns null and sets *error, does not change *datablock_memory_consumed_in_bytes
163 // if returns null and *error is VORBIS_need_more_data, then the input block was
164 //       incomplete and you need to pass in a larger block from the start of the file
165 VorbisDecoder stb_vorbis_open_pushdata (
166               ubyte* datablock, int datablock_length_in_bytes,
167               int* datablock_memory_consumed_in_bytes,
168               int* error,
169               stb_vorbis_alloc* alloc_buffer
170             );
171 
172 // decode a frame of audio sample data if possible from the passed-in data block
173 //
174 // return value: number of bytes we used from datablock
175 //
176 // possible cases:
177 //     0 bytes used, 0 samples output (need more data)
178 //     N bytes used, 0 samples output (resynching the stream, keep going)
179 //     N bytes used, M samples output (one frame of data)
180 // note that after opening a file, you will ALWAYS get one N-bytes, 0-sample
181 // frame, because Vorbis always "discards" the first frame.
182 //
183 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
184 // instead only datablock_length_in_bytes-3 or less. This is because it wants
185 // to avoid missing parts of a page header if they cross a datablock boundary,
186 // without writing state-machiney code to record a partial detection.
187 //
188 // The number of channels returned are stored in *channels (which can be
189 // null--it is always the same as the number of channels reported by
190 // get_info). *output will contain an array of float* buffers, one per
191 // channel. In other words, (*output)[0][0] contains the first sample from
192 // the first channel, and (*output)[1][0] contains the first sample from
193 // the second channel.
194 int stb_vorbis_decode_frame_pushdata (
195       VorbisDecoder f, ubyte* datablock, int datablock_length_in_bytes,
196       int* channels,   // place to write number of float * buffers
197       float*** output, // place to write float ** array of float * buffers
198       int* samples     // place to write number of output samples
199     );
200 
201 // inform stb_vorbis that your next datablock will not be contiguous with
202 // previous ones (e.g. you've seeked in the data); future attempts to decode
203 // frames will cause stb_vorbis to resynchronize (as noted above), and
204 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
205 // will begin decoding the _next_ frame.
206 //
207 // if you want to seek using pushdata, you need to seek in your file, then
208 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
209 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
210 // if you don't like the result, seek your file again and repeat.
211 void stb_vorbis_flush_pushdata (VorbisDecoder f);
212 
213 
214 //////////   PULLING INPUT API
215 
216 // This API assumes stb_vorbis is allowed to pull data from a source--
217 // either a block of memory containing the _entire_ vorbis stream, or a
218 // FILE* that you or it create, or possibly some other reading mechanism
219 // if you go modify the source to replace the FILE* case with some kind
220 // of callback to your code. (But if you don't support seeking, you may
221 // just want to go ahead and use pushdata.)
222 
223 // decode an entire file and output the data interleaved into a malloc()ed
224 // buffer stored in *output. The return value is the number of samples
225 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
226 // When you're done with it, just free() the pointer returned in *output.
227 int stb_vorbis_decode_filename (const(char)* filename, int* channels, int* sample_rate, short** output);
228 int stb_vorbis_decode_memory (const(ubyte)* mem, int len, int* channels, int* sample_rate, short** output);
229 
230 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
231 // this must be the entire stream!). on failure, returns null and sets *error
232 VorbisDecoder stb_vorbis_open_memory (const(ubyte)* data, int len, int* error, stb_vorbis_alloc* alloc_buffer);
233 
234 // create an ogg vorbis decoder from a filename via fopen(). on failure,
235 // returns null and sets *error (possibly to VORBIS_file_open_failure).
236 VorbisDecoder stb_vorbis_open_filename (const(char)* filename, int* error, stb_vorbis_alloc* alloc_buffer);
237 
238 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
239 // the _current_ seek point (ftell). on failure, returns null and sets *error.
// note that stb_vorbis must "own" this stream; if you seek it in between
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
242 // perform stb_vorbis_seek_*() operations on this file, it will assume it
243 // owns the _entire_ rest of the file after the start point. Use the next
244 // function, stb_vorbis_open_file_section(), to limit it.
245 VorbisDecoder stb_vorbis_open_file (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer);
246 
247 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
248 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
249 // on failure, returns null and sets *error. note that stb_vorbis must "own"
250 // this stream; if you seek it in between calls to stb_vorbis, it will become
251 // confused.
252 VorbisDecoder stb_vorbis_open_file_section (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer, uint len);
253 
254 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
255 // after calling seek_frame(), the next call to get_frame_*() will include
256 // the specified sample. after calling stb_vorbis_seek(), the next call to
257 // stb_vorbis_get_samples_* will start with the specified sample. If you
258 // do not need to seek to EXACTLY the target sample when using get_samples_*,
259 // you can also use seek_frame().
260 int stb_vorbis_seek_frame (VorbisDecoder f, uint sample_number);
261 int stb_vorbis_seek (VorbisDecoder f, uint sample_number);
262 
263 // this function is equivalent to stb_vorbis_seek(f, 0)
264 int stb_vorbis_seek_start (VorbisDecoder f);
265 
266 // these functions return the total length of the vorbis stream
267 uint stb_vorbis_stream_length_in_samples (VorbisDecoder f);
268 float stb_vorbis_stream_length_in_seconds (VorbisDecoder f);
269 
270 // decode the next frame and return the number of samples. the number of
271 // channels returned are stored in *channels (which can be null--it is always
272 // the same as the number of channels reported by get_info). *output will
273 // contain an array of float* buffers, one per channel. These outputs will
274 // be overwritten on the next call to stb_vorbis_get_frame_*.
275 //
276 // You generally should not intermix calls to stb_vorbis_get_frame_*()
277 // and stb_vorbis_get_samples_*(), since the latter calls the former.
278 int stb_vorbis_get_frame_float (VorbisDecoder f, int* channels, float*** output);
279 
280 // decode the next frame and return the number of *samples* per channel.
281 // Note that for interleaved data, you pass in the number of shorts (the
282 // size of your array), but the return value is the number of samples per
283 // channel, not the total number of samples.
284 //
285 // The data is coerced to the number of channels you request according to the
286 // channel coercion rules (see below). You must pass in the size of your
287 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
288 // The maximum buffer size needed can be gotten from get_info(); however,
289 // the Vorbis I specification implies an absolute maximum of 4096 samples
290 // per channel.
291 int stb_vorbis_get_frame_short_interleaved (VorbisDecoder f, int num_c, short* buffer, int num_shorts);
292 int stb_vorbis_get_frame_short (VorbisDecoder f, int num_c, short** buffer, int num_samples);
293 
294 // Channel coercion rules:
295 //    Let M be the number of channels requested, and N the number of channels present,
296 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
297 //    and stereo R be the sum of all R and center channels (channel assignment from the
298 //    vorbis spec).
299 //        M    N       output
300 //        1    k      sum(Ck) for all k
301 //        2    *      stereo L, stereo R
302 //        k    l      k > l, the first l channels, then 0s
303 //        k    l      k <= l, the first k channels
304 //    Note that this is not _good_ surround etc. mixing at all! It's just so
305 //    you get something useful.
306 
307 // gets num_samples samples, not necessarily on a frame boundary--this requires
308 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
309 // Returns the number of samples stored per channel; it may be less than requested
310 // at the end of the file. If there are no more samples in the file, returns 0.
311 int stb_vorbis_get_samples_float_interleaved (VorbisDecoder f, int channels, float* buffer, int num_floats);
312 int stb_vorbis_get_samples_float (VorbisDecoder f, int channels, float** buffer, int num_samples);
313 
314 // gets num_samples samples, not necessarily on a frame boundary--this requires
315 // buffering so you have to supply the buffers. Applies the coercion rules above
316 // to produce 'channels' channels. Returns the number of samples stored per channel;
317 // it may be less than requested at the end of the file. If there are no more
318 // samples in the file, returns 0.
319 int stb_vorbis_get_samples_short_interleaved (VorbisDecoder f, int channels, short* buffer, int num_shorts);
320 int stb_vorbis_get_samples_short (VorbisDecoder f, int channels, short** buffer, int num_samples);
321 */
322 
323 ////////   ERROR CODES
324 
// Error codes stored in the decoder by error(). Members without an explicit
// initializer take the previous member's value plus one; the resulting
// numbers are spelled out in the trailing comments.
public enum STBVorbisError {
  no_error,              // 0

  need_more_data = 1,    // not a real error

  invalid_api_mixing,    // 2: can't mix API modes
  outofmem,              // 3: not enough memory
  feature_not_supported, // 4: uses floor 0
  too_many_channels,     // 5: STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure,     // 6: fopen() failed
  seek_without_length,   // 7: can't seek in unknown-length file

  unexpected_eof = 10,   // file is truncated?
  seek_invalid,          // 11: seek past EOF

  // decoding errors (corrupt/invalid stream) -- you probably
  // don't care about the exact details of these

  // vorbis errors:
  invalid_setup = 20,
  invalid_stream,                   // 21

  // ogg errors:
  missing_capture_pattern = 30,
  invalid_stream_structure_version, // 31
  continued_packet_flag_invalid,    // 32
  incorrect_stream_serial_number,   // 33
  invalid_first_page,               // 34
  bad_packet_type,                  // 35
  cant_find_last_page,              // 36
  seek_failed,                      // 37
}
357 //
358 //  HEADER ENDS HERE
359 //
360 //////////////////////////////////////////////////////////////////////////////
361 
362 
363 // global configuration settings (e.g. set these in the project/makefile),
364 // or just set them in this file at the top (although ideally the first few
365 // should be visible when the header file is compiled too, although it's not
366 // crucial)
367 
368 // STB_VORBIS_NO_INTEGER_CONVERSION
369 //     does not compile the code for converting audio sample data from
370 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
371 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
372 
373 // STB_VORBIS_NO_FAST_SCALED_FLOAT
374 //      does not use a fast float-to-int trick to accelerate float-to-int on
375 //      most platforms which requires endianness be defined correctly.
376 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
377 
// STB_VORBIS_MAX_CHANNELS [number]
//     globally define this to the maximum number of channels you need.
//     The spec does not put a restriction on channels except that
//     the count is stored in a byte, so 255 is the hard limit.
//     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB, plus any other memory usage
//     I forgot to account for. Can probably go as low as 8 (7.1 audio),
//     6 (5.1 audio), or 2 (stereo only).
enum STB_VORBIS_MAX_CHANNELS = 16; // enough for anyone?
387 
// STB_VORBIS_PUSHDATA_CRC_COUNT [number]
//     after a flush_pushdata(), stb_vorbis begins scanning for the
//     next valid page, without backtracking. when it finds something
//     that looks like a page, it streams through it and verifies its
//     CRC32. Should that validation fail, it keeps scanning. But it's
//     possible that _while_ streaming through to check the CRC32 of
//     one candidate page, it sees another candidate page. This constant
//     determines how many "overlapping" candidate pages it can search
//     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
//     garbage pages could be as big as 64KB, but probably average ~16KB.
//     So don't hose ourselves by scanning an apparent 64KB page and
//     missing a ton of real ones in the interim; use a minimum of 2.
enum STB_VORBIS_PUSHDATA_CRC_COUNT = 4;
401 
// STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
//     sets the log2 size of the huffman-acceleration table. Maximum
//     supported value is 24. With larger numbers, more decodings are O(1),
//     but the table is larger and so causes more cache misses; you'll have
//     to probe (and try multiple ogg vorbis files) to find the sweet spot.
enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
408 
409 // STB_VORBIS_FAST_BINARY_LENGTH [number]
410 //     sets the log size of the binary-search acceleration table. this
411 //     is used in similar fashion to the fast-huffman size to set initial
412 //     parameters for the binary search
413 
// STB_VORBIS_FAST_HUFFMAN_INT
//     The fast huffman tables are much more efficient if they can be
//     stored as 16-bit results instead of 32-bit results. This restricts
//     the codebooks to having only 65535 possible outcomes, though.
//     (At least, accelerated by the huffman table.)
//     Defining this version selects the 32-bit table; leaving it undefined
//     (the default) selects STB_VORBIS_FAST_HUFFMAN_SHORT below, which makes
//     Codebook.fast_huffman a table of 16-bit `short` entries.
//version = STB_VORBIS_FAST_HUFFMAN_INT;
version(STB_VORBIS_FAST_HUFFMAN_INT) {} else version = STB_VORBIS_FAST_HUFFMAN_SHORT;
421 
422 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
423 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
424 //     back on binary searching for the correct one. This requires storing
425 //     extra tables with the huffman codes in sorted order. Defining this
426 //     symbol trades off space for speed by forcing a linear search in the
427 //     non-fast case, except for "sparse" codebooks.
428 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
429 
430 // STB_VORBIS_DIVIDES_IN_RESIDUE
431 //     stb_vorbis precomputes the result of the scalar residue decoding
432 //     that would otherwise require a divide per chunk. you can trade off
433 //     space for time by defining this symbol.
434 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
435 
436 // STB_VORBIS_DIVIDES_IN_CODEBOOK
437 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
438 //     stored, or with all elements being chosen from a small range of values,
439 //     and all values possible in all elements. By default, stb_vorbis expands
440 //     this latter kind out to look like the former kind for ease of decoding,
441 //     because otherwise an integer divide-per-vector-element is required to
442 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
443 //     trade off storage for speed.
444 //version = STB_VORBIS_DIVIDES_IN_CODEBOOK;
445 
446 version(STB_VORBIS_CODEBOOK_SHORTS) static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
447 
448 // STB_VORBIS_DIVIDE_TABLE
449 //     this replaces small integer divides in the floor decode loop with
450 //     table lookups. made less than 1% difference, so disabled by default.
451 //version = STB_VORBIS_DIVIDE_TABLE;
452 
453 // STB_VORBIS_NO_DEFER_FLOOR
454 //     Normally we only decode the floor without synthesizing the actual
455 //     full curve. We can instead synthesize the curve immediately. This
456 //     requires more memory and is very likely slower, so I don't think
457 //     you'd ever want to do it except for debugging.
458 //version = STB_VORBIS_NO_DEFER_FLOOR;
459 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
460 
461 
462 // ////////////////////////////////////////////////////////////////////////// //
private: // declarations following this label are module-private
// compile-time sanity checks for the configuration constants above
// NOTE(review): the STB_VORBIS_MAX_CHANNELS comment calls 255 the hard limit,
// yet this check accepts 256 -- confirm which bound is intended
static assert(STB_VORBIS_MAX_CHANNELS <= 256, "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24, "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");

enum MAX_BLOCKSIZE_LOG = 13; // from specification
enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG); // 1<<13 == 8192
469 
470 
471 alias codetype = float;
472 
473 // @NOTE
474 //
475 // Some arrays below are tagged "//varies", which means it's actually
476 // a variable-sized piece of data, but rather than malloc I assume it's
477 // small enough it's better to just allocate it all together with the
478 // main thing
479 //
480 // Most of the variables are specified with the smallest size I could pack
481 // them into. It might give better performance to make them all full-sized
482 // integers. It should be safe to freely rearrange the structures or change
483 // the sizes larger--nothing relies on silently truncating etc., nor the
484 // order of variables.
485 
// number of slots in Codebook.fast_huffman: 2 to the power STB_VORBIS_FAST_HUFFMAN_LENGTH
enum FAST_HUFFMAN_TABLE_SIZE = (1<<STB_VORBIS_FAST_HUFFMAN_LENGTH);
// all-ones mask covering a valid table index (SIZE is a power of two)
enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE-1);
488 
// Per-codebook decoder state.
// NOTE(review): the code that fills these fields is outside this view; the
// field notes below are read off names and types only -- confirm against setup code.
struct Codebook {
  int dimensions, entries;
  ubyte* codeword_lengths; // presumably one length per entry (see NO_CODE for "no codeword")
  float minimum_value;
  float delta_value;
  ubyte value_bits;
  ubyte lookup_type;
  ubyte sequence_p;
  ubyte sparse;
  uint lookup_values;
  codetype* multiplicands; // codetype is an alias for float
  uint *codewords;
  // acceleration table with FAST_HUFFMAN_TABLE_SIZE slots; the slot width is
  // 16 bits unless STB_VORBIS_FAST_HUFFMAN_INT is defined, then 32 bits
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  uint* sorted_codewords;
  int* sorted_values;
  int sorted_entries;
}
510 
// Floor type 0 parameters. The file header lists floor 0 as not supported;
// this struct is one member of the Floor union.
struct Floor0 {
  ubyte order;
  ushort rate;
  ushort bark_map_size;
  ubyte amplitude_bits;
  ubyte amplitude_offset;
  ubyte number_of_books;
  ubyte[16] book_list; // varies (fixed 16-slot storage for variable-length data, see @NOTE above)
}
520 
// Floor type 1 parameters. Arrays tagged "varies" hold variable-length data
// in fixed-size storage (see @NOTE above). 31*8+2 evaluates to 250.
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list; // varies
  ubyte[16] class_dimensions; // varies
  ubyte[16] class_subclasses; // varies
  ubyte[16] class_masterbooks; // varies
  short[8][16] subclass_books; // varies; D layout: 16 rows of 8 shorts
  ushort[31*8+2] Xlist; // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors; // D layout: 250 pairs of ubyte
  ubyte floor1_multiplier;
  ubyte rangebits;
  int values;
}
535 
// Overlapping storage for either floor type; the two members share memory,
// so only one of them is meaningful for a given floor.
union Floor {
  Floor0 floor0;
  Floor1 floor1;
}
540 
// Residue configuration.
// NOTE(review): field meanings are not established in this view -- confirm against setup code.
struct Residue {
  uint begin, end;
  uint part_size;
  ubyte classifications;
  ubyte classbook;
  ubyte** classdata;
  // declaration as written in the C original, kept for reference:
  //int16 (*residue_books)[8];
  short[8]* residue_books; // pointer to rows of 8 shorts each
}
550 
// Per-channel entry referenced by Mapping.chan.
// NOTE(review): presumably magnitude/angle are channel-coupling indices and
// mux a submap index -- confirm against the mapping setup code.
struct MappingChannel {
  ubyte magnitude;
  ubyte angle;
  ubyte mux;
}
556 
// Mapping configuration. Arrays tagged "varies" hold variable-length data in
// fixed 15-slot storage (see @NOTE above).
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan; // separately allocated MappingChannel storage
  ubyte submaps;
  ubyte[15] submap_floor; // varies
  ubyte[15] submap_residue; // varies
}
564 
// Mode configuration.
// NOTE(review): presumably `mapping` is an index into the mapping list and
// `blockflag` selects the block size -- confirm against setup code.
struct Mode {
  ubyte blockflag;
  ubyte mapping;
  ushort windowtype;
  ushort transformtype;
}
571 
// Running state for one candidate page whose CRC is being verified (see the
// STB_VORBIS_PUSHDATA_CRC_COUNT notes above for how candidates overlap).
struct CRCscan {
  uint goal_crc;   // expected crc if match
  int bytes_left;  // bytes left in packet
  uint crc_so_far; // running crc
  int bytes_done;  // bytes processed in _current_ chunk
  uint sample_loc; // granule pos encoded in page
}
579 
// Record of a probed page: its start/end position and last decoded sample.
// NOTE(review): presumably positions are byte offsets used while seeking -- confirm at the use sites.
struct ProbedPage {
  uint page_start, page_end;
  uint last_decoded_sample;
}
584 
// Stores `e` as the decoder's current error and always returns 0.
// The conditional store repeats the same assignment; it exists only as a
// place to set a debugger breakpoint for "real" errors (decoder not at eof
// and `e` is not need_more_data).
private int error (VorbisDecoder f, STBVorbisError e) {
  f.error = e;
  immutable bool worthBreaking = (e != STBVorbisError.need_more_data);
  if (!f.eof && worthBreaking) {
    // breakpoint for debugging
    f.error = e;
  }
  return 0;
}
592 
593 // these functions are used for allocating temporary memory
594 // while decoding. if you can afford the stack space, use
595 // alloca(); otherwise, provide a temp buffer and it will
596 // allocate out of those.
// Asks the decoder's allocator for its current temp-allocation mark
// (forwards to f.alloc.tempSave) and returns it.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return f.alloc.tempSave(f);
}
// Hands a mark `p` (as returned by temp_alloc_save) back to the decoder's
// allocator by forwarding to f.alloc.tempRestore.
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  f.alloc.tempRestore(p, f);
}
// Deliberately empty: neither `f` nor `p` is touched.
// NOTE(review): presumably kept so call sites can mirror the C original's temp_free() -- confirm.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc {}
600 /*
601 T* temp_alloc(T) (VorbisDecoder f, uint count) nothrow @nogc {
602   auto res = f.alloc.alloc(count*T.sizeof, f);
603   return cast(T*)res;
604 }
605 */
606 
607 /+
608 enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
609 
610 // has to be a mixin, due to `alloca`
611 template temp_alloc(string size) {
612   enum temp_alloc = q{(f.alloc.alloc_buffer ? setup_temp_malloc(f, (${size})) : alloca(${size}))}.cmacroFixVars!("size")(size);
613 }
614 
615 // has to be a mixin, due to `alloca`
616 template temp_block_array(string count, string size) {
617   enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
618     .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
619 }
620 +/
// Compile-time string (for use in a mixin) giving the byte size of a block
// array: `count` pointers plus `count` payloads of `size` bytes each.
// NOTE(review): cmacroFixVars is defined outside this view; presumably it
// substitutes the ${count}/${size} placeholders -- confirm.
enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
622 
// Compile-time string (for use in a mixin) that expands to `alloca(<size>)`.
// The commented-out variant above chose setup_temp_malloc instead whenever
// f.alloc.alloc_buffer was set; this version always uses alloca.
template temp_alloc(string size) {
  enum temp_alloc = q{alloca(${size})}.cmacroFixVars!("size")(size);
}
626 
// Compile-time string (for use in a mixin) that expands to a make_block_array
// call whose memory argument is a temp_alloc of array_size_required bytes,
// i.e. it allocates and lays out `count` sub-blocks of `size` bytes in one go.
template temp_block_array(string count, string size) {
  enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
}
631 
632 /*
633 T** temp_block_array(T) (VorbisDecoder f, uint count, uint size) {
634   size *= T.sizeof;
635   auto mem = f.alloc.alloc(count*(void*).sizeof+size, f);
636   if (mem !is null) make_block_array(mem, count, size);
637   return cast(T**)mem;
638 }
639 */
640 
// Lays out `mem` as a table of `count` pointers followed by `count` payload
// blocks of `size` bytes each, pointing table slot i at payload block i.
// `mem` must be large enough for both parts. Returns `mem` (the table start).
private void* make_block_array (void* mem, int count, int size) {
  auto table = cast(void**)mem;
  auto payload = cast(char*)(table+count); // first byte after the pointer table
  for (int slot = 0; slot < count; ++slot) {
    table[slot] = payload;
    payload += size;
  }
  return table;
}
651 
// Allocates zero-filled setup memory for `sz` elements of type T through the
// decoder's allocator (f.alloc.alloc).
// Returns null when the allocator fails, or when the requested byte size
// (sz*T.sizeof plus 8 padding bytes) does not fit in 32 bits; previously such
// a request silently wrapped around and produced an undersized block.
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  // reject counts whose padded byte size would overflow `uint`
  if (sz > (uint.max-8)/T.sizeof) return null;
  sz *= T.sizeof;
  auto res = f.alloc.alloc(sz+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  if (res !is null) {
    import core.stdc.string : memset;
    memset(res, 0, sz+8); // clear the padding as well
  }
  return cast(T*)res;
}
670 
// Releases setup memory through the decoder's allocator (f.alloc.free).
// A null `p` is ignored.
private void setup_free (VorbisDecoder f, void* p) {
  if (p is null) return;
  f.alloc.free(p, f);
}
675 
// Allocates `sz` bytes (plus 8 padding bytes) of zero-filled temporary setup
// memory through the decoder's allocator (f.alloc.allocTemp).
// Returns null when the allocator fails, or when sz+8 does not fit in 32
// bits; previously such a request wrapped around to a tiny allocation.
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  if (sz > uint.max-8) return null; // sz+8 would overflow
  auto res = f.alloc.allocTemp(sz+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  if (res !is null) {
    import core.stdc.string : memset;
    memset(res, 0, sz+8); // clear the padding as well
  }
  return res;
}
684 
// Returns temporary setup memory to the decoder's allocator
// (f.alloc.freeTemp). A null `p` is ignored. A size of 0 is reported as 1,
// and 8 padding bytes are always added.
// NOTE(review): setup_temp_malloc requests sz+8 even for sz == 0, while this
// reports 1+8 in that case -- confirm freeTemp tolerates the difference.
private void setup_temp_free (VorbisDecoder f, void* p, uint sz) {
  if (p is null) return;
  immutable uint reported = (sz ? sz : 1);
  f.alloc.freeTemp(p, reported+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
}
688 
// Lookup table for the byte-at-a-time, most-significant-bit-first CRC32 used
// by crc32_update; filled once by the module constructor below.
immutable uint[256] crc_table;
shared static this () {
  enum CRC32_POLY = 0x04c11db7; // from spec
  for (uint value = 0; value < 256; ++value) {
    // start with the byte in the top 8 bits, then shift it out one bit at a
    // time, folding in the polynomial whenever a set bit falls off the top
    uint rem = value<<24;
    for (int bit = 0; bit < 8; ++bit) {
      immutable bool topSet = (rem >= (1U<<31));
      rem <<= 1;
      if (topSet) rem ^= CRC32_POLY;
    }
    crc_table[value] = rem;
  }
}
699 
// Feeds one byte into a running CRC32 and returns the updated value.
uint crc32_update (uint crc, ubyte b) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // table slot: incoming byte combined with the top byte of the current CRC
  immutable uint slot = (crc>>24)^b;
  return crc_table[slot]^(crc<<8);
}
704 
// Reverses the order of all 32 bits of `n`.
// used in setup, and for huffman that doesn't go fast path
private uint bit_reverse (uint n) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // swap neighbouring groups of 1, 2, 4 and 8 bits, then the two 16-bit halves
  n = ((n>>1)&0x55555555)|((n<<1)&0xAAAAAAAA);
  n = ((n>>2)&0x33333333)|((n<<2)&0xCCCCCCCC);
  n = ((n>>4)&0x0F0F0F0F)|((n<<4)&0xF0F0F0F0);
  n = ((n>>8)&0x00FF00FF)|((n<<8)&0xFF00FF00);
  return (n<<16)|(n>>16);
}
714 
// Returns `x` multiplied by itself.
private float square (float x) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  immutable product = x*x;
  return product;
}
719 
// "Vorbis ilog": number of bits needed to represent `n`, so
// ilog(1) = 1, ilog(2) = 2, ilog(4) = 3; zero and negative values give 0.
// Table-driven: pick the 5-bit window holding the top bits, then look up the rest.
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4];
private int ilog (int n) {
  //static if (__VERSION__ > 2067) pragma(inline, true);
  if (n < 0) return 0; // signed n returns 0
  if (n < (1<<4)) return log2_4[n];
  if (n < (1<<9)) return 5+log2_4[n>>5];
  if (n < (1<<14)) return 10+log2_4[n>>10];
  if (n < (1<<19)) return 15+log2_4[n>>15];
  if (n < (1<<24)) return 20+log2_4[n>>20];
  if (n < (1<<29)) return 25+log2_4[n>>25];
  return 30+log2_4[n>>30];
}
740 
741 
// marker stored as the code length of a value that has no huffman encoding
enum NO_CODE = 0xFF;
744 
745 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
746 //
747 // these functions are only called at setup, and only a few times per file
// Decodes the packed float layout used here: bit 31 = sign, bits 21..30 = exponent
// (biased by 788), bits 0..20 = unsigned mantissa. Result is mantissa * 2^(exponent-788).
private float float32_unpack (uint x) {
  import core.math : ldexp;
  //static if (__VERSION__ > 2067) pragma(inline, true);
  // from the specification
  immutable uint exp = (x>>21)&0x3ff;
  immutable bool negative = (x&0x80000000) != 0;
  // a 21-bit integer is exactly representable in both double and float
  double magnitude = cast(double)(x&0x1fffff);
  if (negative) magnitude = -magnitude;
  return cast(float)ldexp(cast(float)magnitude, exp-788);
}
758 
// zlib & jpeg style huffman tables rely on the code lengths being sorted,
// which allows a very simple generation algorithm. vorbis allows a table
// with non-sorted lengths, so symbols in order do not map to huffman
// codes "in order" and every code has to be recorded explicitly.
//
// Records one generated code. Dense codebooks index `codewords` by symbol;
// sparse codebooks append at `count` and also keep the length and the symbol.
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
775 
// Assigns huffman codewords to the `n` code lengths in `len` (NO_CODE marks unused
// entries) and records each one through add_entry().
//
// Returns true on success (including the case where no entry is used) and false
// when the lengths cannot be assigned: the tree is over-subscribed, or a length
// exceeds 31. Lengths above 31 are rejected because `available` is indexed by
// length and has 32 slots; previously a length of 32 indexed past its end.
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  int m = 0; // number of entries handed to add_entry() so far
  // available[d] holds the code of the free leaf at depth d, 0 meaning 'empty';
  // D zero-initialises the array
  uint[32] available;

  // find the first entry
  int k = 0;
  while (k < n && len[k] >= NO_CODE) ++k;
  if (k == n) { assert(c.sorted_entries == 0); return true; }
  if (len[k] >= available.length) return false; // would index past `available`
  // add to the list
  add_entry(c, 0, k, m++, len[k], values);
  // add all available leaves
  for (int i = 1; i <= len[k]; ++i) available[i] = 1U<<(32-i);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (int i = k+1; i < n; ++i) {
    // read the length once into a local (dmd bug: it reads 4 bytes without temp)
    immutable ubyte codelen = len[i];
    if (codelen == NO_CODE) continue;
    if (codelen >= available.length) return false; // would index past `available`
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    int z = codelen;
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    immutable uint res = available[z];
    available[z] = 0;
    add_entry(c, bit_reverse(res), i, m++, codelen, values);
    // propagate availability up the tree (no iterations when z == codelen)
    for (int y = codelen; y > z; --y) {
      assert(available[y] == 0);
      available[y] = res+(1U<<(32-y));
    }
  }
  return true;
}
827 
// Builds the direct lookup table that resolves, in O(1), every symbol whose
// code is no longer than STB_VORBIS_FAST_HUFFMAN_LENGTH bits.
// Slots not covered by any short code keep the value -1.
private void compute_accelerated_huffman (Codebook* c) {
  c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE] = -1;
  auto total = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (total > 32767) total = 32767; // largest possible value we can encode!
  }
  foreach (uint i; 0..total) {
    immutable clen = c.codeword_lengths[i];
    if (clen > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;
    uint slot = (c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]);
    // set table entries for all bit combinations in the higher bits
    for (; slot < FAST_HUFFMAN_TABLE_SIZE; slot += 1<<clen) {
      c.fast_huffman.ptr[slot] = cast(typeof(c.fast_huffman[0]))i; //k8
    }
  }
}
848 
// qsort-style comparator for two `uint` values: -1, 0 or 1.
extern(C) int uint32_compare (const void* p, const void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
854 
// Tells whether a code of length `len` belongs in the sorted (binary search) table:
// always for sparse codebooks; otherwise only for used codes that are too long
// for the fast table.
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
861 
// Builds the bit-reversed, sorted codeword table that is binary-searched for
// codes the fast table doesn't resolve, together with the matching
// sorted_values (and, for sparse codebooks, codeword_lengths) entries.
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  // step 1: collect the bit-reversed codewords
  if (c.sparse) {
    foreach (uint i; 0..c.sorted_entries) c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
  } else {
    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
    int filled = 0;
    foreach (uint i; 0..c.entries) {
      if (!include_in_sort(c, lengths[i])) continue;
      c.sorted_codewords[filled++] = bit_reverse(c.codewords[i]);
    }
    assert(filled == c.sorted_entries);
  }

  // step 2: sort them and append the 0xffffffff terminator
  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  // step 3: for each original entry, binary-search its slot in the sorted table
  // (sorting a parallel structure would need extra storage; searching the
  // original list per sorted entry would be slow)
  immutable total = (c.sparse ? c.sorted_entries : c.entries);
  foreach (uint i; 0..total) {
    auto huff_len = (c.sparse ? lengths[values[i]] : lengths[i]);
    if (!include_in_sort(c, huff_len)) continue;
    immutable uint code = bit_reverse(c.codewords[i]);
    int pos = 0;
    for (int span = c.sorted_entries; span > 1; ) {
      // invariant: sc[pos] <= code < sc[pos+span]
      immutable int half = span>>1;
      if (c.sorted_codewords[pos+half] <= code) {
        pos += half;
        span -= half;
      } else {
        span = half;
      }
    }
    assert(c.sorted_codewords[pos] == code);
    if (c.sparse) {
      c.sorted_values[pos] = values[i];
      c.codeword_lengths[pos] = huff_len;
    } else {
      c.sorted_values[pos] = i;
    }
  }
}
911 
// Checks that the six bytes at `data` spell "vorbis".
// only run while parsing the header (3 times)
private int vorbis_validate (const(void)* data) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  auto bytes = cast(const(char)*)data;
  return (bytes[0..6] == "vorbis");
}
918 
// Returns the largest integer r with r^dim <= entries
// (formula implied by specification).
// called from setup only, once per code book
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  // first guess via exp(log(entries)/dim); it may come out one too low
  int r = cast(int)lrintf(floor(exp(cast(float)log(cast(float)entries)/dim)));
  // floor() to avoid _ftol() when non-CRT
  immutable bumped = lrintf(floor(pow(cast(float)r+1, dim)));
  if (bumped <= entries) ++r;
  assert(pow(cast(float)r+1, dim) > entries);
  assert(lrintf(floor(pow(cast(float)r, dim))) <= entries);
  return r;
}
930 
// Fills the three sine/cosine tables for block size `n`:
// A and B receive n/4 (cos, sin)-style pairs each, C receives n/8 pairs.
// called twice per file
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int quarter = n>>2, eighth = n>>3;
  foreach (int k; 0..quarter) {
    immutable int k2 = k*2; // pair offset
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (int k; 0..eighth) {
    immutable int k2 = k*2; // pair offset
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
947 
// Writes the n/2 window coefficients sin(pi/2 * sin^2((i+0.5)/(n/2) * pi/2)) into `window`.
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int half = n>>1;
  for (int i = 0; i < half; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/half*0.5*PI)));
  }
}
953 
// Fills `rev` with n/8 bit-reversed indices, each scaled by 4.
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int shift = 32-ld+3;
  immutable int count = n>>3;
  for (int i = 0; i < count; ++i) {
    rev[i] = cast(ushort)((bit_reverse(i)>>shift)<<2); //k8
  }
}
959 
// Allocates and fills the per-blocksize tables (twiddle factors A/B/C, window,
// bit-reverse table) for slot `b` and block size `n`.
// Returns true on success; on allocation failure returns error(f, outofmem).
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1, quarter = n>>2, eighth = n>>3;

  // twiddle factors: all three tables are requested before any is checked
  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  // window
  f.window[b] = setup_malloc!float(f, half);
  if (!f.window[b]) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  // bit-reverse table
  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (!f.bit_reverse[b]) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
975 
// Among x[0..n], finds the index of the largest value below x[n] (stored to *plow)
// and of the smallest value above x[n] (stored to *phigh).
// An output is left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  assert(n >= 0 && n <= ushort.max);
  int below = -1;    // best value found under x[n] so far
  int above = 65536; // best value found over x[n] so far
  foreach (ushort idx; 0..cast(ushort)n) {
    if (x[idx] < x[n] && x[idx] > below) { *plow = idx; below = x[idx]; }
    if (x[idx] > x[n] && x[idx] < above) { *phigh = idx; above = x[idx]; }
  }
}
985 
// sortable pair; `y` has been repurposed to carry the original index
// of the entry rather than a y coordinate
struct Point {
  ushort x;
  ushort y;
}
990 
// qsort-style comparator ordering Points by their `x` field only: -1, 0 or 1.
extern(C) int point_compare (const void *p, const void *q) {
  immutable ushort lhs = (cast(const(Point)*)p).x;
  immutable ushort rhs = (cast(const(Point)*)q).x;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
996 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
997 
998 // ///////////////////////////////////////////////////////////////////// //
// Reads one byte from the stream via f.rawRead().
// Returns 0 and sets f.eof when the read comes up short; also returns 0
// (without reading) when f.eof is already set. The result is initialised to 0
// because the previous `= void` left it undefined on that second path.
private ubyte get8 (VorbisDecoder f) {
  ubyte b = 0;
  if (!f.eof) {
    if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  }
  return b;
}
1006 
// Reads a little-endian 32-bit value from the stream.
// Returns 0 when f.eof is already set or when the read comes up short.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  version(LittleEndian) {
    // host order matches stream order: read all four bytes in one go
    uint word = 0;
    if (f.rawRead((&word)[0..1]) != word.sizeof) { f.eof = true; return 0; }
    return word;
  } else {
    // assemble byte by byte, least significant first
    uint word = get8(f);
    word |= cast(uint)get8(f)<<8;
    word |= cast(uint)get8(f)<<16;
    word |= cast(uint)get8(f)<<24;
    return word;
  }
}
1021 
// Reads exactly `n` bytes into `data`.
// Returns false when f.eof is set, `n` is negative, or the read comes up short
// (which also sets f.eof); a zero-length request succeeds without reading.
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n > 0 && f.rawRead(data[0..n]) != n) {
    f.eof = true;
    return false;
  }
  return true;
}
1028 
// Skips `n` bytes via f.rawSkip(); does nothing when f.eof is set or n is not positive.
private void skip (VorbisDecoder f, int n) {
  if (!f.eof && n > 0) f.rawSkip(n);
}
1033 
// Clears f.eof and seeks to `loc` via f.rawSeek().
// Offsets of 0x80000000 and above are not seeked to; they set f.eof instead.
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  f.eof = false;
  if (loc < 0x80000000) {
    f.rawSeek(loc);
  } else {
    f.eof = true;
  }
}
1040 
1041 
// the four bytes that open every ogg page
immutable char[4] ogg_page_header = "OggS"; //[ 0x4f, 0x67, 0x67, 0x53 ];

// Reads four bytes and reports whether they are the ogg capture pattern.
// Returns false as well when the four bytes cannot be read.
private bool capture_pattern (VorbisDecoder f) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  char[4] got = void; // only inspected after getn() has filled it
  return (getn(f, got.ptr, 4) && got == "OggS");
}
1050 
// bits of the ogg page header flag byte
enum PAGEFLAG_continued_packet = 1<<0;
enum PAGEFLAG_first_page = 1<<1;
enum PAGEFLAG_last_page = 1<<2;
1054 
// Parses an ogg page header whose 4-byte capture pattern has already been consumed,
// loads the segment table into f.segments and resets f.next_seg to 0.
// Returns true on success, otherwise the result of error().
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position (low half, then high half)
  // @TODO: validate them as valid positions?
  immutable uint granule_lo = get32(f);
  immutable uint granule_hi = get32(f);
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  f.last_page = get32(f);
  // CRC32 (read and ignored)
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);

  // assume we _don't_ know the sample position of any segment
  f.end_seg_with_known_loc = -2;
  if (granule_lo != ~0U || granule_hi != ~0U) {
    // walk back to the _last_ segment that ends a packet (length below 255)
    int last = f.segment_count-1;
    while (last >= 0 && f.segments.ptr[last] >= 255) --last;
    if (last >= 0) {
      f.end_seg_with_known_loc = last;
      f.known_loc_for_packet = granule_lo;
    }
  }

  if (f.first_decode) {
    // page length: 27 fixed header bytes + segment table + payload
    int len = 0;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    len += 27+f.segment_count;
    ProbedPage p;
    p.page_start = f.first_audio_page_offset;
    p.page_end = p.page_start+len;
    p.last_decoded_sample = granule_lo;
    f.p_first = p;
  }

  f.next_seg = 0;
  return true;
}
1102 
// Expects the capture pattern, then parses the rest of the page header.
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
1107 
// Positions the reader at the start of a new packet, loading pages until
// f.next_seg is valid, and resets the per-packet read state.
// A page loaded here must not carry the continued-packet flag.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if ((f.page_flag&PAGEFLAG_continued_packet) != 0) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  return true;
}
1120 
// Like start_packet(), but hitting end of stream exactly at a page boundary
// makes it return false without raising an error.
private int maybe_start_packet (VorbisDecoder f) {
  if (f.next_seg == -1) {
    immutable ubyte first = get8(f);
    if (f.eof) return false; // EOF at page boundary is not an error!
    // capture pattern "OggS", checked one byte at a time
    if (first != 0x4f) return error(f, STBVorbisError.missing_capture_pattern);
    static immutable ubyte[3] rest = [0x67, 0x67, 0x53];
    foreach (immutable ubyte expected; rest) {
      if (get8(f) != expected) return error(f, STBVorbisError.missing_capture_pattern);
    }
    if (!start_page_no_capturepattern(f)) return false;
    if ((f.page_flag&PAGEFLAG_continued_packet) != 0) {
      // set up enough state that we can read this packet if we want,
      // e.g. during recovery
      f.last_seg = false;
      f.bytes_in_seg = 0;
      return error(f, STBVorbisError.continued_packet_flag_invalid);
    }
  }
  return start_packet(f);
}
1140 
// Advances to the next segment of the current packet and returns its length.
// Returns 0 when the packet's last segment was already consumed or the next
// page cannot be started; a following page must carry the continued-packet flag.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    // current page exhausted: the packet has to continue on the next one
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) {
      f.last_seg = true;
      return 0;
    }
    if ((f.page_flag&PAGEFLAG_continued_packet) == 0) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  immutable seglen = f.segments.ptr[f.next_seg++];
  // a segment shorter than 255 bytes terminates the packet
  if (seglen < 255) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = seglen;
  return seglen;
}
1158 
enum int EOP = -1;          // "end of packet" marker returned by the packet readers
enum int INVALID_BITS = -1; // f.valid_bits value once reading ran past the packet end
1161 
// Returns the next byte of the current packet, or EOP once the packet is exhausted.
// Does not touch the bit accumulator state.
private int get8_packet_raw (VorbisDecoder f) {
  // segment drained: either the packet is over or the next segment has to be opened
  if (f.bytes_in_seg == 0 && (f.last_seg || !next_segment(f))) return EOP;
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  --f.bytes_in_seg;
  ++f.packet_bytes;
  return get8(f);
}
1172 
// Byte-aligned packet read: fetches the next packet byte (or EOP) and
// drops whatever bits were still buffered.
private int get8_packet (VorbisDecoder f) {
  immutable int octet = get8_packet_raw(f);
  f.valid_bits = 0;
  return octet;
}
1178 
// Reads a little-endian 32-bit value from the current packet.
// Returns EOP (converted to uint) as soon as any of the four bytes hits end of packet.
private uint get32_packet (VorbisDecoder f) {
  uint value = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    immutable uint part = get8_packet(f);
    if (part == EOP) return EOP;
    value += part<<shift;
  }
  return value;
}
1190 
// Consumes and discards the remainder of the current packet.
private void flush_packet (VorbisDecoder f) {
  for (;;) {
    if (get8_packet_raw(f) == EOP) break;
  }
}
1194 
// Reads `n` bits (least significant first) from the current packet.
// Returns 0 once the packet has run out; f.valid_bits is then INVALID_BITS.
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case,
      // so read the low 24 bits first and the remainder afterwards
      immutable uint low = get_bits_main(f, 24);
      immutable uint high = get_bits_main(f, n-24);
      return low+(high<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // top up the accumulator one byte at a time (at least one byte is needed here)
    do {
      immutable uint octet = get8_packet_raw(f);
      if (octet == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += octet<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint result = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return result;
}
1224 
// Reads `n` bits and returns them in the narrowest unsigned type
// (ubyte, ushort, uint or ulong) that has at least `n` bits.
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else alias Result = ulong;
  return cast(Result)get_bits_main(f, n);
}
1233 
// Reads `n` bits, adds `add`, and returns the sum in the narrowest unsigned type
// with at least `n` bits; the caller must ensure the sum fits (it is truncated otherwise).
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else alias Result = ulong;
  return cast(Result)(get_bits_main(f, n)+add);
}
1242 
// @OPTIMIZE: primary accumulator for huffman
// Code fragment (mixed in where a VorbisDecoder `f` is in scope) that tops up
// f.acc to more than 24 valid bits without reading off the end of the packet.
// it might be nice to allow f.valid_bits and f.acc to be stored in registers,
// e.g. cache them locally and decode locally
//private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    for (;;) {
      if (f.last_seg && !f.bytes_in_seg) break;
      immutable int phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
      if (f.valid_bits > 24) break;
    }
  }
};
1261 
// numeric ids of the id, comment and setup packets
enum VorbisPacket : int {
  id      = 1,
  comment = 3,
  setup   = 5,
}
1267 
// Slow-path huffman decode of one codeword from the bit accumulator.
// Returns the decoded index (sorted index for sparse codebooks, symbol otherwise)
// or -1 when the codebook has no tables, too few bits remain, or no code matches
// (the last case also reports invalid_stream).
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;
  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  immutable bool binary = (c.entries > 8 ? c.sorted_codewords !is null : c.codewords is null);

  if (!binary) {
    // if small, linear search
    debug(stb_vorbis) assert(!c.sparse);
    foreach (uint i; 0..c.entries) {
      immutable clen = c.codeword_lengths[i];
      if (clen == NO_CODE) continue;
      if (c.codewords[i] != (f.acc&((1<<clen)-1))) continue;
      // matching code found; it only counts if that many bits are really buffered
      if (f.valid_bits < clen) {
        f.valid_bits = 0;
        return -1;
      }
      f.acc >>= clen;
      f.valid_bits -= clen;
      return i;
    }
    error(f, STBVorbisError.invalid_stream);
    f.valid_bits = 0;
    return -1;
  }

  // binary search over the bit-reversed, sorted codewords
  immutable uint code = bit_reverse(f.acc);
  int pos = 0;
  for (int span = c.sorted_entries; span > 1; ) {
    // invariant: sc[pos] <= code < sc[pos+span]
    immutable int half = span>>1;
    if (c.sorted_codewords[pos+half] <= code) {
      pos += half;
      span -= half;
    } else {
      span = half;
    }
  }
  // pos is the sorted index; dense codebooks map it back to the symbol
  if (!c.sparse) pos = c.sorted_values[pos];
  immutable int len = c.codeword_lengths[pos];
  if (f.valid_bits < len) {
    f.valid_bits = 0;
    return -1;
  }
  f.acc >>= len;
  f.valid_bits -= len;
  return pos;
}
1319 
1320 
// Code generator: decodes one huffman symbol from codebook `c` into variable `var`.
// The generated code tries the fast lookup table first and falls back to
// codebook_decode_scalar_raw(); `var` ends up -1 when no symbol could be decoded.
enum DECODE_RAW(string var, string c) = q{
  if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
  // fast huffman table lookup
  ${i} = ${c}.fast_huffman.ptr[f.acc&FAST_HUFFMAN_TABLE_MASK];
  if (${i} < 0) {
    ${i} = codebook_decode_scalar_raw(f, ${c});
  } else {
    auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
    f.acc >>= ${__temp_prefix__}n;
    f.valid_bits -= ${__temp_prefix__}n;
    if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
  }
}.cmacroFixVars!("i", "c")(var, c);
1337 
// Code generator: DECODE_RAW followed by the sorted-index -> symbol translation
// that sparse codebooks need.
template DECODE(string var, string c) {
  enum DECODE = q{
    ${DECODE_RAW}
    if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
  }.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
}
1342 
1343 
// decoder used by the vector (VQ) paths: the translating variant when
// STB_VORBIS_DIVIDES_IN_CODEBOOK is set, the raw one otherwise
version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
  alias DECODE_VQ = DECODE;
else
  alias DECODE_VQ = DECODE_RAW;
1349 
1350 
1351 
// Expression generators for codebook element access.
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition; here both variants expand to the same
// `multiplicands` lookup and the base value is the constant 0.
template CODEBOOK_ELEMENT(string c, string off) {
  enum CODEBOOK_ELEMENT = "("~c~".multiplicands["~off~"])";
}
template CODEBOOK_ELEMENT_FAST(string c, string off) {
  enum CODEBOOK_ELEMENT_FAST = "("~c~".multiplicands["~off~"])";
}
template CODEBOOK_ELEMENT_BASE(string c) {
  enum CODEBOOK_ELEMENT_BASE = "(0)";
}
1357 
1358 
// Decodes the entry number that starts a vector lookup.
// Returns the entry (>= 0) or a negative value; invalid_stream is reported for
// lookup type 0 and for a failed decode that is not a clean end of packet.
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  int z = -1;
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return z;
  }
  mixin(DECODE_VQ!("z", "c"));
  debug(stb_vorbis) if (c.sparse) assert(z < c.sorted_entries);
  // a failed decode is only acceptable as end of packet
  // (last segment reached and fully consumed)
  if (z < 0 && (f.bytes_in_seg || !f.last_seg)) error(f, STBVorbisError.invalid_stream);
  return z;
}
1374 
// Decodes one vector from codebook `c` and adds up to `len` of its components
// (never more than c.dimensions) onto output[0..len].
// Returns false when no entry could be decoded.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (c.dimensions < len) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      // lookup type 1: peel the component indices off the entry number digit by digit
      float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int div = 1;
      for (int k = 0; k < len; ++k, div *= c.lookup_values) {
        immutable int off = (z/div)%c.lookup_values;
        immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[k] += val;
        if (c.sequence_p) last = val+c.minimum_value;
      }
      return true;
    }
  }

  // components of entry z are stored consecutively starting at z*dimensions
  z *= c.dimensions;
  if (!c.sequence_p) {
    immutable float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    for (int k = 0; k < len; ++k) output[k] += mixin(CODEBOOK_ELEMENT_FAST!("c", "z+k"))+last;
  } else {
    // sequence mode: each component builds on the previous one
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    for (int k = 0; k < len; ++k) {
      immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+k"))+last;
      output[k] += val;
      last = val+c.minimum_value;
    }
  }

  return true;
}
1410 
// Like codebook_decode(), but the components are added onto every `step`-th
// element of `output` (output[0], output[step], output[2*step], ...).
// Returns false when no entry could be decoded.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (c.dimensions < len) len = c.dimensions;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      // lookup type 1: peel the component indices off the entry number digit by digit
      int div = 1;
      for (int k = 0; k < len; ++k, div *= c.lookup_values) {
        immutable int off = (z/div)%c.lookup_values;
        immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[k*step] += val;
        if (c.sequence_p) last = val;
      }
      return true;
    }
  }

  // components of entry z are stored consecutively starting at z*dimensions
  z *= c.dimensions;
  for (int k = 0; k < len; ++k) {
    immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+k"))+last;
    output[k*step] += val;
    if (c.sequence_p) last = val; // sequence mode: carry the running value forward
  }

  return true;
}
1440 
// Repeatedly decodes vectors from codebook `c` and scatters their components,
// interleaved, over the `ch` channel buffers in `outputs` until `total_decode`
// components have been consumed. Null channel pointers are skipped (the position
// still advances).
//
// *c_inter_p / *p_inter_p hold the current channel and the sample position within
// the channel; they are written back only on success. `len` is the number of
// samples available per channel.
//
// Returns true on success, false on a clean end of packet, and the result of
// error() for lookup type 0 or a failed decode.
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      // running out of data exactly at the end of the packet is not an error
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective).
    // The clamp used to subtract (p_inter*ch-c_inter), which left `effective`
    // too large by 2*c_inter and allowed writes past `len` samples.
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          // outputs.ptr[c_inter] is a plain float*, so it is indexed directly
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    // components of entry z are stored consecutively starting at z*dimensions
    z *= c.dimensions;
    if (c.sequence_p) {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1502 
// Code generator replacing
//   private int predict_point (int x, int x0, int x1, int y0, int y1)
// The generated block stores into `dest` the y value at `x` on the line through
// (x0,y0) and (x1,y1), with the offset from y0 truncated toward zero.
template predict_point(string dest, string x, string x0, string x1, string y0, string y1) {
  enum predict_point = q{{
    int dy = ${y1}-${y0};
    int adx = ${x1}-${x0};
    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
    int err = (dy < 0 ? -dy : dy)*(${x}-${x0}); // |dy| scaled by the distance from x0
    int off = err/adx;
    if (dy < 0) ${dest} = ${y0}-off; else ${dest} = ${y0}+off;
  }}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
}
1513 
// the following table is block-copied from the specification
//
// 256 linear-scale multipliers, rising from about 1.06e-07 at index 0 to
// exactly 1.0 at index 255. draw_line indexes this table with the integer y
// value of the line it is rendering and hands the looked-up float to LINE_OP.
// NOTE(review): going by the name this is the floor "inverse dB" lookup of the
// Vorbis specification; the constants must stay exactly as written (the
// decoder has to reproduce the encoder's values), so never regenerate or
// round them - confirm against the spec if this table is ever touched.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
];
1581 
1582 
1583 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1584 // note that you must produce bit-identical output to decode correctly;
1585 // this specific sequence of operations is specified in the spec (it's
1586 // drawing integer-quantized frequency-space lines that the encoder
1587 // expects to be exactly the same)
1588 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1589 // have to divide in the setup? sigh.
// LINE_OP!(a, b) builds the source text of one statement that applies the
// floor value `b` to the output slot `a`; the result is meant to be mixin()ed.
// With STB_VORBIS_NO_DEFER_FLOOR the value is stored ("a = b;"), otherwise the
// slot is scaled by it ("a *= b;").
version(STB_VORBIS_NO_DEFER_FLOOR) {
  private enum string LINE_OP_OPERATOR_ = " = ";
} else {
  private enum string LINE_OP_OPERATOR_ = " *= ";
}
enum LINE_OP(string a, string b) = a~LINE_OP_OPERATOR_~b~";";
1595 
// Optional lookup table used by draw_line in place of the integer division
// when the line is short and shallow: it is consulted only when
// adx < DIVTAB_DENOM and |dy| < DIVTAB_NUMER, and is indexed as
// integer_divide_table[|dy|][adx].
// NOTE(review): the code that fills the table is not in this part of the
// file; presumably entry [i][j] holds i/j - confirm where it is initialised.
// NOTE(review): a module-level variable is thread-local in D unless declared
// shared/__gshared, so every thread gets its own zeroed copy - confirm the
// initialisation runs on each thread that decodes.
version(STB_VORBIS_DIVIDE_TABLE) {
  enum DIVTAB_NUMER = 32; // first dimension: number of |dy| rows
  enum DIVTAB_DENOM = 64; // second dimension: number of adx columns
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table; // 2KB
}
1601 
// nobranch abs trick
// Builds the text of the expression ((v + s) ^ s) with s = v>>31. For a
// 32-bit signed int, s is 0 when v >= 0 and -1 when v < 0, so the expression
// gives v unchanged for non-negative input and -v for negative input.
// The argument text is pasted three times, so it must have no side effects.
// NOTE(review): assumes `v` is a 32-bit signed int; int.min maps to itself.
enum ABS(string v) = q{(((${v})+((${v})>>31))^((${v})>>31))}.cmacroFixVars!"v"(v);
1604 
// this is forceinline, but dmd inliner sux
// but hey, i have my k00l macrosystem!
//void draw_line (float* ${output}, int ${x0}, int ${y0}, int ${x1}, int ${y1}, int ${n})
//
// Renders the integer line from (x0,y0) towards (x1,y1) into `output`: for
// every x in [x0, min(x1, n)) the current y is used as an index into
// inverse_db_table and the looked-up value is applied to output[x] with
// LINE_OP. y advances per step by `base` (= dy/adx) or by `sy` (= base+-1),
// chosen by an error accumulator that adds |dy| - |base|*adx each step and
// wraps at adx.
//
// Macro arguments (all pasted textually, several of them more than once, so
// they must be free of side effects):
//   output = expression indexable as output[x]
//   x0, y0 = start point
//   x1, y1 = end point (exclusive in x)
//   n      = upper bound for x
//
// Things to keep in mind when using it:
//   - `x1` must be an assignable variable: the macro clamps it with
//     "x1 = n" when x1 > n, and that write is visible to the caller
//     (the function form commented above would have taken it by value).
//   - arguments are pasted without parentheses, so pass plain identifiers.
//   - x1 == x0 divides by zero; callers are expected to skip that case.
//   - y is not range-checked before indexing inverse_db_table (256 entries).
//   - with STB_VORBIS_DIVIDE_TABLE the quotient comes from
//     integer_divide_table when adx < DIVTAB_DENOM and |dy| < DIVTAB_NUMER.
//   - the commented-out block at the end of the macro is the same drawing
//     loop without the "x < x1" guard.
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${x1} > ${n}) ${x1} = ${n};
  if (${__temp_prefix__}x < ${x1}) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    }
  }
  /*
  mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
    ${__temp_prefix__}err += ${__temp_prefix__}ady;
    if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
      ${__temp_prefix__}err -= ${__temp_prefix__}adx;
      ${__temp_prefix__}y += ${__temp_prefix__}sy;
    } else {
      ${__temp_prefix__}y += ${__temp_prefix__}base;
    }
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  }
  */
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1663 
// Decode one residue partition of `n` values into target[offset..].
//
// rtype == 0: the partition is decoded as n/book.dimensions interleaved
//             columns, one codebook_decode_step call per column.
// otherwise : vectors of book.dimensions values are decoded back to back
//             with codebook_decode until `n` values have been covered.
//
// Returns non-zero on success, 0 as soon as one of the codebook calls fails.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    float* dst = target+offset;
    int covered = 0;
    while (covered < n) {
      if (!codebook_decode(f, book, dst, n-covered)) return false;
      covered += book.dimensions;
      dst += book.dimensions;
    }
    return true;
  }

  immutable int step = n/book.dimensions;
  for (int col = 0; col < step; ++col) {
    if (!codebook_decode_step(f, book, target+offset+col, n-offset-col, step)) return false;
  }
  return true;
}
1677 
// Decode the residue vectors selected by residue configuration `rn`.
//
// Params:
//   f               = decoder state (codebooks, residue configs, temp allocator)
//   residue_buffers = per-channel output buffers; the first `n` floats of every
//                     channel not flagged in `do_not_decode` are zeroed, then
//                     the decoded values are accumulated into them
//   ch              = number of channels handled by this call
//   n               = number of floats in each channel buffer
//   rn              = index into f.residue_config / f.residue_types
//   do_not_decode   = `ch` flags; a non-zero entry marks a channel to skip
//
// The residue is decoded in up to 8 passes over `part_read` partitions. On
// pass 0 a classification word is read from the class book for every group of
// `classwords` partitions; on every pass each partition is decoded with the
// book r.residue_books[class][pass] (a negative book index means "nothing to
// decode for this class in this pass").
//
// Residue type 2 with more than one channel treats all channels as a single
// interleaved vector and goes through codebook_decode_deinterleave_repeat;
// every other case decodes channel by channel through residue_decode.
//
// Decoding stops silently (leaving whatever was decoded so far) on end of
// packet or when a codebook decode fails. Temporary storage is always
// released before returning.
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions;
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size;
  uint temp_alloc_point = temp_alloc_save(f);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  if (rtype == 2 && ch != 1) {
    // nothing to do when every channel is flagged as "do not decode"
    int j = void;
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      if (ch == 2) {
        // two channels: channel/position split done with mask and shift
        //stb_prof(13);
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = (z&1), p_inter = z>>1;
          if (pass == 0) {
            Codebook *cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              // classifications[0] is a plain int*, so it is indexed directly
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          //stb_prof(5);
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(20); // accounts for X time
              // the C source uses a dedicated two-channel decoder
              // (codebook_decode_deinterleave_repeat_2) when
              // STB_VORBIS_DIVIDES_IN_CODEBOOK is off; this port always
              // uses the generic one
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(7);
            } else {
              // no book for this class/pass: skip over the partition
              zz += r.part_size;
              c_inter = zz&1;
              p_inter = zz>>1;
            }
          }
          //stb_prof(8);
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else {
        // any other channel count (ch == 1 never reaches this block, it is
        // excluded by the enclosing condition)
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = z%ch, p_inter = z/ch;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              // no book for this class/pass: skip over the partition
              zz += r.part_size;
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  // residue types 0 and 1, and type 2 with a single channel:
  // every channel is classified and decoded on its own
  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j][i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j][pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  //stb_prof(0);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1894 
1895 
1896 // the following were split out into separate functions while optimizing;
1897 // they could be pushed back up but eh. __forceinline showed no change;
1898 // they're probably already being inlined.
// Iteration 0 of IMDCT step 3.
//
// Works downwards from e[i_off] on two runs of floats that lie k_off apart.
// Each butterfly takes one (re, im) pair from both runs, leaves the sum in
// the upper run and stores the difference, rotated by the twiddle pair
// (A[0], A[1]), in the lower run. The twiddle pointer moves on by 8 floats
// after every butterfly. n>>2 groups of four butterflies are processed; the
// debug assert expects n to be a multiple of 4.
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  float* upper = e+i_off;
  float* lower = upper+k_off;
  debug(stb_vorbis) assert((n&3) == 0);
  for (int group = n>>2; group > 0; --group) {
    float dre, dim;

    // butterfly 0: elements 0 / -1
    dre = upper[ 0]-lower[ 0];
    dim = upper[-1]-lower[-1];
    upper[ 0] = upper[ 0]+lower[ 0];
    upper[-1] = upper[-1]+lower[-1];
    lower[ 0] = dre*A[0]-dim*A[1];
    lower[-1] = dim*A[0]+dre*A[1];
    A += 8;

    // butterfly 1: elements -2 / -3
    dre = upper[-2]-lower[-2];
    dim = upper[-3]-lower[-3];
    upper[-2] = upper[-2]+lower[-2];
    upper[-3] = upper[-3]+lower[-3];
    lower[-2] = dre*A[0]-dim*A[1];
    lower[-3] = dim*A[0]+dre*A[1];
    A += 8;

    // butterfly 2: elements -4 / -5
    dre = upper[-4]-lower[-4];
    dim = upper[-5]-lower[-5];
    upper[-4] = upper[-4]+lower[-4];
    upper[-5] = upper[-5]+lower[-5];
    lower[-4] = dre*A[0]-dim*A[1];
    lower[-5] = dim*A[0]+dre*A[1];
    A += 8;

    // butterfly 3: elements -6 / -7
    dre = upper[-6]-lower[-6];
    dim = upper[-7]-lower[-7];
    upper[-6] = upper[-6]+lower[-6];
    upper[-7] = upper[-7]+lower[-7];
    lower[-6] = dre*A[0]-dim*A[1];
    lower[-7] = dim*A[0]+dre*A[1];
    A += 8;

    upper -= 8;
    lower -= 8;
  }
}
1940 
// IMDCT step 3, "r" inner loop.
//
// Same butterfly as imdct_step3_iter0_loop (sum stays in the upper run, the
// twiddled difference goes to the run k_off away), but the twiddle pointer
// advances by the caller-supplied stride k1 after every butterfly.
// lim>>2 groups of four butterflies are processed, walking downwards from
// e[d0].
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* upper = e+d0;
  float* lower = upper+k_off;
  float dre, dim;
  for (int group = lim>>2; group > 0; --group) {
    // butterfly 0: elements 0 / -1
    dre = upper[ 0]-lower[ 0];
    dim = upper[-1]-lower[-1];
    upper[ 0] = upper[ 0]+lower[ 0];
    upper[-1] = upper[-1]+lower[-1];
    lower[ 0] = dre*A[0]-dim*A[1];
    lower[-1] = dim*A[0]+dre*A[1];
    A += k1;

    // butterfly 1: elements -2 / -3
    dre = upper[-2]-lower[-2];
    dim = upper[-3]-lower[-3];
    upper[-2] = upper[-2]+lower[-2];
    upper[-3] = upper[-3]+lower[-3];
    lower[-2] = dre*A[0]-dim*A[1];
    lower[-3] = dim*A[0]+dre*A[1];
    A += k1;

    // butterfly 2: elements -4 / -5
    dre = upper[-4]-lower[-4];
    dim = upper[-5]-lower[-5];
    upper[-4] = upper[-4]+lower[-4];
    upper[-5] = upper[-5]+lower[-5];
    lower[-4] = dre*A[0]-dim*A[1];
    lower[-5] = dim*A[0]+dre*A[1];
    A += k1;

    // butterfly 3: elements -6 / -7
    dre = upper[-6]-lower[-6];
    dim = upper[-7]-lower[-7];
    upper[-6] = upper[-6]+lower[-6];
    upper[-7] = upper[-7]+lower[-7];
    lower[-6] = dre*A[0]-dim*A[1];
    lower[-7] = dim*A[0]+dre*A[1];
    A += k1;

    upper -= 8;
    lower -= 8;
  }
}
1986 
// IMDCT step 3, "s" inner loop.
//
// The four twiddle pairs are fetched once, from A[0], A[a_off], A[2*a_off]
// and A[3*a_off], and reused for each of the n iterations. Every iteration
// runs four butterflies (sum kept in the upper run, twiddled difference
// written to the run k_off away) on elements 0..-7 and then steps both runs
// down by k0 floats.
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  immutable float w0re = A[0];
  immutable float w0im = A[0+1];
  immutable float w1re = A[0+a_off];
  immutable float w1im = A[0+a_off+1];
  immutable float w2re = A[0+a_off*2+0];
  immutable float w2im = A[0+a_off*2+1];
  immutable float w3re = A[0+a_off*3+0];
  immutable float w3im = A[0+a_off*3+1];
  float* upper = e+i_off;
  float* lower = upper+k_off;
  for (int iter = 0; iter < n; ++iter) {
    float dre, dim;

    dre = upper[ 0]-lower[ 0];
    dim = upper[-1]-lower[-1];
    upper[ 0] += lower[ 0];
    upper[-1] += lower[-1];
    lower[ 0] = dre*w0re-dim*w0im;
    lower[-1] = dim*w0re+dre*w0im;

    dre = upper[-2]-lower[-2];
    dim = upper[-3]-lower[-3];
    upper[-2] += lower[-2];
    upper[-3] += lower[-3];
    lower[-2] = dre*w1re-dim*w1im;
    lower[-3] = dim*w1re+dre*w1im;

    dre = upper[-4]-lower[-4];
    dim = upper[-5]-lower[-5];
    upper[-4] += lower[-4];
    upper[-5] += lower[-5];
    lower[-4] = dre*w2re-dim*w2im;
    lower[-5] = dim*w2re+dre*w2im;

    dre = upper[-6]-lower[-6];
    dim = upper[-7]-lower[-7];
    upper[-6] += lower[-6];
    upper[-7] += lower[-7];
    lower[-6] = dre*w3re-dim*w3im;
    lower[-7] = dim*w3re+dre*w3im;

    upper -= k0;
    lower -= k0;
  }
}
2032 
// this was forceinline
//void iter_54(float *z)
//
// Twiddle-free 8-point pass used by imdct_step3_inner_s_loop_ld654: combines
// the eight floats z[0], z[-1] .. z[-7] in place using additions and
// subtractions only. With zk standing for the value of z[-k] on entry:
//   z[ 0] = (z0+z4)+(z2+z6)      z[-1] = (z1+z5)+(z3+z7)
//   z[-2] = (z0+z4)-(z2+z6)      z[-3] = (z1+z5)-(z3+z7)
//   z[-4] = (z0-z4)+(z3-z7)      z[-5] = (z1-z5)-(z2-z6)
//   z[-6] = (z0-z4)-(z3-z7)      z[-7] = (z1-z5)+(z2-z6)
// The argument is evaluated once and must yield a float pointer.
enum iter_54(string z) = q{{
  auto ${__temp_prefix__}p = (${z});

  // read all eight inputs up front so no store can disturb a later load
  immutable float ${__temp_prefix__}z0 = ${__temp_prefix__}p[ 0];
  immutable float ${__temp_prefix__}z1 = ${__temp_prefix__}p[-1];
  immutable float ${__temp_prefix__}z2 = ${__temp_prefix__}p[-2];
  immutable float ${__temp_prefix__}z3 = ${__temp_prefix__}p[-3];
  immutable float ${__temp_prefix__}z4 = ${__temp_prefix__}p[-4];
  immutable float ${__temp_prefix__}z5 = ${__temp_prefix__}p[-5];
  immutable float ${__temp_prefix__}z6 = ${__temp_prefix__}p[-6];
  immutable float ${__temp_prefix__}z7 = ${__temp_prefix__}p[-7];

  // pairwise sums and differences, four elements apart
  immutable float ${__temp_prefix__}s04 = ${__temp_prefix__}z0+${__temp_prefix__}z4;
  immutable float ${__temp_prefix__}d04 = ${__temp_prefix__}z0-${__temp_prefix__}z4;
  immutable float ${__temp_prefix__}s15 = ${__temp_prefix__}z1+${__temp_prefix__}z5;
  immutable float ${__temp_prefix__}d15 = ${__temp_prefix__}z1-${__temp_prefix__}z5;
  immutable float ${__temp_prefix__}s26 = ${__temp_prefix__}z2+${__temp_prefix__}z6;
  immutable float ${__temp_prefix__}d26 = ${__temp_prefix__}z2-${__temp_prefix__}z6;
  immutable float ${__temp_prefix__}s37 = ${__temp_prefix__}z3+${__temp_prefix__}z7;
  immutable float ${__temp_prefix__}d37 = ${__temp_prefix__}z3-${__temp_prefix__}z7;

  ${__temp_prefix__}p[ 0] = ${__temp_prefix__}s04+${__temp_prefix__}s26; // z0+z4+z2+z6
  ${__temp_prefix__}p[-2] = ${__temp_prefix__}s04-${__temp_prefix__}s26; // z0+z4-z2-z6
  ${__temp_prefix__}p[-4] = ${__temp_prefix__}d04+${__temp_prefix__}d37; // z0-z4+z3-z7
  ${__temp_prefix__}p[-6] = ${__temp_prefix__}d04-${__temp_prefix__}d37; // z0-z4-z3+z7
  ${__temp_prefix__}p[-1] = ${__temp_prefix__}s15+${__temp_prefix__}s37; // z1+z5+z3+z7
  ${__temp_prefix__}p[-3] = ${__temp_prefix__}s15-${__temp_prefix__}s37; // z1+z5-z3-z7
  ${__temp_prefix__}p[-5] = ${__temp_prefix__}d15-${__temp_prefix__}d26; // z1-z5-z2+z6
  ${__temp_prefix__}p[-7] = ${__temp_prefix__}d15+${__temp_prefix__}d26; // z1-z5+z2-z6
}}.cmacroFixVars!"z"(z);
2066 
// Last three iterations of IMDCT step 3, fused into a single pass.
//
// Processes n blocks of 16 floats, walking downwards from e[i_off]. In every
// block the upper eight floats are combined with the lower eight; the only
// twiddle value that needs a multiplication is A[base_n>>3], the remaining
// butterflies reduce to plain sums, differences and swaps. Both 8-float
// halves are then finished with iter_54.
private void imdct_step3_inner_s_loop_ld654 (int n, float* e, int i_off, float* A, int base_n) {
  immutable int a_off = base_n>>3;
  immutable float w = A[0+a_off];
  float* stop = e+i_off-16*n;
  for (float* blk = e+i_off; blk > stop; blk -= 16) {
    float dre, dim;

    // elements 0/1 against 8/9: plain difference
    dre = blk[ 0]-blk[-8];
    dim = blk[-1]-blk[-9];
    blk[ 0] += blk[-8];
    blk[-1] += blk[-9];
    blk[-8] = dre;
    blk[-9] = dim;

    // elements 2/3 against 10/11: difference scaled by w
    dre = blk[-2]-blk[-10];
    dim = blk[-3]-blk[-11];
    blk[-2] += blk[-10];
    blk[-3] += blk[-11];
    blk[-10] = (dre+dim)*w;
    blk[-11] = (dim-dre)*w;

    // elements 4/5 against 12/13: swapped difference
    dre = blk[-12]-blk[-4]; // reverse to avoid a unary negation
    dim = blk[-5]-blk[-13];
    blk[-4] += blk[-12];
    blk[-5] += blk[-13];
    blk[-12] = dim;
    blk[-13] = dre;

    // elements 6/7 against 14/15: difference scaled by w
    dre = blk[-14]-blk[-6]; // reverse to avoid a unary negation
    dim = blk[-7]-blk[-15];
    blk[-6] += blk[-14];
    blk[-7] += blk[-15];
    blk[-14] = (dre+dim)*w;
    blk[-15] = (dre-dim)*w;

    mixin(iter_54!"blk");
    mixin(iter_54!"blk-8");
  }
}
2107 
// Inverse MDCT of one block, computed in `buffer`.
//
// Params:
//   buffer    = on entry the first n/2 floats are read as the input spectrum;
//               on return all n floats have been overwritten with the result
//   n         = transform size
//               NOTE(review): presumably a power of two, and large enough for
//               the n>>5 block counts used below - confirm against the setup code
//   f         = decoder; supplies the temp allocator and the per-blocktype
//               tables f.A, f.B, f.C and f.bit_reverse
//   blocktype = index selecting which set of those tables to use
//
// A scratch buffer of n/2 floats (buf2) is taken from the decoder's temp
// allocator and released again before returning.
private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
  import core.stdc.stdlib : alloca;

  int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
  int ld;
  // @OPTIMIZE: reduce register pressure by using fewer variables?
  int save_point = temp_alloc_save(f);
  float *buf2;
  buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
  float *u = null, v = null;
  // twiddle factors
  float *A = f.A.ptr[blocktype];

  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
  // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.

  // kernel from paper


  // merged:
  //   copy and reflect spectral data
  //   step 0

  // note that it turns out that the items added together during
  // this step are, in fact, being added to themselves (as reflected
  // by step 0). inexplicable inefficiency! this became obvious
  // once I combined the passes.

  // so there's a missing 'times 2' here (for adding X to itself).
  // this propagates through linearly to the end, where the numbers
  // are 1/2 too small, and need to be compensated for.

  {
    float* d, e, AA, e_stop;
    d = &buf2[n2-2];
    AA = A;
    e = &buffer[0];
    e_stop = &buffer[n2];
    // first half: read buffer upwards (every 4th pair), fill buf2 downwards
    while (e != e_stop) {
      d[1] = (e[0]*AA[0]-e[2]*AA[1]);
      d[0] = (e[0]*AA[1]+e[2]*AA[0]);
      d -= 2;
      AA += 2;
      e += 4;
    }
    // second half: read buffer downwards with negated inputs, keep filling
    // buf2 downwards until its start is reached
    e = &buffer[n2-3];
    while (d >= buf2) {
      d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
      d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
      d -= 2;
      AA += 2;
      e -= 4;
    }
  }

  // now we use symbolic names for these, so that we can
  // possibly swap their meaning as we change which operations
  // are in place

  u = buffer;
  v = buf2;

  // step 2    (paper output is w, now u)
  // this could be in place, but the data ends up in the wrong
  // place... _somebody_'s got to swap it, so this is nominated
  {
    float* AA = &A[n2-8];
    float* d0, d1, e0, e1;
    e0 = &v[n4];
    e1 = &v[0];
    d0 = &u[n4];
    d1 = &u[0];
    // AA walks the twiddle table backwards, 8 floats per iteration
    while (AA >= A) {
      float v40_20, v41_21;

      v41_21 = e0[1]-e1[1];
      v40_20 = e0[0]-e1[0];
      d0[1]  = e0[1]+e1[1];
      d0[0]  = e0[0]+e1[0];
      d1[1]  = v41_21*AA[4]-v40_20*AA[5];
      d1[0]  = v40_20*AA[4]+v41_21*AA[5];

      v41_21 = e0[3]-e1[3];
      v40_20 = e0[2]-e1[2];
      d0[3]  = e0[3]+e1[3];
      d0[2]  = e0[2]+e1[2];
      d1[3]  = v41_21*AA[0]-v40_20*AA[1];
      d1[2]  = v40_20*AA[0]+v41_21*AA[1];

      AA -= 8;

      d0 += 4;
      d1 += 4;
      e0 += 4;
      e1 += 4;
    }
  }

  // step 3
  ld = ilog(n)-1; // ilog is off-by-one from normal definitions

  // optimized step 3:

  // the original step3 loop can be nested r inside s or s inside r;
  // it's written originally as s inside r, but this is dumb when r
  // iterates many times, and s few. So I have two copies of it and
  // switch between them halfway.

  // this is iteration 0 of step 3
  imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
  imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);

  // this is iteration 1 of step 3
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);

  // remaining iterations: the "r" form first, then the "s" form (see above)
  l = 2;
  for (; l < (ld-3)>>1; ++l) {
    int k0 = n>>(l+2), k0_2 = k0>>1;
    int lim = 1<<(l+1);
    foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
  }

  for (; l < ld-6; ++l) {
    int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
    int rlim = n>>(l+6);
    int lim = 1<<(l+1);
    int i_off;
    float *A0 = A;
    i_off = n2-1;
    foreach (immutable _; 0..rlim) {
      imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
      A0 += k1*4;
      i_off -= 8;
    }
  }

  // iterations with count:
  //   ld-6,-5,-4 all interleaved together
  //       the big win comes from getting rid of needless flops
  //         due to the constants on pass 5 & 4 being all 1 and 0;
  //       combining them to be simultaneous to improve cache made little difference
  imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);

  // output is u

  // step 4, 5, and 6
  // cannot be in-place because of step 5
  {
    ushort *bitrev = f.bit_reverse.ptr[blocktype];
    // weirdly, I'd have thought reading sequentially and writing
    // erratically would have been better than vice-versa, but in
    // fact that's not what my testing showed. (That is, with
    // j = bitreverse(i), do you read i and write j, or read j and write i.)
    float *d0 = &v[n4-4];
    float *d1 = &v[n2-4];
    int k4;
    // each iteration consumes two bit-reverse entries and moves 8 floats
    // from u (at the bit-reversed offsets) into v, written back to front
    while (d0 >= v) {
      k4 = bitrev[0];
      d1[3] = u[k4+0];
      d1[2] = u[k4+1];
      d0[3] = u[k4+2];
      d0[2] = u[k4+3];

      k4 = bitrev[1];
      d1[1] = u[k4+0];
      d1[0] = u[k4+1];
      d0[1] = u[k4+2];
      d0[0] = u[k4+3];

      d0 -= 4;
      d1 -= 4;
      bitrev += 2;
    }
  }
  // (paper output is u, now v)


  // data must be in buf2
  debug(stb_vorbis) assert(v == buf2);

  // step 7   (paper output is v, now v)
  // this is now in place
  {
    float a02, a11, b0, b1, b2, b3;
    float* C = f.C.ptr[blocktype];
    float* d, e;
    d = v;
    e = v+n2-4;
    // d walks up from the start of v, e walks down from its end;
    // they meet in the middle
    while (d < e) {
      a02 = d[0]-e[2];
      a11 = d[1]+e[3];

      b0 = C[1]*a02+C[0]*a11;
      b1 = C[1]*a11-C[0]*a02;

      b2 = d[0]+e[ 2];
      b3 = d[1]-e[ 3];

      d[0] = b2+b0;
      d[1] = b3+b1;
      e[2] = b2-b0;
      e[3] = b1-b3;

      a02 = d[2]-e[0];
      a11 = d[3]+e[1];

      b0 = C[3]*a02+C[2]*a11;
      b1 = C[3]*a11-C[2]*a02;

      b2 = d[2]+e[ 0];
      b3 = d[3]-e[ 1];

      d[2] = b2+b0;
      d[3] = b3+b1;
      e[0] = b2-b0;
      e[1] = b1-b3;

      C += 4;
      d += 4;
      e -= 4;
    }
  }

  // data must be in buf2


  // step 8+decode   (paper output is X, now buffer)
  // this generates pairs of data a la 8 and pushes them directly through
  // the decode kernel (pushing rather than pulling) to avoid having
  // to make another pass later

  // this cannot POSSIBLY be in place, so we refer to the buffers directly
  {
    float p0, p1, p2, p3;
    float* d0, d1, d2, d3;
    float* B = f.B.ptr[blocktype]+n2-8;
    float* e = buf2+n2-8;
    // four output cursors: d0 and d2 move up from buffer[0] and buffer[n2],
    // d1 and d3 move down from buffer[n2-4] and buffer[n-4]
    d0 = &buffer[0];
    d1 = &buffer[n2-4];
    d2 = &buffer[n2];
    d3 = &buffer[n-4];
    while (e >= v) {
      p3 =  e[6]*B[7]-e[7]*B[6];
      p2 = -e[6]*B[6]-e[7]*B[7];

      d0[0] =   p3;
      d1[3] =  -p3;
      d2[0] =   p2;
      d3[3] =   p2;

      p1 =  e[4]*B[5]-e[5]*B[4];
      p0 = -e[4]*B[4]-e[5]*B[5];

      d0[1] =   p1;
      d1[2] = - p1;
      d2[1] =   p0;
      d3[2] =   p0;

      p3 =  e[2]*B[3]-e[3]*B[2];
      p2 = -e[2]*B[2]-e[3]*B[3];

      d0[2] =   p3;
      d1[1] = - p3;
      d2[2] =   p2;
      d3[1] =   p2;

      p1 =  e[0]*B[1]-e[1]*B[0];
      p0 = -e[0]*B[0]-e[1]*B[1];

      d0[3] =   p1;
      d1[0] = - p1;
      d2[3] =   p0;
      d3[0] =   p0;

      B -= 8;
      e -= 8;
      d0 += 4;
      d2 += 4;
      d1 -= 4;
      d3 -= 4;
    }
  }

  temp_free(f, buf2);
  temp_alloc_restore(f, save_point);
}
2397 
/// Picks the window table that belongs to an overlap of `len` samples.
///
/// `len` is doubled and matched against the two block sizes: a match with
/// `f.blocksize_0` selects `f.window[0]`, a match with `f.blocksize_1`
/// selects `f.window[1]`. Any other length is a hard failure (`assert(0)`).
private float *get_window (VorbisDecoder f, int len) {
  immutable int full = len<<1;
  if (full == f.blocksize_0) return f.window.ptr[0];
  if (full != f.blocksize_1) assert(0); // no window exists for this length
  return f.window.ptr[1];
}
2404 
// YTYPE is the element type of the `finalY` array handed to do_floor():
// `int` when STB_VORBIS_NO_DEFER_FLOOR is set, `short` otherwise.
version(STB_VORBIS_NO_DEFER_FLOOR) alias YTYPE = int;
else alias YTYPE = short;
2410 
/// Renders the floor curve of channel `i` into `target` (first `n/2` values).
///
/// The floor used is the one assigned to the channel's submap. Floor type 0
/// is rejected with `STBVorbisError.invalid_stream`. Otherwise the points are
/// walked in `sorted_order`; every point that is marked as used contributes a
/// line segment (via the `draw_line` mixin) from the previous used point, and
/// the remainder up to `n/2` is filled with the last Y value.
///
/// A point counts as "used" when `step2_flag[j]` is set
/// (STB_VORBIS_NO_DEFER_FLOOR) or when `finalY[j] >= 0` (default build, where
/// `step2_flag` is not read at all).
///
/// Returns: the result of `error()` for floor type 0, `true` otherwise.
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  int n2 = n>>1;
  int floor = map.submap_floor.ptr[map.chan[i].mux];

  // floor 0 is not handled by this decoder
  if (f.floor_types.ptr[floor] == 0) return error(f, STBVorbisError.invalid_stream);

  Floor1* g = &f.floor_config[floor].floor1;
  // (lx, ly) is the end of the last segment drawn so far
  int lx = 0;
  int ly = finalY[0]*g.floor1_multiplier;

  foreach (immutable q; 1..g.values) {
    int j = g.sorted_order.ptr[q];
    // skip points that were not marked as used
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      if (!step2_flag[j]) continue;
    } else {
      if (finalY[j] < 0) continue;
    }
    int hy = finalY[j]*g.floor1_multiplier;
    int hx = g.Xlist.ptr[j];
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }

  // flat tail; optimization of: draw_line(target, lx, ly, n, ly, n2);
  if (lx < n2) {
    foreach (immutable j; lx..n2) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  }

  return true;
}
2441 
2442 // The meaning of "left" and "right"
2443 //
2444 // For a given frame:
2445 //     we compute samples from 0..n
2446 //     window_center is n/2
2447 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2448 //     all of the samples from left_end to right_start can be output without mixing; however,
2449 //        this interval is 0-length except when transitioning between short and long frames
2450 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2451 //        which we don't have, so those get saved in a buffer
2452 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2453 //        has to be the same as frame N+1's left_end-left_start (which they are by
2454 //        construction)
2455 
/// Begins decoding of the next accepted packet and computes its window layout.
///
/// Packets whose leading type bit is non-zero are skipped entirely. For the
/// first packet with a zero type bit the mode number is read and stored in
/// `*mode`, and the four window boundaries are written to `p_left_start`,
/// `p_left_end`, `p_right_start` and `p_right_end`.
///
/// Returns: `false` when `f.eof` is set, when `maybe_start_packet()` fails, or
/// when the mode number is `EOP` or not below `f.mode_count`; `true` otherwise.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_start = f.channel_buffer_end = 0;

  // find a packet whose type bit is zero, draining every other packet
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    if (get_bits!1(f) == 0) break;
    /+if (f.push_mode) return error(f, STBVorbisError.bad_packet_type);+/
    while (get8_packet(f) != EOP) {}
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // mode number
  immutable int modeIdx = get_bits_main(f, ilog(f.mode_count-1));
  if (modeIdx == EOP) return false;
  if (modeIdx >= f.mode_count) return false;
  *mode = modeIdx;
  Mode* cfg = f.mode_config.ptr+modeIdx;

  // block size; only blocks with `blockflag` set carry the prev/next flags
  int blockLen;
  int prevFlag = 0, nextFlag = 0;
  if (cfg.blockflag) {
    blockLen = f.blocksize_1;
    prevFlag = get_bits!1(f);
    nextFlag = get_bits!1(f);
  } else {
    blockLen = f.blocksize_0;
  }

  // WINDOWING
  immutable int center = blockLen>>1;

  // left edge: narrowed when a flagged block has a zero `prev` flag
  if (cfg.blockflag && !prevFlag) {
    *p_left_start = (blockLen-f.blocksize_0)>>2;
    *p_left_end   = (blockLen+f.blocksize_0)>>2;
  } else {
    *p_left_start = 0;
    *p_left_end   = center;
  }

  // right edge: narrowed when a flagged block has a zero `next` flag
  if (cfg.blockflag && !nextFlag) {
    *p_right_start = (blockLen*3-f.blocksize_0)>>2;
    *p_right_end   = (blockLen*3+f.blocksize_0)>>2;
  } else {
    *p_right_start = center;
    *p_right_end   = blockLen;
  }

  return true;
}
2505 
/// Decodes the body of a packet whose header was already consumed by
/// vorbis_decode_initial().
///
/// Stages, in order: per-channel floor decode, residue decode per submap,
/// inverse channel coupling, floor application, inverse MDCT, and finally
/// sample-position bookkeeping.
///
/// Params:
///   len         = receives the number of samples of the frame that are meaningful
///   m           = mode of this packet (selects block size and mapping)
///   left_start  = start of the left window region; may be advanced by deferred discards
///   left_end    = not read by this function
///   right_start = start of the right window region
///   right_end   = end of the right window region
///   p_left      = receives the adjusted `left_start` when samples are discarded
///
/// Returns: the result of `error()` when a floor of type 0 is referenced,
/// `true` otherwise.
private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
  import core.stdc.string : memcpy, memset;

  Mapping* map;
  int n, n2;
  int[256] zero_channel;
  int[256] really_zero_channel;

  // WINDOWING
  n = f.blocksize.ptr[m.blockflag];
  n2 = n>>1;
  map = &f.mapping[m.mapping];

  // FLOORS
  //stb_prof(1);
  foreach (immutable i; 0..f.vrchannels) {
    int floor = map.submap_floor.ptr[map.chan[i].mux];
    zero_channel[i] = false;
    if (f.floor_types.ptr[floor] == 0) return error(f, STBVorbisError.invalid_stream);

    Floor1* g = &f.floor_config[floor].floor1;
    if (get_bits!1(f)) {
      // this channel carries a floor in this packet
      short* finalY = f.finalY.ptr[i];
      ubyte[256] step2_flag = void;
      static immutable int[4] range_list = [ 256, 128, 86, 64 ];
      int range = range_list[g.floor1_multiplier-1];
      int offset = 2;

      // the first two Y values are stored directly
      finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
      finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8

      // the remaining Y values are codebook-coded, partition by partition
      foreach (immutable j; 0..g.partitions) {
        int pclass = g.partition_class_list.ptr[j];
        int cdim = g.class_dimensions.ptr[pclass];
        int cbits = g.class_subclasses.ptr[pclass];
        int csub = (1<<cbits)-1;
        int cval = 0;
        if (cbits) {
          Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
          mixin(DECODE!("cval", "cc"));
        }
        foreach (immutable k; 0..cdim) {
          int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
          cval = cval>>cbits;
          if (book < 0) {
            // no book for this subclass: the value is zero
            finalY[offset++] = 0;
          } else {
            int temp;
            Codebook *cc = f.codebooks+book;
            mixin(DECODE!("temp", "cc"));
            finalY[offset++] = cast(short)temp; //k8
          }
        }
      }

      // running out of bits makes the channel "unused"; behavior according to spec
      if (f.valid_bits == INVALID_BITS) goto error;

      // turn the decoded differences into absolute Y values
      step2_flag[0] = step2_flag[1] = 1;
      foreach (immutable j; 2..g.values) {
        int low = g.neighbors.ptr[j].ptr[0];
        int high = g.neighbors.ptr[j].ptr[1];
        //neighbors(g.Xlist, j, &low, &high);
        int pred = void;
        mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
        int val = finalY[j];
        int highroom = range-pred;
        int lowroom = pred;
        auto room = (highroom < lowroom ? highroom : lowroom)*2;
        if (val == 0) {
          // zero difference: the point sits on the prediction and is not "used"
          step2_flag[j] = 0;
          finalY[j] = cast(short)pred; //k8
        } else {
          step2_flag[low] = step2_flag[high] = 1;
          step2_flag[j] = 1;
          if (val < room) {
            // odd values go below the prediction, even values above
            if (val&1) {
              finalY[j] = cast(short)(pred-((val+1)>>1)); //k8
            } else {
              finalY[j] = cast(short)(pred+(val>>1)); //k8
            }
          } else if (highroom > lowroom) {
            finalY[j] = cast(short)(val-lowroom+pred); //k8
          } else {
            finalY[j] = cast(short)(pred-val+highroom-1); //k8
          }
        }
      }

      version(STB_VORBIS_NO_DEFER_FLOOR) {
        do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
      } else {
        // defer final floor computation until _after_ residue;
        // unused points are marked with -1 for do_floor()
        foreach (immutable j; 0..g.values) if (!step2_flag[j]) finalY[j] = -1;
      }
    } else {
  error:
      zero_channel[i] = true;
    }
    // everything else for this channel is deferred to later
  }
  //stb_prof(0);
  // at this point we've decoded all floors

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // remember which channels had no floor, then re-enable coupled channels:
  // if either half of a coupled pair is live, both must get residue
  memcpy(really_zero_channel.ptr, zero_channel.ptr, int.sizeof*f.vrchannels);
  foreach (immutable step; 0..map.coupling_steps) {
    immutable mag = map.chan[step].magnitude;
    immutable ang = map.chan[step].angle;
    if (zero_channel[mag] && zero_channel[ang]) continue;
    zero_channel[mag] = zero_channel[ang] = false;
  }

  // RESIDUE DECODE
  foreach (immutable sm; 0..map.submaps) {
    float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
    ubyte[256] do_not_decode = void;
    int ch = 0;
    // collect the channels that belong to this submap
    foreach (immutable c; 0..f.vrchannels) {
      if (map.chan[c].mux != sm) continue;
      if (zero_channel[c]) {
        do_not_decode[ch] = true;
        residue_buffers.ptr[ch] = null;
      } else {
        do_not_decode[ch] = false;
        residue_buffers.ptr[ch] = f.channel_buffers.ptr[c];
      }
      ++ch;
    }
    int r = map.submap_residue.ptr[sm];
    decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // INVERSE COUPLING (steps are undone in reverse order)
  //stb_prof(14);
  foreach_reverse (immutable step; 0..map.coupling_steps) {
    float* magBuf = f.channel_buffers.ptr[map.chan[step].magnitude];
    float* angBuf = f.channel_buffers.ptr[map.chan[step].angle];
    foreach (immutable k; 0..n2) {
      immutable float mv = magBuf[k];
      immutable float av = angBuf[k];
      float newMag, newAng;
      if (mv > 0) {
        if (av > 0) { newMag = mv; newAng = mv-av; } else { newAng = mv; newMag = mv+av; }
      } else {
        if (av > 0) { newMag = mv; newAng = mv+av; } else { newAng = mv; newMag = mv-av; }
      }
      magBuf[k] = newMag;
      angBuf[k] = newAng;
    }
  }

  // finish decoding the floors
  //stb_prof(15);
  foreach (immutable c; 0..f.vrchannels) {
    if (really_zero_channel[c]) {
      // no floor was decoded for this channel: its spectrum is silence
      memset(f.channel_buffers.ptr[c], 0, (*f.channel_buffers.ptr[c]).sizeof*n2);
    } else {
      version(STB_VORBIS_NO_DEFER_FLOOR) {
        foreach (immutable j; 0..n2) f.channel_buffers.ptr[c].ptr[j] *= f.floor_buffers.ptr[c].ptr[j];
      } else {
        do_floor(f, map, c, n, f.channel_buffers.ptr[c], f.finalY.ptr[c], null);
      }
    }
  }

  // INVERSE MDCT
  //stb_prof(16);
  foreach (immutable c; 0..f.vrchannels) inverse_mdct(f.channel_buffers.ptr[c], n, f, m.blockflag);
  //stb_prof(0);

  // this shouldn't be necessary, unless we exited on an error
  // and want to flush to get to the next packet
  flush_packet(f);

  if (f.first_decode) {
    // assume we start so first non-discarded sample is sample 0
    // this isn't to spec, but spec would require us to read ahead
    // and decode the size of all current frames--could be done,
    // but presumably it's not a commonly used feature
    f.current_loc = -n2; // start of first frame is positioned for discard
    // we might have to discard samples "from" the next frame too,
    // if we're lapping a large block then a small at the start?
    f.discard_samples_deferred = n-right_end;
    f.current_loc_valid = true;
    f.first_decode = false;
  } else if (f.discard_samples_deferred) {
    immutable int span = right_start-left_start;
    if (f.discard_samples_deferred >= span) {
      // the whole returnable region of this frame is discarded
      f.discard_samples_deferred -= span;
      left_start = right_start;
    } else {
      // only the head of the returnable region is discarded
      left_start += f.discard_samples_deferred;
      f.discard_samples_deferred = 0;
    }
    *p_left = left_start;
  } else if (f.previous_length == 0 && f.current_loc_valid) {
    // we're recovering from a seek... that means we're going to discard
    // the samples from this packet even though we know our position from
    // the last page header, so we need to update the position based on
    // the discarded samples here
    // but wait, the code below is going to add this in itself even
    // on a discard, so we don't need to do it here...
  }

  // check if we have ogg information about the sample # for this packet
  if (f.last_seg_which == f.end_seg_with_known_loc) {
    // if we have a valid current loc, and this is final:
    if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
      uint current_end = f.known_loc_for_packet-(n-right_end);
      // then let's infer the size of the (probably) short final frame
      if (current_end < f.current_loc+right_end) {
        int outLen;
        if (current_end < f.current_loc+(right_end-left_start)) {
          // negative truncation, that's impossible!
          outLen = 0;
        } else {
          outLen = current_end-f.current_loc;
        }
        outLen += left_start;
        if (outLen > right_end) outLen = right_end; // this should never happen
        *len = outLen;
        f.current_loc += outLen;
        return true;
      }
    }
    // otherwise, just set our sample loc
    // guess that the ogg granule pos refers to the _middle_ of the
    // last frame?
    // set f.current_loc to the position of left_start
    f.current_loc = f.known_loc_for_packet-(n2-left_start);
    f.current_loc_valid = true;
  }
  // note: uses left_start as (possibly) adjusted by the discard logic above
  if (f.current_loc_valid) f.current_loc += (right_start-left_start);

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  *len = right_end;  // ignore samples after the window goes to 0
  return true;
}
2748 
/// Decodes one complete packet: header part via vorbis_decode_initial(), body
/// via vorbis_decode_packet_rest().
///
/// `*p_left` and `*p_right` receive the left/right window start positions;
/// `*len` is filled in by vorbis_decode_packet_rest().
///
/// Returns: 0 when the initial stage fails, otherwise the result of
/// vorbis_decode_packet_rest().
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int modeIndex, leftEnd, rightEnd;
  immutable ok = vorbis_decode_initial(f, p_left, &leftEnd, p_right, &rightEnd, &modeIndex);
  if (!ok) return 0;
  auto modeCfg = f.mode_config.ptr+modeIndex;
  return vorbis_decode_packet_rest(f, len, modeCfg, *p_left, leftEnd, *p_right, rightEnd, p_left);
}
2754 
/// Overlaps the freshly decoded frame with the saved tail of the previous
/// one, saves this frame's tail for the next call, and reports how many
/// samples are ready.
///
/// We use `right` and `left` (the start of the right- and left-window
/// sin()-regions) to determine how much to return, rather than inferring it
/// from the rules (same result, clearer code). `left` is where our sin()
/// window starts, therefore where the previous window's right edge starts and
/// where mixing with the previous buffer begins. `right` is where our sin()
/// ending-window starts, therefore where saving begins and where the returned
/// data ends.
///
/// Returns: 0 when there was no previous frame to mix with, otherwise
/// `right-left` (with `right` clamped to `len`); `f.samples_output` grows by
/// the same amount.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  // length of the tail saved by the previous call (0 = nothing saved)
  immutable int prevLen = f.previous_length;

  // mixin from previous window
  if (prevLen) {
    float *w = get_window(f, prevLen);
    foreach (immutable ch; 0..f.vrchannels) {
      auto cur = f.channel_buffers.ptr[ch];
      auto old = f.previous_window.ptr[ch];
      foreach (immutable j; 0..prevLen) {
        cur[left+j] = cur[left+j]*w[j]+old[j]*w[prevLen-1-j];
      }
    }
  }

  // last half of this data becomes previous window
  f.previous_length = len-right;

  // @OPTIMIZE: could avoid this copy by double-buffering the
  // output (flipping previous_window with channel_buffers), but
  // then previous_window would have to be 2x as large, and
  // channel_buffers couldn't be temp mem (although they're NOT
  // currently temp mem, they could be (unless we want to level
  // performance by spreading out the computation))
  foreach (immutable ch; 0..f.vrchannels) {
    auto src = f.channel_buffers.ptr[ch];
    auto dst = f.previous_window.ptr[ch];
    for (uint k = 0; right+k < len; ++k) dst[k] = src[right+k];
  }

  // there was no previous packet, so this data isn't valid...
  // this isn't entirely true, only the would-have-overlapped data
  // isn't valid, but this seems to be what the spec requires
  if (!prevLen) return 0;

  // truncate a short frame
  immutable int stop = (len < right ? len : right);
  immutable int produced = stop-left;

  f.samples_output += produced;

  return produced;
}
2806 
/// Decodes one packet and runs it through vorbis_finish_frame(); the sample
/// count returned by vorbis_finish_frame() is ignored.
///
/// Returns: `true` when a packet was decoded, `false` when
/// vorbis_decode_packet() failed.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int len, left, right;
  if (!vorbis_decode_packet(f, &len, &left, &right)) return false;
  vorbis_finish_frame(f, len, left, right);
  return true;
}
2815 
2816 /+ k8: i don't need that, so it's dead
2817 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2818   import core.stdc.string : memcmp;
2819 
2820   // make sure that we have the packet available before continuing...
2821   // this requires a full ogg parse, but we know we can fetch from f.stream
2822 
2823   // instead of coding this out explicitly, we could save the current read state,
2824   // read the next packet with get8() until end-of-packet, check f.eof, then
2825   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2826   // of state to restore (primarily the page segment table)
2827 
2828   int s = f.next_seg, first = true;
2829   ubyte *p = f.stream;
2830 
2831   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2832     for (; s < f.segment_count; ++s) {
2833       p += f.segments[s];
2834       if (f.segments[s] < 255) break; // stop at first short segment
2835     }
2836     // either this continues, or it ends it...
2837     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2838     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2839     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2840     first = false;
2841   }
2842   while (s == -1) {
2843     ubyte* q = void;
2844     int n = void;
2845     // check that we have the page header ready
2846     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2847     // validate the page
2848     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2849     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2850     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2851       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2852       // if no previous length, we're resynching, so we can come in on a continued-packet,
2853       // which we'll just drop
2854     } else {
2855       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2856     }
2857     n = p[26]; // segment counts
2858     q = p+27; // q points to segment table
2859     p = q+n; // advance past header
2860     // make sure we've read the segment table
2861     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2862     for (s = 0; s < n; ++s) {
2863       p += q[s];
2864       if (q[s] < 255) break;
2865     }
2866     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2867     if (s == n) s = -1; // set 'crosses page' flag
2868     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2869     first = false;
2870   }
2871   return true;
2872 }
2873 +/
2874 
2875 private int start_decoder (VorbisDecoder f) {
2876   import core.stdc.string : memcpy, memset;
2877 
2878   ubyte[6] header;
2879   ubyte x, y;
2880   int len, max_submaps = 0;
2881   int longest_floorlist = 0;
2882 
2883   // first page, first packet
2884 
2885   if (!start_page(f)) return false;
2886   // validate page flag
2887   if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
2888   if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
2889   if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
2890   // check for expected packet length
2891   if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
2892   if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
2893   // read packet
2894   // check packet header
2895   if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
2896   if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
2897   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
2898   // vorbis_version
2899   if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
2900   f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
2901   if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
2902   f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
2903   get32(f); // bitrate_maximum
2904   get32(f); // bitrate_nominal
2905   get32(f); // bitrate_minimum
2906   x = get8(f);
2907   {
2908     int log0 = x&15;
2909     int log1 = x>>4;
2910     f.blocksize_0 = 1<<log0;
2911     f.blocksize_1 = 1<<log1;
2912     if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
2913     if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
2914     if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
2915   }
2916 
2917   // framing_flag
2918   x = get8(f);
2919   if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);
2920 
2921   // second packet! (comments)
2922   if (!start_page(f)) return false;
2923 
2924   // read comments
2925   if (!start_packet(f)) return false;
2926 
2927   if (f.read_comments) {
2928     /+if (f.push_mode) {
2929       if (!is_whole_packet_present(f, true)) {
2930         // convert error in ogg header to write type
2931         if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2932         return false;
2933       }
2934     }+/
2935     if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
2936     foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2937     if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2938 
2939     // skip vendor id
2940     uint vidsize = get32_packet(f);
2941     //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
2942     if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
2943     while (vidsize--) get8_packet(f);
2944 
2945     // read comments section
2946     uint cmtcount = get32_packet(f);
2947     if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
2948     if (cmtcount > 0) {
2949       uint cmtsize = 32768; // this should be enough for everyone
2950       f.comment_data = setup_malloc!ubyte(f, cmtsize);
2951       if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
2952       auto cmtpos = 0;
2953       auto d = f.comment_data;
2954       while (cmtcount--) {
2955         uint linelen = get32_packet(f);
2956         //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
2957         if (linelen == EOP || linelen > ushort.max-2) break;
2958         if (linelen == 0) { continue; }
2959         if (cmtpos+2+linelen > cmtsize) break;
2960         cmtpos += linelen+2;
2961         *d++ = (linelen+2)&0xff;
2962         *d++ = ((linelen+2)>>8)&0xff;
2963         while (linelen--) {
2964           auto b = get8_packet(f);
2965           if (b == EOP) return error(f, STBVorbisError.outofmem);
2966           *d++ = cast(ubyte)b;
2967         }
2968         //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
2969         f.comment_size = cmtpos;
2970       }
2971     }
2972     flush_packet(f);
2973     f.comment_rewind();
2974   } else {
2975     // skip comments
2976     do {
2977       len = next_segment(f);
2978       skip(f, len);
2979       f.bytes_in_seg = 0;
2980     } while (len);
2981   }
2982 
2983   // third packet!
2984   if (!start_packet(f)) return false;
2985 
2986   /+if (f.push_mode) {
2987     if (!is_whole_packet_present(f, true)) {
2988       // convert error in ogg header to write type
2989       if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2990       return false;
2991     }
2992   }+/
2993 
2994   if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
2995   foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2996   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2997 
2998   // codebooks
2999   f.codebook_count = get_bits!8(f)+1;
3000   f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
3001   static assert((*f.codebooks).sizeof == Codebook.sizeof);
3002   if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
3003   memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
3004   foreach (immutable i; 0..f.codebook_count) {
3005     uint* values;
3006     int ordered, sorted_count;
3007     int total = 0;
3008     ubyte* lengths;
3009     Codebook* c = f.codebooks+i;
3010     x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
3011     x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
3012     x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
3013     x = get_bits!8(f);
3014     c.dimensions = (get_bits!8(f)<<8)+x;
3015     x = get_bits!8(f);
3016     y = get_bits!8(f);
3017     c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
3018     ordered = get_bits!1(f);
3019     c.sparse = (ordered ? 0 : get_bits!1(f));
3020 
3021     if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);
3022 
3023     if (c.sparse) {
3024       lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
3025     } else {
3026       lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3027     }
3028 
3029     if (lengths is null) return error(f, STBVorbisError.outofmem);
3030 
3031     if (ordered) {
3032       int current_entry = 0;
3033       int current_length = get_bits_add_no!5(f, 1);
3034       while (current_entry < c.entries) {
3035         int limit = c.entries-current_entry;
3036         int n = get_bits_main(f, ilog(limit));
3037         if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
3038         memset(lengths+current_entry, current_length, n);
3039         current_entry += n;
3040         ++current_length;
3041       }
3042     } else {
3043       foreach (immutable j; 0..c.entries) {
3044         int present = (c.sparse ? get_bits!1(f) : 1);
3045         if (present) {
3046           lengths[j] = get_bits_add_no!5(f, 1);
3047           ++total;
3048           if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
3049         } else {
3050           lengths[j] = NO_CODE;
3051         }
3052       }
3053     }
3054 
3055     if (c.sparse && total >= c.entries>>2) {
3056       // convert sparse items to non-sparse!
3057       if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
3058       c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3059       if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
3060       memcpy(c.codeword_lengths, lengths, c.entries);
3061       setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
3062       lengths = c.codeword_lengths;
3063       c.sparse = 0;
3064     }
3065 
3066     // compute the size of the sorted tables
3067     if (c.sparse) {
3068       sorted_count = total;
3069     } else {
3070       sorted_count = 0;
3071       version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
3072         foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
3073       }
3074     }
3075 
3076     c.sorted_entries = sorted_count;
3077     values = null;
3078 
3079     if (!c.sparse) {
3080       c.codewords = setup_malloc!uint(f, c.entries);
3081       if (!c.codewords) return error(f, STBVorbisError.outofmem);
3082     } else {
3083       if (c.sorted_entries) {
3084         c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
3085         if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
3086         c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3087         if (!c.codewords) return error(f, STBVorbisError.outofmem);
3088         values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
3089         if (!values) return error(f, STBVorbisError.outofmem);
3090       }
3091       uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
3092       if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
3093     }
3094 
3095     if (!compute_codewords(c, lengths, c.entries, values)) {
3096       if (c.sparse) setup_temp_free(f, values, 0);
3097       return error(f, STBVorbisError.invalid_setup);
3098     }
3099 
3100     if (c.sorted_entries) {
3101       // allocate an extra slot for sentinels
3102       c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
3103       if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
3104       // allocate an extra slot at the front so that c.sorted_values[-1] is defined
3105       // so that we can catch that case without an extra if
3106       c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
3107       if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
3108       ++c.sorted_values;
3109       c.sorted_values[-1] = -1;
3110       compute_sorted_huffman(c, lengths, values);
3111     }
3112 
3113     if (c.sparse) {
3114       setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
3115       setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3116       setup_temp_free(f, lengths, c.entries);
3117       c.codewords = null;
3118     }
3119 
3120     compute_accelerated_huffman(c);
3121 
3122     c.lookup_type = get_bits!4(f);
3123     if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
3124     if (c.lookup_type > 0) {
3125       ushort* mults;
3126       c.minimum_value = float32_unpack(get_bits!32(f));
3127       c.delta_value = float32_unpack(get_bits!32(f));
3128       c.value_bits = get_bits_add_no!4(f, 1);
3129       c.sequence_p = get_bits!1(f);
3130       if (c.lookup_type == 1) {
3131         c.lookup_values = lookup1_values(c.entries, c.dimensions);
3132       } else {
3133         c.lookup_values = c.entries*c.dimensions;
3134       }
3135       if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
3136       mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
3137       if (mults is null) return error(f, STBVorbisError.outofmem);
3138       foreach (immutable j; 0..cast(int)c.lookup_values) {
3139         int q = get_bits_main(f, c.value_bits);
3140         if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
3141         mults[j] = cast(ushort)q; //k8
3142       }
3143 
3144       version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
3145         if (c.lookup_type == 1) {
3146           int sparse = c.sparse; //len
3147           float last = 0;
3148           // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3149           if (sparse) {
3150             if (c.sorted_entries == 0) goto skip;
3151             c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
3152           } else {
3153             c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
3154           }
3155           if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3156           foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
3157             uint z = (sparse ? c.sorted_values[j] : j);
3158             uint div = 1;
3159             foreach (immutable k; 0..c.dimensions) {
3160               int off = (z/div)%c.lookup_values;
3161               float val = mults[off];
3162               val = val*c.delta_value+c.minimum_value+last;
3163               c.multiplicands[j*c.dimensions+k] = val;
3164               if (c.sequence_p) last = val;
3165               if (k+1 < c.dimensions) {
3166                  if (div > uint.max/cast(uint)c.lookup_values) {
3167                     setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3168                     return error(f, STBVorbisError.invalid_setup);
3169                  }
3170                  div *= c.lookup_values;
3171               }
3172             }
3173           }
3174           c.lookup_type = 2;
3175           goto skip;
3176         }
3177         //else
3178       }
3179       {
3180         float last = 0;
3181         c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
3182         if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3183         foreach (immutable j; 0..cast(int)c.lookup_values) {
3184           float val = mults[j]*c.delta_value+c.minimum_value+last;
3185           c.multiplicands[j] = val;
3186           if (c.sequence_p) last = val;
3187         }
3188       }
3189      //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
3190      skip: // this is versioned out in C
3191       setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3192     }
3193   }
3194 
3195   // time domain transfers (notused)
3196   x = get_bits_add_no!6(f, 1);
3197   foreach (immutable i; 0..x) {
3198     auto z = get_bits!16(f);
3199     if (z != 0) return error(f, STBVorbisError.invalid_setup);
3200   }
3201 
3202   // Floors
3203   f.floor_count = get_bits_add_no!6(f, 1);
3204   f.floor_config = setup_malloc!Floor(f, f.floor_count);
3205   if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
3206   foreach (immutable i; 0..f.floor_count) {
3207     f.floor_types[i] = get_bits!16(f);
3208     if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
3209     if (f.floor_types[i] == 0) {
3210       Floor0* g = &f.floor_config[i].floor0;
3211       g.order = get_bits!8(f);
3212       g.rate = get_bits!16(f);
3213       g.bark_map_size = get_bits!16(f);
3214       g.amplitude_bits = get_bits!6(f);
3215       g.amplitude_offset = get_bits!8(f);
3216       g.number_of_books = get_bits_add_no!4(f, 1);
3217       foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
3218       return error(f, STBVorbisError.feature_not_supported);
3219     } else {
3220       Point[31*8+2] p;
3221       Floor1 *g = &f.floor_config[i].floor1;
3222       int max_class = -1;
3223       g.partitions = get_bits!5(f);
3224       foreach (immutable j; 0..g.partitions) {
3225         g.partition_class_list[j] = get_bits!4(f);
3226         if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
3227       }
3228       foreach (immutable j; 0..max_class+1) {
3229         g.class_dimensions[j] = get_bits_add_no!3(f, 1);
3230         g.class_subclasses[j] = get_bits!2(f);
3231         if (g.class_subclasses[j]) {
3232           g.class_masterbooks[j] = get_bits!8(f);
3233           if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3234         }
3235         foreach (immutable k; 0..1<<g.class_subclasses[j]) {
3236           g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
3237           if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3238         }
3239       }
3240       g.floor1_multiplier = get_bits_add_no!2(f, 1);
3241       g.rangebits = get_bits!4(f);
3242       g.Xlist[0] = 0;
3243       g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
3244       g.values = 2;
3245       foreach (immutable j; 0..g.partitions) {
3246         int c = g.partition_class_list[j];
3247         foreach (immutable k; 0..g.class_dimensions[c]) {
3248           g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
3249           ++g.values;
3250         }
3251       }
3252       assert(g.values <= ushort.max);
3253       // precompute the sorting
3254       foreach (ushort j; 0..cast(ushort)g.values) {
3255         p[j].x = g.Xlist[j];
3256         p[j].y = j;
3257       }
3258       qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3259       foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
3260       // precompute the neighbors
3261       foreach (uint j; 2..g.values) {
3262         ushort low = void, hi = void;
3263         neighbors(g.Xlist.ptr, j, &low, &hi);
3264         assert(low <= ubyte.max);
3265         assert(hi <= ubyte.max);
3266         g.neighbors[j].ptr[0] = cast(ubyte)low;
3267         g.neighbors[j].ptr[1] = cast(ubyte)hi;
3268       }
3269       if (g.values > longest_floorlist) longest_floorlist = g.values;
3270     }
3271   }
3272 
3273   // Residue
3274   f.residue_count = get_bits_add_no!6(f, 1);
3275   f.residue_config = setup_malloc!Residue(f, f.residue_count);
3276   if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
3277   memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
3278   foreach (immutable i; 0..f.residue_count) {
3279     ubyte[64] residue_cascade;
3280     Residue* r = f.residue_config+i;
3281     f.residue_types[i] = get_bits!16(f);
3282     if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
3283     r.begin = get_bits!24(f);
3284     r.end = get_bits!24(f);
3285     if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
3286     r.part_size = get_bits_add_no!24(f, 1);
3287     r.classifications = get_bits_add_no!6(f, 1);
3288     r.classbook = get_bits!8(f);
3289     if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3290     foreach (immutable j; 0..r.classifications) {
3291       ubyte high_bits = 0;
3292       ubyte low_bits = get_bits!3(f);
3293       if (get_bits!1(f)) high_bits = get_bits!5(f);
3294       assert(high_bits*8+low_bits <= ubyte.max);
3295       residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
3296     }
3297     static assert(r.residue_books[0].sizeof == 16);
3298     r.residue_books = setup_malloc!(short[8])(f, r.classifications);
3299     if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
3300     foreach (immutable j; 0..r.classifications) {
3301       foreach (immutable k; 0..8) {
3302         if (residue_cascade[j]&(1<<k)) {
3303           r.residue_books[j].ptr[k] = get_bits!8(f);
3304           if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3305         } else {
3306           r.residue_books[j].ptr[k] = -1;
3307         }
3308       }
3309     }
3310     // precompute the classifications[] array to avoid inner-loop mod/divide
3311     // call it 'classdata' since we already have r.classifications
3312     r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
3313     if (!r.classdata) return error(f, STBVorbisError.outofmem);
3314     memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
3315     foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
3316       int classwords = f.codebooks[r.classbook].dimensions;
3317       int temp = j;
3318       r.classdata[j] = setup_malloc!ubyte(f, classwords);
3319       if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
3320       foreach_reverse (immutable k; 0..classwords) {
3321         assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
3322         r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
3323         temp /= r.classifications;
3324       }
3325     }
3326   }
3327 
3328   f.mapping_count = get_bits_add_no!6(f, 1);
3329   f.mapping = setup_malloc!Mapping(f, f.mapping_count);
3330   if (f.mapping is null) return error(f, STBVorbisError.outofmem);
3331   memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
3332   foreach (immutable i; 0..f.mapping_count) {
3333     Mapping* m = f.mapping+i;
3334     int mapping_type = get_bits!16(f);
3335     if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
3336     m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
3337     if (m.chan is null) return error(f, STBVorbisError.outofmem);
3338     m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
3339     if (m.submaps > max_submaps) max_submaps = m.submaps;
3340     if (get_bits!1(f)) {
3341       m.coupling_steps = get_bits_add_no!8(f, 1);
3342       foreach (immutable k; 0..m.coupling_steps) {
3343         m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3344         m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3345         if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3346         if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3347         if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
3348       }
3349     } else {
3350       m.coupling_steps = 0;
3351     }
3352 
3353     // reserved field
3354     if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
3355     if (m.submaps > 1) {
3356       foreach (immutable j; 0..f.vrchannels) {
3357         m.chan[j].mux = get_bits!4(f);
3358         if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
3359       }
3360     } else {
3361       // @SPECIFICATION: this case is missing from the spec
3362       foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
3363     }
3364     foreach (immutable j; 0..m.submaps) {
3365       get_bits!8(f); // discard
3366       m.submap_floor[j] = get_bits!8(f);
3367       m.submap_residue[j] = get_bits!8(f);
3368       if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
3369       if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
3370     }
3371   }
3372 
3373   // Modes
3374   f.mode_count = get_bits_add_no!6(f, 1);
3375   foreach (immutable i; 0..f.mode_count) {
3376     Mode* m = f.mode_config.ptr+i;
3377     m.blockflag = get_bits!1(f);
3378     m.windowtype = get_bits!16(f);
3379     m.transformtype = get_bits!16(f);
3380     m.mapping = get_bits!8(f);
3381     if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
3382     if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
3383     if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
3384   }
3385 
3386   flush_packet(f);
3387 
3388   f.previous_length = 0;
3389 
3390   foreach (immutable i; 0..f.vrchannels) {
3391     f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
3392     f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3393     f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
3394     if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3395     version(STB_VORBIS_NO_DEFER_FLOOR) {
3396       f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3397       if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3398     }
3399   }
3400 
3401   if (!init_blocksize(f, 0, f.blocksize_0)) return false;
3402   if (!init_blocksize(f, 1, f.blocksize_1)) return false;
3403   f.blocksize.ptr[0] = f.blocksize_0;
3404   f.blocksize.ptr[1] = f.blocksize_1;
3405 
3406   version(STB_VORBIS_DIVIDE_TABLE) {
3407     if (integer_divide_table[1].ptr[1] == 0) {
3408       foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
3409     }
3410   }
3411 
3412   // compute how much temporary memory is needed
3413 
3414   // 1.
3415   {
3416     uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
3417     uint classify_mem;
3418     int max_part_read = 0;
3419     foreach (immutable i; 0..f.residue_count) {
3420       Residue* r = f.residue_config+i;
3421       int n_read = r.end-r.begin;
3422       int part_read = n_read/r.part_size;
3423       if (part_read > max_part_read) max_part_read = part_read;
3424     }
3425     version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
3426       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
3427     } else {
3428       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
3429     }
3430     f.temp_memory_required = classify_mem;
3431     if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
3432   }
3433 
3434   f.first_decode = true;
3435 
3436   /+
3437   if (f.alloc.alloc_buffer) {
3438     debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3439     // check if there's enough temp memory so we don't error later
3440     if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
3441   }
3442   +/
3443 
3444   f.first_audio_page_offset = f.fileOffset();
3445 
3446   return true;
3447 }
3448 
3449 /+
3450 private int vorbis_search_for_page_pushdata (VorbisDecoder f, ubyte* data, int data_len) {
3451   import core.stdc.string : memcmp;
3452 
3453   foreach (immutable i; 0..f.page_crc_tests) f.scan.ptr[i].bytes_done = 0;
3454 
3455   // if we have room for more scans, search for them first, because
3456   // they may cause us to stop early if their header is incomplete
3457   if (f.page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3458     if (data_len < 4) return 0;
3459     data_len -= 3; // need to look for 4-byte sequence, so don't miss one that straddles a boundary
3460     foreach (immutable i; 0..data_len) {
3461       if (data[i] == 0x4f) {
3462         if (memcmp(data+i, ogg_page_header.ptr, 4) == 0) {
3463           // make sure we have the whole page header
3464           if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3465             // only read up to this page start, so hopefully we'll
3466             // have the whole page header start next time
3467             data_len = i;
3468             break;
3469           }
3470           // ok, we have it all; compute the length of the page
3471           auto len = 27+data[i+26];
3472           foreach (immutable j; 0..data[i+26]) len += data[i+27+j];
3473           // scan everything up to the embedded crc (which we must 0)
3474           uint crc = 0;
3475           foreach (immutable j; 0..22) crc = crc32_update(crc, data[i+j]);
3476           // now process 4 0-bytes
3477           foreach (immutable j; 22..26) crc = crc32_update(crc, 0);
3478           // len is the total number of bytes we need to scan
3479           auto n = f.page_crc_tests++;
3480           f.scan.ptr[n].bytes_left = len-/*j*/26;
3481           f.scan.ptr[n].crc_so_far = crc;
3482           f.scan.ptr[n].goal_crc = data[i+22]+(data[i+23]<<8)+(data[i+24]<<16)+(data[i+25]<<24);
3483           // if the last frame on a page is continued to the next, then
3484           // we can't recover the sample_loc immediately
3485           if (data[i+27+data[i+26]-1] == 255) {
3486             f.scan.ptr[n].sample_loc = ~0;
3487           } else {
3488             f.scan.ptr[n].sample_loc = data[i+6]+(data[i+7]<<8)+(data[i+8]<<16)+(data[i+9]<<24);
3489           }
3490           f.scan.ptr[n].bytes_done = i+26/*j*/;
3491           if (f.page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) break;
3492           // keep going if we still have room for more
3493         }
3494       }
3495     }
3496   }
3497 
3498   for (uint i = 0; i < f.page_crc_tests; ) {
3499     int nn = f.scan.ptr[i].bytes_done;
3500     int m = f.scan.ptr[i].bytes_left;
3501     if (m > data_len-nn) m = data_len-nn;
3502     // m is the bytes to scan in the current chunk
3503     uint crc = f.scan.ptr[i].crc_so_far;
3504     foreach (immutable j; 0..m) crc = crc32_update(crc, data[nn+j]);
3505     f.scan.ptr[i].bytes_left -= m;
3506     f.scan.ptr[i].crc_so_far = crc;
3507     if (f.scan.ptr[i].bytes_left == 0) {
3508       // does it match?
3509       if (f.scan.ptr[i].crc_so_far == f.scan.ptr[i].goal_crc) {
3510         // Houston, we have page
3511         data_len = nn+m; // consumption amount is wherever that scan ended
3512         f.page_crc_tests = -1; // drop out of page scan mode
3513         f.previous_length = 0; // decode-but-don't-output one frame
3514         f.next_seg = -1;       // start a new page
3515         f.current_loc = f.scan.ptr[i].sample_loc; // set the current sample location to the amount we'd have decoded had we decoded this page
3516         f.current_loc_valid = f.current_loc != ~0U;
3517         return data_len;
3518       }
3519       // delete entry
3520       f.scan.ptr[i] = f.scan.ptr[--f.page_crc_tests];
3521     } else {
3522       ++i;
3523     }
3524   }
3525 
3526   return data_len;
3527 }
3528 +/
3529 
// Scans forward from the current read position for the next Ogg page whose
// checksum verifies.
//
// Params:
//   f    = decoder whose stream is scanned (its read position is moved)
//   end  = if non-null, receives the file offset just past the found page
//   last = if non-null, receives 1 when header flag bit 0x04 is set, else 0
//
// Returns: 1 with the read position rewound to the first byte of the page's
// capture pattern; 0 if the stream ran out (or the scan went past
// `stream_len`) before a valid page was found.
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  for (;;) {
    if (f.eof) return 0;
    auto n = get8(f);
    if (n == 0x4f) { // page header candidate
      // offset of the byte right after the candidate's first byte; a rejected
      // candidate resumes the scan from here
      uint retry_loc = f.fileOffset;
      // check if we're off the end of a file_section stream
      // (retry_loc is unsigned: without the `>= 25` guard the subtraction wraps
      // for candidates in the first 24 bytes and wrongly aborts the scan)
      if (retry_loc >= 25 && retry_loc-25 > f.stream_len) return 0;
      // check the rest of the header
      int i = void;
      for (i = 1; i < 4; ++i) if (get8(f) != ogg_page_header[i]) break;
      if (f.eof) return 0;
      if (i == 4) {
        ubyte[27] header;
        //for (i=0; i < 4; ++i) header[i] = ogg_page_header[i];
        header[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
        for (i = 4; i < 27; ++i) header[i] = get8(f);
        if (f.eof) return 0;
        // byte 4 must be zero, otherwise this is not a page we understand
        if (header[4] != 0) goto invalid;
        // the stored checksum is computed with its own four bytes zeroed, so
        // remember it and blank the field before hashing the header
        uint goal = header[22]+(header[23]<<8)+(header[24]<<16)+(header[25]<<24);
        for (i = 22; i < 26; ++i) header[i] = 0;
        uint crc = 0;
        for (i = 0; i < 27; ++i) crc = crc32_update(crc, header[i]);
        // header[26] lacing values follow; their sum is the payload size
        uint len = 0;
        for (i = 0; i < header[26]; ++i) {
          auto s = get8(f);
          crc = crc32_update(crc, s);
          len += s;
        }
        if (len && f.eof) return 0;
        for (i = 0; i < len; ++i) crc = crc32_update(crc, get8(f));
        // finished parsing probable page
        if (crc == goal) {
          // we could now check that it's either got the last
          // page flag set, OR it's followed by the capture
          // pattern, but I guess TECHNICALLY you could have
          // a file with garbage between each ogg page and recover
          // from it automatically? So even though that paranoia
          // might decrease the chance of an invalid decode by
          // another 2^32, not worth it since it would hose those
          // invalid-but-useful files?
          if (end) *end = f.fileOffset;
          if (last) *last = (header[5]&0x04 ? 1 : 0);
          // step back onto the first byte of the capture pattern
          set_file_offset(f, retry_loc-1);
          return 1;
        }
      }
     invalid:
      // not a valid page, so rewind and look for next one
      set_file_offset(f, retry_loc);
    }
  }
  assert(0);
}
3584 
// all-ones 32-bit value; same bit pattern as the `~0U` that the seek code below
// compares `last_decoded_sample` against.
// NOTE(review): presumably the named form of that "no sample position" marker -- confirm.
enum SAMPLE_unknown = 0xffffffff;
3586 
// seeking is implemented with a binary search, which narrows down the range to
// 64K, before using a linear search (because finding the synchronization
// pattern can be expensive, and the chance we'd find the end page again is
// relatively high for small ranges)
//
// two initial interpolation-style probes are used at the start of the search
// to try to bound either side of the binary search sensibly, while still
// working in O(log n) time if they fail.

// Reads the page header at the current read position and fills `z` with the
// page's start offset, end offset and the 32-bit sample position stored in
// header bytes 6..9.
//
// Returns: 1 on success, with the read position put back at the page start;
// 0 if the bytes read do not begin with "OggS" (the read position is NOT
// restored in that case).
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  ubyte[27] hdr;
  ubyte[255] segtable;

  // remember where this page begins
  z.page_start = f.fileOffset;

  // fixed-size part of the header, then the capture pattern check
  getn(f, hdr.ptr, 27);
  immutable bool captured = (hdr[0] == 'O' && hdr[1] == 'g' && hdr[2] == 'g' && hdr[3] == 'S');
  if (!captured) return 0;

  // hdr[26] lacing values follow the fixed header
  getn(f, segtable.ptr, hdr[26]);

  // payload size is the sum of all lacing values
  uint payload = 0;
  foreach (immutable b; segtable[0..hdr[26]]) payload += b;

  // fixed header + lacing table + payload gives the end of the page
  z.page_end = z.page_start+27+hdr[26]+payload;

  // little-endian 32-bit sample position from bytes 6..9
  z.last_decoded_sample = hdr[6]|(hdr[7]<<8)|(hdr[8]<<16)|(hdr[9]<<24);

  // put the stream back where we found it
  set_file_offset(f, z.page_start);
  return 1;
}
3621 
// rarely used function to seek back to the preceding page while finding the start of a packet
//
// Scanning starts up to 64K before `limit_offset` (but never before the first
// audio page) and walks forward page by page. Returns 1 with the read position
// at the start of the page that begins before `limit_offset` and ends at or
// after it; returns 0 if no such page is found.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  uint page_end;

  // back up 64K from the limit when that still lands inside the audio data,
  // otherwise begin at the first audio page
  uint scan_from = f.first_audio_page_offset;
  if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset) {
    scan_from = limit_offset-65536;
  }

  set_file_offset(f, scan_from);

  while (vorbis_find_page(f, &page_end, null)) {
    immutable bool reaches_limit = (page_end >= limit_offset);
    if (reaches_limit && f.fileOffset < limit_offset) return 1;
    // this page ends too early; continue right after it
    set_file_offset(f, page_end);
  }

  return 0;
}
3642 
// implements the search logic for finding a page and starting decoding. if
// the function succeeds, current_loc_valid will be true and current_loc will
// be less than or equal to the provided sample number (the closer the
// better).
//
// Params:
//   f             = decoder to reposition
//   sample_number = target sample; must not exceed the stream length
//
// Returns: 1 on success; 0 after recording an error (`seek_without_length`,
// `seek_invalid` or `seek_failed`) or when the first frame cannot be decoded.
// On a failed page search the stream is rewound to its start.
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  ProbedPage left, right, mid;
  int i, start_seg_with_known_loc, end_pos, page_start;
  uint delta, stream_length, padding, last_sample_limit;
  double offset = 0.0, bytes_per_sample = 0.0;
  int probe = 0;

  // find the last page and validate the target sample
  stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  padding = ((f.blocksize_1-f.blocksize_0)>>2);
  // the page search aims `padding` samples early; the caller's sample_number
  // is kept intact so the final sanity check compares against the real target
  last_sample_limit = (sample_number < padding ? 0 : sample_number-padding);

  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) goto error;
  }

  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // starting from the start is handled differently
  if (last_sample_limit <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe < 2) {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(last_sample_limit-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side)
          double miss = (cast(double)last_sample_limit-mid.last_decoded_sample)*bytes_per_sample;
          if (miss >= 0 && miss <  8000) miss =  8000;
          if (miss <  0 && miss > -8000) miss = -8000;
          offset += miss*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      } else {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      }

      if (!vorbis_find_page(f, null, null)) goto error;
    }

    for (;;) {
      if (!get_seek_page_info(f, &mid)) goto error;
      if (mid.last_decoded_sample != ~0U) break;
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
    }

    // if we've just found the last page again then we're in a tricky file,
    // and we're close enough -- but only once the interpolation probes are
    // done or the range is already small; an interpolation probe that
    // overshoots onto the right page must not end the search, or `left` would
    // be left far away from the target.
    if (mid.page_start == right.page_start) {
      if (probe >= 2 || delta <= 65536) break;
    } else {
      if (last_sample_limit < mid.last_decoded_sample) right = mid; else left = mid;
    }

    ++probe;
  }

  // seek back to start of the last packet
  page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  for (;;) {
    // walk back over lacing values of 255 (packet continues) to the segment
    // where the packet begins
    for (i = end_pos; i > 0; --i) if (f.segments.ptr[i-1] != 255) break;
    start_seg_with_known_loc = i;
    if (start_seg_with_known_loc > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) goto error;
    page_start = f.fileOffset;
    if (!start_page(f)) goto error;
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = start_seg_with_known_loc;

  // skip the payload of every segment that precedes the packet we start on
  for (i = 0; i < start_seg_with_known_loc; ++i) skip(f, f.segments.ptr[i]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  // compare against the caller's target, not the padded search limit
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;

error:
  // try to restore the file to a valid state
  f.seekStart;
  return error(f, STBVorbisError.seek_failed);
}
3773 
// the same as vorbis_decode_initial, but without advancing
//
// Runs vorbis_decode_initial and then rewinds the bytes it consumed so the
// packet can be read again from its start. Returns 0 if the initial decode
// fails, 1 otherwise.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // either 1 or 2 bytes were read, figure out which so we can rewind:
  // one leading bit, the mode number, plus two more bits when the mode's
  // blockflag is set
  int header_bits = 1+ilog(f.mode_count-1);
  if (f.mode_config.ptr[*mode].blockflag) header_bits += 2;
  immutable int consumed = (header_bits+7)>>3; // round up to whole bytes

  // hand the bytes back to the segment/packet counters and to the stream
  f.bytes_in_seg += consumed;
  f.packet_bytes -= consumed;
  skip(f, -consumed);
  // step back one segment; -1 maps to the last segment of the page
  if (f.next_seg != -1) --f.next_seg; else f.next_seg = f.segment_count-1;
  f.valid_bits = 0;

  return 1;
}
3791 
// ////////////////////////////////////////////////////////////////////////// //
// utility and supporting functions for getting s16 samples

// bit flags naming the output a source channel contributes to
enum PLAYBACK_MONO  = (1<<0);
enum PLAYBACK_LEFT  = (1<<1);
enum PLAYBACK_RIGHT = (1<<2);

// per-channel flag combinations used in the table below: every channel feeds
// the mono mix; C additionally feeds both the left and the right output
enum L = (PLAYBACK_LEFT |PLAYBACK_MONO);
enum C = (PLAYBACK_LEFT |PLAYBACK_RIGHT|PLAYBACK_MONO);
enum R = (PLAYBACK_RIGHT|PLAYBACK_MONO);

// channel_position[channel count][channel index] is the flag set of that
// source channel; compute_samples ANDs it with the requested mask to decide
// whether the channel is summed into the output
immutable byte[6][7] channel_position = [
  [ 0 ],
  [ C ],
  [ L, R ],
  [ L, C, R ],
  [ L, R, L, R ],
  [ L, C, R, L, R ],
  [ L, C, R, L, R, C ],
];
3811 
3812 
// Compile-time "macros" that produce code strings for `mixin`:
//   declfcvar!name                  -- declaration of the scratch variable the chosen variant needs
//   FAST_SCALED_FLOAT_TO_INT!(x, s) -- converts the float expression `x` and declares the result as `int v`
// The generated code introduces `v` into the scope where it is mixed in.
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // lrintf-based variant: no scratch variable is needed, so the "declaration" is an empty block
  enum declfcvar(string name) = "{}";
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    // the scale factor below is hard-coded to 32768, so only a shift of 15 is accepted
    static assert(s == "15");
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // overlays a float and an int so the float's bit pattern can be read back as an integer
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  // the scratch variable is left uninitialized (`= void`); the conversion writes `.f` before reading `.i`
  enum declfcvar(string name) = "float_conv "~name~" = void;";
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);
  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  // MAGIC is the float constant added to the input; ADDEND is the integer removed from the resulting bit pattern
  enum MAGIC(string SHIFT) = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  enum ADDEND(string SHIFT) = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  // NOTE: the generated code refers to a variable literally named `temp`, so the
  // scratch variable must be declared with declfcvar!"temp"
  enum FAST_SCALED_FLOAT_TO_INT(string x, string s) = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
    .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
}
3835 
// Converts `len` float samples read from `src` into signed 16-bit samples
// written to `dest`, saturating values that do not fit into a short.
private void copy_samples (short* dest, float* src, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  for (int left = len; left > 0; --left, ++src, ++dest) {
    // declares `int v` with the scaled sample value
    mixin(FAST_SCALED_FLOAT_TO_INT!("*src", "15"));
    // saturate to the s16 range
    if (v < -32768) v = -32768; else if (v > 32767) v = 32767;
    *dest = cast(short)v; //k8
  }
}
3846 
// Mixes all source channels whose `channel_position` flags intersect `mask`
// into one stream of signed 16-bit samples.
//   mask     - PLAYBACK_* flag(s) selecting the source channels to sum
//   output   - receives `len` samples
//   num_c    - number of source channels (selects the `channel_position` row)
//   data     - per-channel float sample pointers
//   d_offset - first sample to read in every source channel
//   len      - number of samples to produce; nothing is written when len <= 0
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer = void; // re-zeroed at the start of every chunk
  //check_endianness();
  mixin(declfcvar!"temp");
  // the offset is signed on purpose: with an unsigned counter a negative `len`
  // is compared as a huge value and the loop runs far past both buffers
  for (int o = 0; o < len; o += BUFFER_SIZE) {
    // the last chunk may be shorter than the mixing buffer
    immutable int n = (len-o < BUFFER_SIZE ? len-o : BUFFER_SIZE);
    buffer[] = 0.0f;
    foreach (immutable j; 0..num_c) {
      if (channel_position[num_c].ptr[j]&mask) {
        const(float)* src = data[j]+d_offset+o;
        foreach (immutable i; 0..n) buffer.ptr[i] += src[i];
      }
    }
    foreach (immutable i; 0..n) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // saturate to the s16 range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o+i] = cast(short)v; //k8
    }
  }
}
3867 
// Downmixes `num_c` source channels into interleaved stereo signed 16-bit
// samples (left, right, left, right, ...), using the LEFT/RIGHT flags from
// `channel_position` to decide where each source channel goes.
//   output   - receives 2*len shorts
//   data     - per-channel float sample pointers
//   d_offset - first sample to read in every source channel
//   len      - number of stereo frames to produce; nothing is written when len <= 0
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  enum CHUNK_FRAMES = BUFFER_SIZE>>1; // the buffer holds interleaved pairs
  float[BUFFER_SIZE] buffer = void; // re-zeroed at the start of every chunk
  //check_endianness();
  mixin(declfcvar!"temp");
  // o is the offset in the source data; it is signed on purpose: with an
  // unsigned counter a negative `len` is compared as a huge value and the
  // loop runs far past both buffers
  for (int o = 0; o < len; o += CHUNK_FRAMES) {
    // o2 is the offset in the output data
    immutable int o2 = o<<1;
    // the last chunk may be shorter than the mixing buffer
    immutable int n = (len-o < CHUNK_FRAMES ? len-o : CHUNK_FRAMES);
    buffer[] = 0.0f;
    foreach (immutable j; 0..num_c) {
      immutable int m = channel_position[num_c].ptr[j]&(PLAYBACK_LEFT|PLAYBACK_RIGHT);
      const(float)* src = data[j]+d_offset+o;
      if (m == (PLAYBACK_LEFT|PLAYBACK_RIGHT)) {
        foreach (immutable i; 0..n) {
          buffer.ptr[i*2+0] += src[i];
          buffer.ptr[i*2+1] += src[i];
        }
      } else if (m == PLAYBACK_LEFT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+0] += src[i];
      } else if (m == PLAYBACK_RIGHT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+1] += src[i];
      }
    }
    foreach (immutable i; 0..n*2) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // saturate to the s16 range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o2+i] = cast(short)v; //k8
    }
  }
}
3902 
// Converts `samples` float samples per channel into separate (non-interleaved)
// s16 buffers. When the channel counts differ and the target has at most 2
// channels (source at most 6), the source is downmixed; otherwise channels are
// copied one-to-one and surplus target channels are zero-filled.
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  import core.stdc.string : memset;

  immutable bool downmix = (buf_c != data_c && buf_c <= 2 && data_c <= 6);
  if (!downmix) {
    immutable int common = (data_c > buf_c ? buf_c : data_c);
    for (int ch = 0; ch < common; ++ch) copy_samples(buffer[ch]+b_offset, data[ch]+d_offset, samples);
    // target channels without a source counterpart become silence
    for (int ch = common; ch < buf_c; ++ch) memset(buffer[ch]+b_offset, 0, short.sizeof*samples);
    return;
  }

  // row = number of target channels, column = target channel index
  static immutable int[2][3] channel_selector = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
  for (int ch = 0; ch < buf_c; ++ch) {
    compute_samples(channel_selector[buf_c].ptr[ch], buffer[ch]+b_offset, data_c, data, d_offset, samples);
  }
}
3915 
// Converts `len` frames of float samples into interleaved s16 samples.
//   buf_c    - channels in the interleaved target
//   buffer   - receives len*buf_c shorts
//   data_c   - channels available in `data`
//   data     - per-channel float sample pointers
//   d_offset - first sample to read in every source channel
// When the channel counts differ and the target has at most 2 channels (source
// at most 6) the source is downmixed; otherwise channels are copied one-to-one
// and surplus target channels are zero-filled.
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    if (buf_c == 2) {
      // one pass fills both interleaved channels; repeating the call per
      // target channel would only recompute the very same output
      compute_stereo_samples(buffer, data_c, data, d_offset, len);
    } else if (buf_c == 1) {
      // a mono target must receive `len` shorts, not `2*len`: the stereo
      // mixer would write past the end of the caller's buffer here
      compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
    }
    // buf_c <= 0: there is nothing to write
  } else {
    immutable int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable j; 0..len) {
      foreach (immutable i; 0..limit) {
        float f = data[i][d_offset+j];
        mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));//data[i][d_offset+j], 15);
        // saturate to the s16 range
        if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
        *buffer++ = cast(short)v; //k8
      }
      // target channels without a source counterpart become silence
      foreach (immutable i; limit..buf_c) *buffer++ = 0;
    }
  }
}
3935 } // @nogc
3936 
3937 
3938 public class VorbisDecoder {
3939   // return # of bytes read, 0 on eof, -1 on error
3940   // if called with `buf is null`, do `close()`
3941   alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3942 
3943   //TODO
3944   static struct Allocator {
3945   static nothrow @nogc: // because
3946     void* alloc (uint sz, VorbisDecoder vb) {
3947       import core.stdc.stdlib : malloc;
3948       return malloc(sz);
3949     }
3950     void free (void* p, VorbisDecoder vb) {
3951       import core.stdc.stdlib : free;
3952       free(p);
3953     }
3954     void* allocTemp (uint sz, VorbisDecoder vb) {
3955       import core.stdc.stdlib : malloc;
3956       return malloc(sz);
3957     }
3958     void freeTemp (void* p, uint sz, VorbisDecoder vb) {
3959       import core.stdc.stdlib : free;
3960       free(p);
3961     }
3962     uint tempSave (VorbisDecoder vb) { return 0; }
3963     void tempRestore (uint pos, VorbisDecoder vb) {}
3964   }
3965 
3966 nothrow @nogc:
3967 private:
  // raw stream state
  bool isOpened; // the raw* helpers do nothing while this is false
  readCB stmread; // read callback; called with a null buffer by rawClose
  uint stlastofs = uint.max; // offset remembered by stflRead to skip a redundant fseek
  uint stst; // absolute offset of the stream start (rawSeek positions are relative to it)
  uint stpos; // current absolute read position
  uint stend; // absolute end offset; stpos is clamped to it
  bool stclose; // stflRead fcloses `stfl` on a close request only when this is set
  FILE* stfl; // C stream used by stflRead
3976 
3977 private:
3978   //ubyte* stream;
3979   //ubyte* stream_start;
3980   //ubyte* stream_end;
3981   //uint stream_len;
3982 
3983   /+bool push_mode;+/
3984 
3985   uint first_audio_page_offset;
3986 
3987   ProbedPage p_first, p_last;
3988 
3989   // memory management
3990   Allocator alloc;
3991   int setup_offset;
3992   int temp_offset;
3993 
3994   // run-time results
3995   bool eof = true;
3996   STBVorbisError error;
3997 
3998   // header info
3999   int[2] blocksize;
4000   int blocksize_0, blocksize_1;
4001   int codebook_count;
4002   Codebook* codebooks;
4003   int floor_count;
4004   ushort[64] floor_types; // varies
4005   Floor* floor_config;
4006   int residue_count;
4007   ushort[64] residue_types; // varies
4008   Residue* residue_config;
4009   int mapping_count;
4010   Mapping* mapping;
4011   int mode_count;
4012   Mode[64] mode_config;  // varies
4013 
4014   uint total_samples;
4015 
4016   // decode buffer
4017   float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
4018   float*[STB_VORBIS_MAX_CHANNELS] outputs;
4019 
4020   float*[STB_VORBIS_MAX_CHANNELS] previous_window;
4021   int previous_length;
4022 
4023   version(STB_VORBIS_NO_DEFER_FLOOR) {
4024     float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
4025   } else {
4026     short*[STB_VORBIS_MAX_CHANNELS] finalY;
4027   }
4028 
4029   uint current_loc; // sample location of next frame to decode
4030   int current_loc_valid;
4031 
4032   // per-blocksize precomputed data
4033 
4034   // twiddle factors
4035   float*[2] A, B, C;
4036   float*[2] window;
4037   ushort*[2] bit_reverse;
4038 
4039   // current page/packet/segment streaming info
4040   uint serial; // stream serial number for verification
4041   int last_page;
4042   int segment_count;
4043   ubyte[255] segments;
4044   ubyte page_flag;
4045   ubyte bytes_in_seg;
4046   ubyte first_decode;
4047   int next_seg;
4048   int last_seg;  // flag that we're on the last segment
4049   int last_seg_which; // what was the segment number of the last seg?
4050   uint acc;
4051   int valid_bits;
4052   int packet_bytes;
4053   int end_seg_with_known_loc;
4054   uint known_loc_for_packet;
4055   int discard_samples_deferred;
4056   uint samples_output;
4057 
4058   // push mode scanning
4059   /+
4060   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
4061   CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
4062   +/
4063 
4064   // sample-access
4065   int channel_buffer_start;
4066   int channel_buffer_end;
4067 
4068 private: // k8: 'cause i'm evil
4069   // user-accessible info
4070   uint sample_rate;
4071   int vrchannels;
4072 
4073   uint setup_memory_required;
4074   uint temp_memory_required;
4075   uint setup_temp_memory_required;
4076 
4077   bool read_comments;
4078   ubyte* comment_data;
4079   uint comment_size;
4080 
4081   // functions to get comment data
4082   uint comment_data_pos;
4083 
4084 private:
4085   int rawRead (void[] buf) {
4086     static if (__VERSION__ > 2067) pragma(inline, true);
4087     if (isOpened && buf.length > 0 && stpos < stend) {
4088       if (stend-stpos < buf.length) buf = buf[0..stend-stpos];
4089       auto rd = stmread(buf, stpos, this);
4090       if (rd > 0) stpos += rd;
4091       return rd;
4092     }
4093     return 0;
4094   }
4095   void rawSkip (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened && n > 0) { if ((stpos += n) > stend) stpos = stend; } }
4096   void rawSeek (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { stpos = stst+(n < 0 ? 0 : n); if (stpos > stend) stpos = stend; } }
4097   void rawClose () { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { isOpened = false; stmread(null, 0, this); } }
4098 
4099 final:
4100 private:
4101   void doInit () {
4102     import core.stdc.string : memset;
4103     /*
4104     if (z) {
4105       alloc = *z;
4106       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
4107       temp_offset = alloc.alloc_buffer_length_in_bytes;
4108     }
4109     */
4110     eof = false;
4111     error = STBVorbisError.no_error;
4112     /+stream = null;+/
4113     codebooks = null;
4114     /+page_crc_tests = -1;+/
4115   }
4116 
4117   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
4118     if (buf !is null) {
4119       //{ import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
4120       if (vb.stlastofs != ofs) {
4121         import core.stdc.stdio : fseek, SEEK_SET;
4122         vb.stlastofs = ofs;
4123         fseek(vb.stfl, ofs, SEEK_SET);
4124       }
4125       import core.stdc.stdio : fread;
4126       return cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
4127     } else {
4128       if (vb.stclose) {
4129         import core.stdc.stdio : fclose;
4130         if (vb.stfl !is null) fclose(vb.stfl);
4131       }
4132       vb.stfl = null;
4133       return 0;
4134     }
4135   }
4136 
4137 public:
  // creates a closed decoder; use `open()` to attach a stream
  this () {}
  // releases everything via `close()`
  ~this () { close(); }

  // stores a user read callback and the stream size (negative sizes become 0).
  // NOTE(review): unlike `open()`, this neither sets `isOpened` nor calls
  // `start_decoder`, so the raw* readers stay inactive afterwards -- confirm
  // whether this constructor is unfinished.
  this (int asize, readCB rcb) { assert(rcb !is null); stend = (asize > 0 ? asize : 0); stmread = rcb; }
  // same as `open(fl, doclose)`
  this (FILE* fl, bool doclose=true) { open(fl, doclose); }
  // same as `open(filename)`
  this (const(char)[] filename) { open(filename); }

  // true while no stream is attached
  @property bool closed () { return !isOpened; }
4146 
4147   void open (FILE *fl, bool doclose=true) {
4148     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
4149     close();
4150     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
4151     stclose = doclose;
4152     stst = stpos = cast(uint)ftell(fl);
4153     fseek(fl, 0, SEEK_END);
4154     stend = cast(uint)ftell(fl);
4155     stlastofs = stlastofs.max;
4156     stclose = false;
4157     stfl = fl;
4158     import std.functional : toDelegate;
4159     stmread = toDelegate(&stflRead);
4160     isOpened = true;
4161     eof = false;
4162     read_comments = true;
4163     if (start_decoder(this)) {
4164       vorbis_pump_first_frame(this);
4165       return;
4166     }
4167     auto err = error;
4168     close();
4169     error = err;
4170   }
4171 
4172   void open (const(char)[] filename) {
4173     import core.stdc.stdio : fopen;
4174     import std.internal.cstring; // sorry
4175     close();
4176     FILE* fl = fopen(filename.tempCString, "rb");
4177     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
4178     open(fl, true);
4179   }
4180 
4181   /+
4182   void openPushdata(void* data, int data_len, // the memory available for decoding
4183                     int* data_used)           // only defined on success
4184   {
4185     close();
4186     eof = false;
4187     stream = cast(ubyte*)data;
4188     stream_end = stream+data_len;
4189     push_mode = true;
4190     if (!start_decoder(this)) {
4191       auto err = error;
4192       if (eof) err = STBVorbisError.need_more_data; else close();
4193       error = err;
4194       return;
4195     }
4196     *data_used = stream-(cast(ubyte*)data);
4197     error = STBVorbisError.no_error;
4198   }
4199   +/
4200 
4201   void close () {
4202     import core.stdc.string : memset;
4203 
4204     setup_free(this, this.comment_data);
4205     if (this.residue_config) {
4206       foreach (immutable i; 0..this.residue_count) {
4207         Residue* r = this.residue_config+i;
4208         if (r.classdata) {
4209           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
4210           setup_free(this, r.classdata);
4211         }
4212         setup_free(this, r.residue_books);
4213       }
4214     }
4215 
4216     if (this.codebooks) {
4217       foreach (immutable i; 0..this.codebook_count) {
4218         Codebook* c = this.codebooks+i;
4219         setup_free(this, c.codeword_lengths);
4220         setup_free(this, c.multiplicands);
4221         setup_free(this, c.codewords);
4222         setup_free(this, c.sorted_codewords);
4223         // c.sorted_values[-1] is the first entry in the array
4224         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
4225       }
4226       setup_free(this, this.codebooks);
4227     }
4228     setup_free(this, this.floor_config);
4229     setup_free(this, this.residue_config);
4230     if (this.mapping) {
4231       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
4232       setup_free(this, this.mapping);
4233     }
4234     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
4235       setup_free(this, this.channel_buffers.ptr[i]);
4236       setup_free(this, this.previous_window.ptr[i]);
4237       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
4238       setup_free(this, this.finalY.ptr[i]);
4239     }
4240     foreach (immutable i; 0..2) {
4241       setup_free(this, this.A.ptr[i]);
4242       setup_free(this, this.B.ptr[i]);
4243       setup_free(this, this.C.ptr[i]);
4244       setup_free(this, this.window.ptr[i]);
4245       setup_free(this, this.bit_reverse.ptr[i]);
4246     }
4247 
4248     rawClose();
4249     isOpened = false;
4250     stmread = null;
4251     stlastofs = uint.max;
4252     stst = 0;
4253     stpos = 0;
4254     stend = 0;
4255     stclose = false;
4256     stfl = null;
4257 
4258     sample_rate = 0;
4259     vrchannels = 0;
4260 
4261     setup_memory_required = 0;
4262     temp_memory_required = 0;
4263     setup_temp_memory_required = 0;
4264 
4265     read_comments = 0;
4266     comment_data = null;
4267     comment_size = 0;
4268 
4269     comment_data_pos = 0;
4270 
4271     /+
4272     stream = null;
4273     stream_start = null;
4274     stream_end = null;
4275     +/
4276 
4277     //stream_len = 0;
4278 
4279     /+push_mode = false;+/
4280 
4281     first_audio_page_offset = 0;
4282 
4283     p_first = p_first.init;
4284     p_last = p_last.init;
4285 
4286     setup_offset = 0;
4287     temp_offset = 0;
4288 
4289     eof = true;
4290     error = STBVorbisError.no_error;
4291 
4292     blocksize[] = 0;
4293     blocksize_0 = 0;
4294     blocksize_1 = 0;
4295     codebook_count = 0;
4296     codebooks = null;
4297     floor_count = 0;
4298     floor_types[] = 0;
4299     floor_config = null;
4300     residue_count = 0;
4301     residue_types[] = 0;
4302     residue_config = null;
4303     mapping_count = 0;
4304     mapping = null;
4305     mode_count = 0;
4306     mode_config[] = Mode.init;
4307 
4308     total_samples = 0;
4309 
4310     channel_buffers[] = null;
4311     outputs[] = null;
4312 
4313     previous_window[] = null;
4314     previous_length = 0;
4315 
4316     version(STB_VORBIS_NO_DEFER_FLOOR) {
4317       floor_buffers[] = null;
4318     } else {
4319       finalY[] = null;
4320     }
4321 
4322     current_loc = 0;
4323     current_loc_valid = 0;
4324 
4325     A[] = null;
4326     B[] = null;
4327     C[] = null;
4328     window[] = null;
4329     bit_reverse = null;
4330 
4331     serial = 0;
4332     last_page = 0;
4333     segment_count = 0;
4334     segments[] = 0;
4335     page_flag = 0;
4336     bytes_in_seg = 0;
4337     first_decode = 0;
4338     next_seg = 0;
4339     last_seg = 0;
4340     last_seg_which = 0;
4341     acc = 0;
4342     valid_bits = 0;
4343     packet_bytes = 0;
4344     end_seg_with_known_loc = 0;
4345     known_loc_for_packet = 0;
4346     discard_samples_deferred = 0;
4347     samples_output = 0;
4348 
4349     /+
4350     page_crc_tests = -1;
4351     scan[] = CRCscan.init;
4352     +/
4353 
4354     channel_buffer_start = 0;
4355     channel_buffer_end = 0;
4356   }
4357 
4358   @property const pure {
4359     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4360 
4361     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4362     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4363     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4364 
4365     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4366     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4367     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4368   }
4369 
4370   // will clear last error
4371   @property int lastError () {
4372     int e = error;
4373     error = STBVorbisError.no_error;
4374     return e;
4375   }
4376 
4377   // PUSHDATA API
4378   /+
4379   void flushPushdata () {
4380     if (push_mode) {
4381       previous_length = 0;
4382       page_crc_tests = 0;
4383       discard_samples_deferred = 0;
4384       current_loc_valid = false;
4385       first_decode = false;
4386       samples_output = 0;
4387       channel_buffer_start = 0;
4388       channel_buffer_end = 0;
4389     }
4390   }
4391 
4392   // return value: number of bytes we used
4393   int decodeFramePushdata(
4394            void* data, int data_len, // the memory available for decoding
4395            int* channels,            // place to write number of float* buffers
4396            float*** output,          // place to write float** array of float* buffers
4397            int* samples              // place to write number of output samples
4398        )
4399   {
4400     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4401 
4402     if (this.page_crc_tests >= 0) {
4403       *samples = 0;
4404       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4405     }
4406 
4407     this.stream = cast(ubyte*)data;
4408     this.stream_end = this.stream+data_len;
4409     this.error = STBVorbisError.no_error;
4410 
4411     // check that we have the entire packet in memory
4412     if (!is_whole_packet_present(this, false)) {
4413       *samples = 0;
4414       return 0;
4415     }
4416 
4417     int len, left, right;
4418 
4419     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4420       // save the actual error we encountered
4421       STBVorbisError error = this.error;
4422       if (error == STBVorbisError.bad_packet_type) {
4423         // flush and resynch
4424         this.error = STBVorbisError.no_error;
4425         while (get8_packet(this) != EOP) if (this.eof) break;
4426         *samples = 0;
4427         return this.stream-data;
4428       }
4429       if (error == STBVorbisError.continued_packet_flag_invalid) {
4430         if (this.previous_length == 0) {
4431           // we may be resynching, in which case it's ok to hit one
4432           // of these; just discard the packet
4433           this.error = STBVorbisError.no_error;
4434           while (get8_packet(this) != EOP) if (this.eof) break;
4435           *samples = 0;
4436           return this.stream-data;
4437         }
4438       }
4439       // if we get an error while parsing, what to do?
4440       // well, it DEFINITELY won't work to continue from where we are!
4441       flushPushdata();
4442       // restore the error that actually made us bail
4443       this.error = error;
4444       *samples = 0;
4445       return 1;
4446     }
4447 
4448     // success!
4449     len = vorbis_finish_frame(this, len, left, right);
4450     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4451 
4452     if (channels) *channels = this.vrchannels;
4453     *samples = len;
4454     *output = this.outputs.ptr;
4455     return this.stream-data;
4456   }
4457   +/
4458 
4459   public uint fileOffset () {
4460     if (/+push_mode ||+/ !isOpened) return 0;
4461     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4462     return (stpos > stst ? stpos-stst : 0);
4463   }
4464 
4465   public uint stream_len () { return stend-stst; }
4466 
4467   // DATA-PULLING API
4468   public int seekFrame (uint sample_number) {
4469     uint max_frame_samples;
4470 
4471     /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/
4472 
4473     // fast page-level search
4474     if (!seek_to_sample_coarse(this, sample_number)) return 0;
4475 
4476     assert(this.current_loc_valid);
4477     assert(this.current_loc <= sample_number);
4478 
4479     // linear search for the relevant packet
4480     max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
4481     while (this.current_loc < sample_number) {
4482       int left_start, left_end, right_start, right_end, mode, frame_samples;
4483       if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
4484       // calculate the number of samples returned by the next frame
4485       frame_samples = right_start-left_start;
4486       if (this.current_loc+frame_samples > sample_number) {
4487         return 1; // the next frame will contain the sample
4488       } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
4489         // there's a chance the frame after this could contain the sample
4490         vorbis_pump_first_frame(this);
4491       } else {
4492         // this frame is too early to be relevant
4493         this.current_loc += frame_samples;
4494         this.previous_length = 0;
4495         maybe_start_packet(this);
4496         flush_packet(this);
4497       }
4498     }
4499     // the next frame will start with the sample
4500     assert(this.current_loc == sample_number);
4501     return 1;
4502   }
4503 
4504   public int seek (uint sample_number) {
4505     if (!seekFrame(sample_number)) return 0;
4506     if (sample_number != this.current_loc) {
4507       int n;
4508       uint frame_start = this.current_loc;
4509       getFrameFloat(&n, null);
4510       assert(sample_number > frame_start);
4511       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4512       this.channel_buffer_start += (sample_number-frame_start);
4513     }
4514     return 1;
4515   }
4516 
  // Rewinds to `first_audio_page_offset`, resets the overlap/segment state
  // and pumps the first frame again; returns what `vorbis_pump_first_frame` reports.
  public bool seekStart () {
    /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
    set_file_offset(this, first_audio_page_offset);
    previous_length = 0; // no previous window to overlap with
    first_decode = true;
    next_seg = -1;
    return vorbis_pump_first_frame(this);
  }
4525 
  // Returns the total number of samples in the stream, or 0 when it cannot be
  // determined. The value is computed on the first call by locating the last
  // page and reading its granule position, then cached in `total_samples`;
  // the read position is restored before returning.
  public uint streamLengthInSamples () {
    uint restore_offset, previous_safe;
    uint end, last_page_loc;

    /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
    if (!this.total_samples) {
      uint last;
      uint lo, hi;
      char[6] header;

      // first, store the current decode position so we can restore it
      restore_offset = fileOffset;

      // now we want to seek back 64K from the end (the last page must
      // be at most a little less than 64K, but let's allow a little slop)
      if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
        previous_safe = this.stream_len-65536;
      } else {
        previous_safe = this.first_audio_page_offset;
      }

      set_file_offset(this, previous_safe);
      // previous_safe is now our candidate 'earliest known place that seeking
      // to will lead to the final page'

      if (!vorbis_find_page(this, &end, &last)) {
        // if we can't find a page, we're hosed!
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = 0xffffffff;
        goto done;
      }

      // check if there are more pages
      last_page_loc = fileOffset;

      // stop when the last_page flag is set, not when we reach eof;
      // this allows us to stop short of a 'file_section' end without
      // explicitly checking the length of the section
      while (!last) {
        set_file_offset(this, end);
        if (!vorbis_find_page(this, &end, &last)) {
          // the last page we found didn't have the 'last page' flag set. whoops!
          break;
        }
        previous_safe = last_page_loc+1;
        last_page_loc = fileOffset;
      }

      set_file_offset(this, last_page_loc);

      // parse the header
      getn(this, cast(ubyte*)header, 6);
      // extract the absolute granule position
      lo = get32(this);
      hi = get32(this);
      // an all-ones granule position is treated as "unknown"
      if (lo == 0xffffffff && hi == 0xffffffff) {
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = SAMPLE_unknown;
        goto done;
      }
      if (hi) lo = 0xfffffffe; // saturate
      this.total_samples = lo;

      // remember the last page for later use
      this.p_last.page_start = last_page_loc;
      this.p_last.page_end = end;
      this.p_last.last_decoded_sample = lo;

     done:
      set_file_offset(this, restore_offset);
    }
    return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
  }
4598 
4599   public float streamLengthInSeconds () {
4600     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4601   }
4602 
4603   public int getFrameFloat (int* channels, float*** output) {
4604     int len, right, left;
4605     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4606 
4607     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4608       channel_buffer_start = channel_buffer_end = 0;
4609       return 0;
4610     }
4611 
4612     len = vorbis_finish_frame(this, len, left, right);
4613     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4614 
4615     channel_buffer_start = left;
4616     channel_buffer_end = left+len;
4617 
4618     if (channels) *channels = this.vrchannels;
4619     if (output) *output = this.outputs.ptr;
4620     return len;
4621   }
4622 
4623   /+
4624   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4625     VorbisDecoder this;
4626     stb_vorbis_ctx p = void;
4627     if (data is null) return null;
4628     vorbis_init(&p, alloc);
4629     p.stream = cast(ubyte*)data;
4630     p.stream_end = cast(ubyte*)data+len;
4631     p.stream_start = cast(ubyte*)p.stream;
4632     p.stream_len = len;
4633     p.push_mode = false;
4634     if (start_decoder(&p)) {
4635       this = vorbis_alloc(&p);
4636       if (this) {
4637         *this = p;
4638         vorbis_pump_first_frame(this);
4639         return this;
4640       }
4641     }
4642     if (error) *error = p.error;
4643     vorbis_deinit(&p);
4644     return null;
4645   }
4646   +/
4647 
4648   // s16 samples API
4649   int getFrameShort (int num_c, short** buffer, int num_samples) {
4650     float** output;
4651     int len = getFrameFloat(null, &output);
4652     if (len > num_samples) len = num_samples;
4653     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4654     return len;
4655   }
4656 
4657   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4658     float** output;
4659     int len;
4660     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4661     len = getFrameFloat(null, &output);
4662     if (len) {
4663       if (len*num_c > num_shorts) len = num_shorts/num_c;
4664       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4665     }
4666     return len;
4667   }
4668 
4669   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4670     float** outputs;
4671     int len = num_shorts/channels;
4672     int n = 0;
4673     int z = this.vrchannels;
4674     if (z > channels) z = channels;
4675     while (n < len) {
4676       int k = channel_buffer_end-channel_buffer_start;
4677       if (n+k >= len) k = len-n;
4678       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4679       buffer += k*channels;
4680       n += k;
4681       channel_buffer_start += k;
4682       if (n == len) break;
4683       if (!getFrameFloat(null, &outputs)) break;
4684     }
4685     return n;
4686   }
4687 
4688   int getSamplesShort (int channels, short** buffer, int len) {
4689     float** outputs;
4690     int n = 0;
4691     int z = this.vrchannels;
4692     if (z > channels) z = channels;
4693     while (n < len) {
4694       int k = channel_buffer_end-channel_buffer_start;
4695       if (n+k >= len) k = len-n;
4696       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4697       n += k;
4698       channel_buffer_start += k;
4699       if (n == len) break;
4700       if (!getFrameFloat(null, &outputs)) break;
4701     }
4702     return n;
4703   }
4704 
4705   /+
4706   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4707     import core.stdc.stdlib : malloc, realloc;
4708 
4709     int data_len, offset, total, limit, error;
4710     short* data;
4711     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4712     if (v is null) return -1;
4713     limit = v.vrchannels*4096;
4714     *channels = v.vrchannels;
4715     if (sample_rate) *sample_rate = v.sample_rate;
4716     offset = data_len = 0;
4717     total = limit;
4718     data = cast(short*)malloc(total*(*data).sizeof);
4719     if (data is null) {
4720       stb_vorbis_close(v);
4721       return -2;
4722     }
4723     for (;;) {
4724       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4725       if (n == 0) break;
4726       data_len += n;
4727       offset += n*v.vrchannels;
4728       if (offset+limit > total) {
4729         short *data2;
4730         total *= 2;
4731         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4732         if (data2 is null) {
4733           import core.stdc.stdlib : free;
4734           free(data);
4735           stb_vorbis_close(v);
4736           return -2;
4737         }
4738         data = data2;
4739       }
4740     }
4741     *output = data;
4742     stb_vorbis_close(v);
4743     return data_len;
4744   }
4745 
4746   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4747     import core.stdc.stdlib : malloc, realloc;
4748 
4749     int data_len, offset, total, limit, error;
4750     short* data;
4751     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4752     if (v is null) return -1;
4753     limit = v.vrchannels*4096;
4754     *channels = v.vrchannels;
4755     if (sample_rate) *sample_rate = v.sample_rate;
4756     offset = data_len = 0;
4757     total = limit;
4758     data = cast(short*)malloc(total*(*data).sizeof);
4759     if (data is null) {
4760       stb_vorbis_close(v);
4761       return -2;
4762     }
4763     for (;;) {
4764       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4765       if (n == 0) break;
4766       data_len += n;
4767       offset += n*v.vrchannels;
4768       if (offset+limit > total) {
4769         short *data2;
4770         total *= 2;
4771         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4772         if (data2 is null) {
4773           import core.stdc.stdlib : free;
4774           free(data);
4775           stb_vorbis_close(v);
4776           return -2;
4777         }
4778         data = data2;
4779       }
4780     }
4781     *output = data;
4782     stb_vorbis_close(v);
4783     return data_len;
4784   }
4785 
4786   public int stb_vorbis_get_samples_float_interleaved (VorbisDecoder this, int channels, float* buffer, int num_floats) {
4787     float** outputs;
4788     int len = num_floats/channels;
4789     int n = 0;
4790     int z = this.vrchannels;
4791     if (z > channels) z = channels;
4792     while (n < len) {
4793       int k = this.channel_buffer_end-this.channel_buffer_start;
4794       if (n+k >= len) k = len-n;
4795       foreach (immutable j; 0..k) {
4796         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4797         foreach (immutable i; z..channels) *buffer++ = 0;
4798       }
4799       n += k;
4800       this.channel_buffer_start += k;
4801       if (n == len) break;
4802       if (!stb_vorbis_get_frame_float(this, null, &outputs)) break;
4803     }
4804     return n;
4805   }
4806   +/
4807 
4808   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4809     import core.stdc.string : memcpy, memset;
4810     float** outputs;
4811     int n = 0;
4812     int z = vrchannels;
4813     if (z > achans) z = achans;
4814     while (n < num_samples) {
4815       int k = channel_buffer_end-channel_buffer_start;
4816       if (n+k >= num_samples) k = num_samples-n;
4817       if (k) {
4818         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4819         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4820       }
4821       n += k;
4822       channel_buffer_start += k;
4823       if (n == num_samples) break;
4824       if (!getFrameFloat(null, &outputs)) break;
4825     }
4826     return n;
4827   }
4828 
4829 private: // k8: 'cause i'm evil
  // size, in bytes, of the length field that starts every stored comment line
  // (comment_get_line_len reads it as two bytes, low byte first)
  private enum cmt_len_size = 2;
4831   nothrow /*@trusted*/ @nogc {
4832     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4833 
4834     // 0: error
4835     // includes length itself
4836     private uint comment_get_line_len () const pure {
4837       if (comment_data_pos >= comment_size) return 0;
4838       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4839       uint len = comment_data[comment_data_pos];
4840       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4841       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4842     }
4843 
4844     public bool comment_rewind () {
4845       comment_data_pos = 0;
4846       for (;;) {
4847         auto len = comment_get_line_len();
4848         if (!len) { comment_data_pos = comment_size; return false; }
4849         if (len != cmt_len_size) return true;
4850         comment_data_pos += len;
4851       }
4852     }
4853 
4854     // true: has something to read after skip
4855     public bool comment_skip () {
4856       comment_data_pos += comment_get_line_len();
4857       for (;;) {
4858         auto len = comment_get_line_len();
4859         if (!len) { comment_data_pos = comment_size; return false; }
4860         if (len != cmt_len_size) break;
4861         comment_data_pos += len;
4862       }
4863       return true;
4864     }
4865 
4866     public const(char)[] comment_line () {
4867       auto len = comment_get_line_len();
4868       if (len < cmt_len_size) return null;
4869       if (len == cmt_len_size) return "";
4870       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4871     }
4872 
4873     public const(char)[] comment_name () {
4874       auto line = comment_line();
4875       if (line.length == 0) return line;
4876       uint epos = 0;
4877       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4878       return (epos < line.length ? line[0..epos] : "");
4879     }
4880 
4881     public const(char)[] comment_value () {
4882       auto line = comment_line();
4883       if (line.length == 0) return line;
4884       uint epos = 0;
4885       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4886       return (epos < line.length ? line[epos+1..$] : line);
4887     }
4888   }
4889 }
4890 
4891 
4892 // ////////////////////////////////////////////////////////////////////////// //
4893 private:
4894 // cool helper to translate C defines
template cmacroFixVars(T...) {
  /**
   * 64-bit implementation of fasthash
   *
   * Params:
   *   buf =  data buffer
   *   len =  number of bytes of `buf` to hash
   *   seed = the seed
   *
   * Returns:
   *   32-bit or 64-bit hash
   */
  size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
    enum ulong stateMul = 0x880355f21e6d1965UL;

    // scrambling step applied to every input word and to the final state
    static ulong mix (ulong v) pure nothrow @safe @nogc {
      v ^= v>>23;
      v *= 0x2127599bf4325c37UL;
      v ^= v>>47;
      return v;
    }

    // assemble eight bytes into one word, lowest address in the lowest bits
    static ulong load8 (const(ubyte)* p) pure nothrow @trusted @nogc {
      ulong w = 0;
      foreach (immutable uint bidx; 0..8) w |= cast(ulong)p[bidx]<<(bidx*8);
      return w;
    }

    auto cursor = cast(const(ubyte)*)buf;
    ulong state = seed;

    // full 8-byte words
    for (size_t wordsLeft = len/8; wordsLeft != 0; --wordsLeft) {
      ulong word;
      version(HasUnalignedOps) {
        if (__ctfe) {
          word = load8(cursor);
        } else {
          word = *cast(ulong*)cursor;
        }
      } else {
        word = load8(cursor);
      }
      cursor += 8;
      state ^= mix(word);
      state *= stateMul;
    }

    state ^= len*stateMul;

    // remaining 0..7 bytes; this step runs even when nothing remains
    ulong tail = 0;
    foreach (immutable uint bidx; 0..cast(uint)(len&7)) tail ^= cast(ulong)cursor[bidx]<<(bidx*8);
    state ^= mix(tail);
    state *= stateMul;

    state = mix(state);
    static if (size_t.sizeof == 4) {
      // 32-bit hash
      // the following trick converts the 64-bit hashcode to Fermat
      // residue, which shall retain information from both the higher
      // and lower parts of hashcode.
      return cast(size_t)(state-(state>>32));
    } else {
      return state;
    }
  }

  /**
   * Expand `${name}` placeholders in `s`.
   *
   * The template arguments `T` are the placeholder names; `names` holds the
   * replacement texts, matched to `T` by position. The special placeholder
   * `${__temp_prefix__}` expands to an identifier prefix built from the hash
   * of the (right-trimmed) macro text, the same prefix for every occurrence.
   * Trailing whitespace and leading blank lines of `s` are dropped.
   *
   * Params:
   *   s =     macro text
   *   names = replacement for each name in `T`
   *
   * Returns:
   *   the expanded text
   */
  string cmacroFixVars (string s, string[] names...) {
    assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");
    enum tmpPfxName = "__temp_prefix__";
    string expanded;
    string tempPrefix; // built lazily, on the first ${__temp_prefix__}

    // trim trailing spaces
    while (s.length > 0 && s[$-1] <= ' ') s = s[0..$-1];

    // skip empty lines (for pretty printing), keeping the indent of the first real line
    uint firstLine = 0;
    for (uint scan = 0; scan < s.length && s[scan] <= ' '; ++scan) {
      if (s[scan] == '\n') firstLine = scan+1;
    }
    uint pos = firstLine;

    while (pos+2 < s.length) {
      // copy literal text up to the next "${"
      int mark = pos;
      while (mark+2 < s.length && !(s[mark] == '$' && s[mark+1] == '{')) ++mark;
      if (mark != pos) {
        if (s.length-mark < 3) break; // too short for a placeholder: the rest is literal
        expanded ~= s[pos..mark];
        pos = mark;
      }
      assert(s[pos] == '$' && s[pos+1] == '{');
      pos += 2;

      bool found = false;
      if (s.length-pos > tmpPfxName.length && s[pos+tmpPfxName.length] == '}' && s[pos..pos+tmpPfxName.length] == tmpPfxName) {
        if (tempPrefix.length == 0) {
          // generate temporary prefix
          auto hash = hashOf(s.ptr, s.length);
          tempPrefix = "_temp_macro_var_";
          // one hex digit per nibble, lowest nibble first
          for (size_t nibble = 0; nibble < size_t.sizeof*2; ++nibble) {
            tempPrefix ~= "0123456789abcdef"[hash&0x0f];
            hash >>= 4;
          }
          tempPrefix ~= "_";
        }
        pos += tmpPfxName.length+1;
        expanded ~= tempPrefix;
        found = true;
      } else {
        foreach (immutable argIdx, string varName; T) {
          static assert(varName.length > 0);
          if (s.length-pos > varName.length && s[pos+varName.length] == '}' && s[pos..pos+varName.length] == varName) {
            found = true;
            pos += varName.length+1;
            expanded ~= names[argIdx];
            break;
          }
        }
      }
      assert(found, "unknown variable in macro");
    }

    if (pos < s.length) expanded ~= s[pos..$];
    return expanded;
  }
}
5031 
5032 // ////////////////////////////////////////////////////////////////////////// //
5033 /* Version history
5034     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5035     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5036                            avoid discarding last frame of audio data
5037     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5038                            some more crash fixes when out of memory or with corrupt files
5039     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5040                            some crash fixes when out of memory or with corrupt files
5041     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5042     1.04    - 2014/08/27 - fix missing const-correct case in API
5043     1.03    - 2014/08/07 - Warning fixes
5044     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5045     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5046     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5047                            (API change) report sample rate for decode-full-file funcs
5048     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5049     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5050     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5051     0.99993 - remove assert that fired on legal files with empty tables
5052     0.99992 - rewind-to-start
5053     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5054     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5055     0.9998 - add a full-decode function with a memory source
5056     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5057     0.9996 - query length of vorbis stream in samples/seconds
5058     0.9995 - bugfix to another optimization that only happened in certain files
5059     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5060     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5061     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5062     0.9991 - performance improvement of IMDCT
5063     0.999 - (should have been 0.9990) performance improvement of IMDCT
5064     0.998 - no-CRT support from Casey Muratori
5065     0.997 - bugfixes for bugs found by Terje Mathisen
5066     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5067     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5068     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5069     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5070     0.992 - fixes for MinGW warning
5071     0.991 - turn fast-float-conversion on by default
5072     0.990 - fix push-mode seek recovery if you seek into the headers
5073     0.98b - fix to bad release of 0.98
5074     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5075     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5076     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5077     0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
5079     0.93 - fixed all-zero-floor case (was decoding garbage)
5080     0.92 - fixed a memory leak
5081     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5082     0.90 - first public release
5083 */
5084 
5085 /*
5086 ------------------------------------------------------------------------------
5087 This software is available under 2 licenses -- choose whichever you prefer.
5088 ------------------------------------------------------------------------------
5089 ALTERNATIVE A - MIT License
5090 Copyright (c) 2017 Sean Barrett
5091 Permission is hereby granted, free of charge, to any person obtaining a copy of
5092 this software and associated documentation files (the "Software"), to deal in
5093 the Software without restriction, including without limitation the rights to
5094 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5095 of the Software, and to permit persons to whom the Software is furnished to do
5096 so, subject to the following conditions:
5097 The above copyright notice and this permission notice shall be included in all
5098 copies or substantial portions of the Software.
5099 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5100 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5101 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5102 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5103 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5104 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5105 SOFTWARE.
5106 ------------------------------------------------------------------------------
5107 ALTERNATIVE B - Public Domain (www.unlicense.org)
5108 This is free and unencumbered software released into the public domain.
5109 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5110 software, either in source code form or as a compiled binary, for any purpose,
5111 commercial or non-commercial, and by any means.
5112 In jurisdictions that recognize copyright laws, the author or authors of this
5113 software dedicate any and all copyright interest in the software to the public
5114 domain. We make this dedication for the benefit of the public at large and to
5115 the detriment of our heirs and successors. We intend this dedication to be an
5116 overt act of relinquishment in perpetuity of all present and future rights to
5117 this software under copyright law.
5118 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5119 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5120 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5121 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5122 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5123 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5124 ------------------------------------------------------------------------------
5125 */