1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 // stolen by adam and module renamed.
57 /++
58 	Port of stb_vorbis to D. Provides .ogg audio file reading capabilities. See [arsd.simpleaudio] for code that can use this to actually load and play the file.
59 +/
60 module arsd.vorbis;
61 
62 import core.stdc.stdio : FILE;
63 
version(Windows)
	// Fallback `lrintf` definition for Windows builds (presumably for C runtimes
	// that do not export the C99 function -- confirm against the toolchains in use).
	// C99 `lrintf` rounds to the nearest integer using the current rounding mode;
	// a bare `cast(int)` truncates toward zero instead, which biases every
	// converted value. `rndtol` performs the rounding conversion.
	extern(C) int lrintf(float f) { import core.math : rndtol; return cast(int) rndtol(f); }
66 
67 nothrow /*@trusted*/:
68 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
// import it here, as druntime has no `@nogc` on it (for a reason)
// Local redeclaration of the C library `qsort`: being declared inside the
// enclosing `nothrow:` / `@nogc { ... }` scopes, it is callable from the
// attribute-restricted code in this module.
// NOTE(review): this is only sound if every comparator passed in is itself
// non-allocating and non-throwing -- confirm at the call sites.
private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(const scope void*, const scope void*) compar);
71 
72 
73 //////////////////////////////////////////////////////////////////////////////
74 //
75 //  HEADER BEGINS HERE
76 //
77 
78 ///////////   THREAD SAFETY
79 
80 // Individual VorbisDecoder handles are not thread-safe; you cannot decode from
81 // them from multiple threads at the same time. However, you can have multiple
// VorbisDecoder handles and decode from them independently in multiple threads.
83 
84 
85 ///////////   MEMORY ALLOCATION
86 
87 // normally stb_vorbis uses malloc() to allocate memory at startup,
88 // and alloca() to allocate temporary memory during a frame on the
89 // stack. (Memory consumption will depend on the amount of setup
90 // data in the file and how you set the compile flags for speed
91 // vs. size. In my test files the maximal-size usage is ~150KB.)
92 //
93 // You can modify the wrapper functions in the source (setup_malloc,
94 // setup_temp_malloc, temp_malloc) to change this behavior, or you
95 // can use a simpler allocation model: you pass in a buffer from
96 // which stb_vorbis will allocate _all_ its memory (including the
97 // temp memory). "open" may fail with a VORBIS_outofmem if you
98 // do not pass in enough data; there is no way to determine how
99 // much you do need except to succeed (at which point you can
100 // query get_info to find the exact amount required. yes I know
101 // this is lame).
102 //
103 // If you pass in a non-null buffer of the type below, allocation
104 // will occur from it as described above. Otherwise just pass null
105 // to use malloc()/alloca()
106 
/// Describes a caller-supplied memory buffer, as discussed in the
/// "MEMORY ALLOCATION" notes above.
public struct stb_vorbis_alloc {
  ubyte* alloc_buffer;              // start of the caller-owned buffer
  int alloc_buffer_length_in_bytes; // size of `alloc_buffer`, in bytes
}
111 
112 
113 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
114 
115 /*
116 public struct stb_vorbis_info {
117   uint sample_rate;
118   int channels;
119 
120   uint setup_memory_required;
121   uint setup_temp_memory_required;
122   uint temp_memory_required;
123 
124   int max_frame_size;
125 }
126 */
127 
128 
129 /* ************************************************************************** *
130 // get general information about the file
131 stb_vorbis_info stb_vorbis_get_info (VorbisDecoder f);
132 
133 // get the last error detected (clears it, too)
134 int stb_vorbis_get_error (VorbisDecoder f);
135 
136 // close an ogg vorbis file and free all memory in use
137 void stb_vorbis_close (VorbisDecoder f);
138 
139 // this function returns the offset (in samples) from the beginning of the
140 // file that will be returned by the next decode, if it is known, or -1
141 // otherwise. after a flush_pushdata() call, this may take a while before
142 // it becomes valid again.
143 // NOT WORKING YET after a seek with PULLDATA API
144 int stb_vorbis_get_sample_offset (VorbisDecoder f);
145 
146 // returns the current seek point within the file, or offset from the beginning
147 // of the memory buffer. In pushdata mode it returns 0.
148 uint stb_vorbis_get_file_offset (VorbisDecoder f);
149 
150 
151 ///////////   PUSHDATA API
152 
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159 
160 // create a vorbis decoder by passing in the initial data block containing
//    the ogg&vorbis headers (you don't need to parse them, just provide
//    the first N bytes of the file--you're told if it's not enough, see below)
// on success, returns a VorbisDecoder, does not set error, returns the amount of
//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
// on failure, returns null and sets *error, does not change *datablock_memory_consumed
166 // if returns null and *error is VORBIS_need_more_data, then the input block was
167 //       incomplete and you need to pass in a larger block from the start of the file
168 VorbisDecoder stb_vorbis_open_pushdata (
169               ubyte* datablock, int datablock_length_in_bytes,
170               int* datablock_memory_consumed_in_bytes,
171               int* error,
172               stb_vorbis_alloc* alloc_buffer
173             );
174 
175 // decode a frame of audio sample data if possible from the passed-in data block
176 //
177 // return value: number of bytes we used from datablock
178 //
179 // possible cases:
180 //     0 bytes used, 0 samples output (need more data)
181 //     N bytes used, 0 samples output (resynching the stream, keep going)
182 //     N bytes used, M samples output (one frame of data)
183 // note that after opening a file, you will ALWAYS get one N-bytes, 0-sample
184 // frame, because Vorbis always "discards" the first frame.
185 //
186 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
187 // instead only datablock_length_in_bytes-3 or less. This is because it wants
188 // to avoid missing parts of a page header if they cross a datablock boundary,
189 // without writing state-machiney code to record a partial detection.
190 //
191 // The number of channels returned are stored in *channels (which can be
192 // null--it is always the same as the number of channels reported by
193 // get_info). *output will contain an array of float* buffers, one per
194 // channel. In other words, (*output)[0][0] contains the first sample from
195 // the first channel, and (*output)[1][0] contains the first sample from
196 // the second channel.
197 int stb_vorbis_decode_frame_pushdata (
198       VorbisDecoder f, ubyte* datablock, int datablock_length_in_bytes,
199       int* channels,   // place to write number of float * buffers
200       float*** output, // place to write float ** array of float * buffers
201       int* samples     // place to write number of output samples
202     );
203 
204 // inform stb_vorbis that your next datablock will not be contiguous with
205 // previous ones (e.g. you've seeked in the data); future attempts to decode
206 // frames will cause stb_vorbis to resynchronize (as noted above), and
207 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
208 // will begin decoding the _next_ frame.
209 //
210 // if you want to seek using pushdata, you need to seek in your file, then
211 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
212 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
213 // if you don't like the result, seek your file again and repeat.
214 void stb_vorbis_flush_pushdata (VorbisDecoder f);
215 
216 
217 //////////   PULLING INPUT API
218 
219 // This API assumes stb_vorbis is allowed to pull data from a source--
220 // either a block of memory containing the _entire_ vorbis stream, or a
221 // FILE* that you or it create, or possibly some other reading mechanism
222 // if you go modify the source to replace the FILE* case with some kind
223 // of callback to your code. (But if you don't support seeking, you may
224 // just want to go ahead and use pushdata.)
225 
226 // decode an entire file and output the data interleaved into a malloc()ed
227 // buffer stored in *output. The return value is the number of samples
228 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
229 // When you're done with it, just free() the pointer returned in *output.
230 int stb_vorbis_decode_filename (const(char)* filename, int* channels, int* sample_rate, short** output);
231 int stb_vorbis_decode_memory (const(ubyte)* mem, int len, int* channels, int* sample_rate, short** output);
232 
233 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
234 // this must be the entire stream!). on failure, returns null and sets *error
235 VorbisDecoder stb_vorbis_open_memory (const(ubyte)* data, int len, int* error, stb_vorbis_alloc* alloc_buffer);
236 
237 // create an ogg vorbis decoder from a filename via fopen(). on failure,
238 // returns null and sets *error (possibly to VORBIS_file_open_failure).
239 VorbisDecoder stb_vorbis_open_filename (const(char)* filename, int* error, stb_vorbis_alloc* alloc_buffer);
240 
241 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
242 // the _current_ seek point (ftell). on failure, returns null and sets *error.
243 // note that stb_vorbis must "own" this stream; if you seek it in between
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
245 // perform stb_vorbis_seek_*() operations on this file, it will assume it
246 // owns the _entire_ rest of the file after the start point. Use the next
247 // function, stb_vorbis_open_file_section(), to limit it.
248 VorbisDecoder stb_vorbis_open_file (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer);
249 
250 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
251 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
252 // on failure, returns null and sets *error. note that stb_vorbis must "own"
253 // this stream; if you seek it in between calls to stb_vorbis, it will become
254 // confused.
255 VorbisDecoder stb_vorbis_open_file_section (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer, uint len);
256 
257 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
258 // after calling seek_frame(), the next call to get_frame_*() will include
259 // the specified sample. after calling stb_vorbis_seek(), the next call to
260 // stb_vorbis_get_samples_* will start with the specified sample. If you
261 // do not need to seek to EXACTLY the target sample when using get_samples_*,
262 // you can also use seek_frame().
263 int stb_vorbis_seek_frame (VorbisDecoder f, uint sample_number);
264 int stb_vorbis_seek (VorbisDecoder f, uint sample_number);
265 
266 // this function is equivalent to stb_vorbis_seek(f, 0)
267 int stb_vorbis_seek_start (VorbisDecoder f);
268 
269 // these functions return the total length of the vorbis stream
270 uint stb_vorbis_stream_length_in_samples (VorbisDecoder f);
271 float stb_vorbis_stream_length_in_seconds (VorbisDecoder f);
272 
273 // decode the next frame and return the number of samples. the number of
274 // channels returned are stored in *channels (which can be null--it is always
275 // the same as the number of channels reported by get_info). *output will
276 // contain an array of float* buffers, one per channel. These outputs will
277 // be overwritten on the next call to stb_vorbis_get_frame_*.
278 //
279 // You generally should not intermix calls to stb_vorbis_get_frame_*()
280 // and stb_vorbis_get_samples_*(), since the latter calls the former.
281 int stb_vorbis_get_frame_float (VorbisDecoder f, int* channels, float*** output);
282 
283 // decode the next frame and return the number of *samples* per channel.
284 // Note that for interleaved data, you pass in the number of shorts (the
285 // size of your array), but the return value is the number of samples per
286 // channel, not the total number of samples.
287 //
288 // The data is coerced to the number of channels you request according to the
289 // channel coercion rules (see below). You must pass in the size of your
290 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
291 // The maximum buffer size needed can be gotten from get_info(); however,
292 // the Vorbis I specification implies an absolute maximum of 4096 samples
293 // per channel.
294 int stb_vorbis_get_frame_short_interleaved (VorbisDecoder f, int num_c, short* buffer, int num_shorts);
295 int stb_vorbis_get_frame_short (VorbisDecoder f, int num_c, short** buffer, int num_samples);
296 
297 // Channel coercion rules:
298 //    Let M be the number of channels requested, and N the number of channels present,
299 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
300 //    and stereo R be the sum of all R and center channels (channel assignment from the
301 //    vorbis spec).
302 //        M    N       output
303 //        1    k      sum(Ck) for all k
304 //        2    *      stereo L, stereo R
305 //        k    l      k > l, the first l channels, then 0s
306 //        k    l      k <= l, the first k channels
307 //    Note that this is not _good_ surround etc. mixing at all! It's just so
308 //    you get something useful.
309 
310 // gets num_samples samples, not necessarily on a frame boundary--this requires
311 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
312 // Returns the number of samples stored per channel; it may be less than requested
313 // at the end of the file. If there are no more samples in the file, returns 0.
314 int stb_vorbis_get_samples_float_interleaved (VorbisDecoder f, int channels, float* buffer, int num_floats);
315 int stb_vorbis_get_samples_float (VorbisDecoder f, int channels, float** buffer, int num_samples);
316 
317 // gets num_samples samples, not necessarily on a frame boundary--this requires
318 // buffering so you have to supply the buffers. Applies the coercion rules above
319 // to produce 'channels' channels. Returns the number of samples stored per channel;
320 // it may be less than requested at the end of the file. If there are no more
321 // samples in the file, returns 0.
322 int stb_vorbis_get_samples_short_interleaved (VorbisDecoder f, int channels, short* buffer, int num_shorts);
323 int stb_vorbis_get_samples_short (VorbisDecoder f, int channels, short** buffer, int num_samples);
324 */
325 
326 ////////   ERROR CODES
327 
/// Error codes reported by the decoder.
/// Members without an explicit initializer take the previous value plus one,
/// so the numeric values fall into the groups 0, 1..7, 10..11, 20..21 and 30..37.
public enum STBVorbisError {
  no_error,              // 0: nothing went wrong

  need_more_data = 1,    // not a real error

  invalid_api_mixing,    // can't mix API modes
  outofmem,              // not enough memory
  feature_not_supported, // uses floor 0
  too_many_channels,     // STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure,     // fopen() failed
  seek_without_length,   // can't seek in unknown-length file

  unexpected_eof = 10,   // file is truncated?
  seek_invalid,          // seek past EOF

  // decoding errors (corrupt/invalid stream) -- you probably
  // don't care about the exact details of these

  // vorbis errors:
  invalid_setup = 20,
  invalid_stream,

  // ogg errors:
  missing_capture_pattern = 30,
  invalid_stream_structure_version,
  continued_packet_flag_invalid,
  incorrect_stream_serial_number,
  invalid_first_page,
  bad_packet_type,
  cant_find_last_page,
  seek_failed,           // 37: last code of the ogg group
}
360 //
361 //  HEADER ENDS HERE
362 //
363 //////////////////////////////////////////////////////////////////////////////
364 
365 
366 // global configuration settings (e.g. set these in the project/makefile),
367 // or just set them in this file at the top (although ideally the first few
368 // should be visible when the header file is compiled too, although it's not
369 // crucial)
370 
371 // STB_VORBIS_NO_INTEGER_CONVERSION
372 //     does not compile the code for converting audio sample data from
373 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
374 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
375 
376 // STB_VORBIS_NO_FAST_SCALED_FLOAT
377 //      does not use a fast float-to-int trick to accelerate float-to-int on
378 //      most platforms which requires endianness be defined correctly.
379 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
380 
381 // STB_VORBIS_MAX_CHANNELS [number]
382 //     globally define this to the maximum number of channels you need.
383 //     The spec does not put a restriction on channels except that
384 //     the count is stored in a byte, so 255 is the hard limit.
385 //     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB. Plus any other memory usage
387 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
388 //     6 (5.1 audio), or 2 (stereo only).
389 enum STB_VORBIS_MAX_CHANNELS = 16; // enough for anyone?
390 
391 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
392 //     after a flush_pushdata(), stb_vorbis begins scanning for the
393 //     next valid page, without backtracking. when it finds something
394 //     that looks like a page, it streams through it and verifies its
395 //     CRC32. Should that validation fail, it keeps scanning. But it's
396 //     possible that _while_ streaming through to check the CRC32 of
397 //     one candidate page, it sees another candidate page. This #define
398 //     determines how many "overlapping" candidate pages it can search
399 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
400 //     garbage pages could be as big as 64KB, but probably average ~16KB.
401 //     So don't hose ourselves by scanning an apparent 64KB page and
402 //     missing a ton of real ones in the interim; so minimum of 2
403 enum STB_VORBIS_PUSHDATA_CRC_COUNT = 4;
404 
405 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
406 //     sets the log size of the huffman-acceleration table.  Maximum
407 //     supported value is 24. with larger numbers, more decodings are O(1),
408 //     but the table size is larger so worse cache missing, so you'll have
409 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
410 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
411 
412 // STB_VORBIS_FAST_BINARY_LENGTH [number]
413 //     sets the log size of the binary-search acceleration table. this
414 //     is used in similar fashion to the fast-huffman size to set initial
415 //     parameters for the binary search
416 
417 // STB_VORBIS_FAST_HUFFMAN_INT
418 //     The fast huffman tables are much more efficient if they can be
419 //     stored as 16-bit results instead of 32-bit results. This restricts
420 //     the codebooks to having only 65535 possible outcomes, though.
421 //     (At least, accelerated by the huffman table.)
422 //version = STB_VORBIS_FAST_HUFFMAN_INT;
423 version(STB_VORBIS_FAST_HUFFMAN_INT) {} else version = STB_VORBIS_FAST_HUFFMAN_SHORT;
424 
425 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
426 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
427 //     back on binary searching for the correct one. This requires storing
428 //     extra tables with the huffman codes in sorted order. Defining this
429 //     symbol trades off space for speed by forcing a linear search in the
430 //     non-fast case, except for "sparse" codebooks.
431 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
432 
433 // STB_VORBIS_DIVIDES_IN_RESIDUE
434 //     stb_vorbis precomputes the result of the scalar residue decoding
435 //     that would otherwise require a divide per chunk. you can trade off
436 //     space for time by defining this symbol.
437 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
438 
439 // STB_VORBIS_DIVIDES_IN_CODEBOOK
440 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
441 //     stored, or with all elements being chosen from a small range of values,
442 //     and all values possible in all elements. By default, stb_vorbis expands
443 //     this latter kind out to look like the former kind for ease of decoding,
444 //     because otherwise an integer divide-per-vector-element is required to
445 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
446 //     trade off storage for speed.
447 //version = STB_VORBIS_DIVIDES_IN_CODEBOOK;
448 
449 version(STB_VORBIS_CODEBOOK_SHORTS) static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
450 
451 // STB_VORBIS_DIVIDE_TABLE
452 //     this replaces small integer divides in the floor decode loop with
453 //     table lookups. made less than 1% difference, so disabled by default.
454 //version = STB_VORBIS_DIVIDE_TABLE;
455 
456 // STB_VORBIS_NO_DEFER_FLOOR
457 //     Normally we only decode the floor without synthesizing the actual
458 //     full curve. We can instead synthesize the curve immediately. This
459 //     requires more memory and is very likely slower, so I don't think
460 //     you'd ever want to do it except for debugging.
461 //version = STB_VORBIS_NO_DEFER_FLOOR;
462 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
463 
464 
465 // ////////////////////////////////////////////////////////////////////////// //
466 private:
// Compile-time sanity checks on the configuration constants defined above.
// NOTE(review): the STB_VORBIS_MAX_CHANNELS comment above calls 255 the hard
// limit, yet this check accepts 256 -- confirm which bound is intended.
static assert(STB_VORBIS_MAX_CHANNELS <= 256, "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24, "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");

enum MAX_BLOCKSIZE_LOG = 13; // from specification
enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG); // 8192
472 
473 
474 alias codetype = float;
475 
476 // @NOTE
477 //
478 // Some arrays below are tagged "//varies", which means it's actually
479 // a variable-sized piece of data, but rather than malloc I assume it's
480 // small enough it's better to just allocate it all together with the
481 // main thing
482 //
483 // Most of the variables are specified with the smallest size I could pack
484 // them into. It might give better performance to make them all full-sized
485 // integers. It should be safe to freely rearrange the structures or change
486 // the sizes larger--nothing relies on silently truncating etc., nor the
487 // order of variables.
488 
// Number of entries in each codebook's `fast_huffman` table
// (2^STB_VORBIS_FAST_HUFFMAN_LENGTH), and the matching low-bit mask.
enum FAST_HUFFMAN_TABLE_SIZE = (1<<STB_VORBIS_FAST_HUFFMAN_LENGTH);
enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE-1);
491 
// Per-codebook state. Field meanings below are inferred from the names and
// the configuration notes earlier in this file -- NOTE(review): confirm
// against the setup/decoding code, which is outside this section.
struct Codebook {
  int dimensions, entries;
  ubyte* codeword_lengths;   // presumably one length per entry
  float minimum_value;
  float delta_value;
  ubyte value_bits;
  ubyte lookup_type;
  ubyte sequence_p;
  ubyte sparse;
  uint lookup_values;
  codetype* multiplicands;   // `codetype` is aliased to float above
  uint *codewords;
  // Acceleration table with FAST_HUFFMAN_TABLE_SIZE entries; the element
  // width is 16 or 32 bits depending on the STB_VORBIS_FAST_HUFFMAN_* version.
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  // presumably the sorted tables used by the binary-search fallback described
  // under STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH -- TODO confirm
  uint* sorted_codewords;
  int* sorted_values;
  int sorted_entries;
}
513 
// Configuration fields for a "floor 0" curve. The limitations list at the top
// of the file states that floor 0 is not supported, so this appears to be
// layout only -- NOTE(review): confirm nothing decodes through it.
struct Floor0 {
  ubyte order;
  ushort rate;
  ushort bark_map_size;
  ubyte amplitude_bits;
  ubyte amplitude_offset;
  ubyte number_of_books;
  ubyte[16] book_list; // varies
}
523 
// Configuration fields for a "floor 1" curve. Arrays tagged "varies" are
// fixed-capacity storage for variable-length data (see the @NOTE above).
// 31*8+2 evaluates to 250, the capacity shared by the per-point arrays.
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list; // varies
  ubyte[16] class_dimensions; // varies
  ubyte[16] class_subclasses; // varies
  ubyte[16] class_masterbooks; // varies
  short[8][16] subclass_books; // varies (D reads this as 16 rows of short[8])
  ushort[31*8+2] Xlist; // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors; // 250 pairs of ubyte
  ubyte floor1_multiplier;
  ubyte rangebits;
  int values;
}
538 
// Overlapping storage for either floor layout. The union itself holds no tag
// saying which member is valid; that must be tracked by the owner.
union Floor {
  Floor0 floor0;
  Floor1 floor1;
}
543 
// Per-residue configuration. `classdata` and `residue_books` are pointers,
// so their storage lives outside this struct (allocated elsewhere in the file).
struct Residue {
  uint begin, end;
  uint part_size;
  ubyte classifications;
  ubyte classbook;
  ubyte** classdata;
  //int16 (*residue_books)[8];
  short[8]* residue_books; // pointer to rows of 8 shorts, mirroring the C declaration above
}
553 
// Per-channel entry of a Mapping (referenced through `Mapping.chan`).
// NOTE(review): `magnitude`/`angle` presumably hold channel-coupling indices
// and `mux` a submap index -- confirm against the mapping setup code.
struct MappingChannel {
  ubyte magnitude;
  ubyte angle;
  ubyte mux;
}
559 
// Channel mapping configuration. `chan` points at MappingChannel storage held
// outside this struct; the two submap arrays are fixed-capacity ("varies").
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan;
  ubyte submaps;
  ubyte[15] submap_floor; // varies
  ubyte[15] submap_residue; // varies
}
567 
// One decoding mode entry.
// NOTE(review): `mapping` presumably indexes the mapping list and `blockflag`
// selects between the two block sizes -- confirm against the setup code.
struct Mode {
  ubyte blockflag;
  ubyte mapping;
  ushort windowtype;
  ushort transformtype;
}
574 
// State for one candidate page whose CRC is being checked while scanning
// (see the STB_VORBIS_PUSHDATA_CRC_COUNT notes above, which describe the
// overlapping candidate pages examined after a flush_pushdata()).
struct CRCscan {
  uint goal_crc;   // expected crc if match
  int bytes_left;  // bytes left in packet
  uint crc_so_far; // running crc
  int bytes_done;  // bytes processed in _current_ chunk
  uint sample_loc; // granule pos encoded in page
}
582 
// Record of a page examined in the stream: where it starts and ends, plus a
// sample position. NOTE(review): offsets are presumably byte positions in the
// input and used by the seeking code -- confirm at the use sites.
struct ProbedPage {
  uint page_start, page_end;
  uint last_decoded_sample;
}
587 
// Stores `e` as the decoder's current error and returns 0, so callers can
// write `return error(f, ...)` to report a failure in a single statement.
private int error (VorbisDecoder f, STBVorbisError e) {
  f.error = e;
  if (!f.eof) {
    if (e != STBVorbisError.need_more_data) {
      // Reached only for genuine errors before end-of-stream; the repeated
      // store exists purely as a convenient debugger breakpoint location.
      // import std.stdio; debug writeln(e);
      f.error = e; // breakpoint for debugging
    }
  }
  return 0;
}
596 
597 // these functions are used for allocating temporary memory
598 // while decoding. if you can afford the stack space, use
599 // alloca(); otherwise, provide a temp buffer and it will
600 // allocate out of those.
// Returns the decoder allocator's current temp-memory marker (forwards to
// `f.alloc.tempSave`); pass it to temp_alloc_restore() later.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return f.alloc.tempSave(f);
}
// Hands marker `p` (as obtained from temp_alloc_save()) back to the decoder's
// allocator by forwarding to `f.alloc.tempRestore`.
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  f.alloc.tempRestore(p, f);
}
// No-op: the body is empty and both parameters are ignored.
// NOTE(review): presumably temp memory is reclaimed by temp_alloc_restore()
// or by alloca's automatic release instead -- confirm.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc {}
604 /*
605 T* temp_alloc(T) (VorbisDecoder f, uint count) nothrow @nogc {
606   auto res = f.alloc.alloc(count*T.sizeof, f);
607   return cast(T*)res;
608 }
609 */
610 
611 /+
612 enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
613 
614 // has to be a mixin, due to `alloca`
615 template temp_alloc(string size) {
616   enum temp_alloc = q{(f.alloc.alloc_buffer ? setup_temp_malloc(f, (${size})) : alloca(${size}))}.cmacroFixVars!("size")(size);
617 }
618 
619 // has to be a mixin, due to `alloca`
620 template temp_block_array(string count, string size) {
621   enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
622     .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
623 }
624 +/
// Builds (as a string, for use in a mixin) the expression for the number of
// bytes make_block_array() needs: `count` pointers plus `count` blocks of
// `size` bytes each. `cmacroFixVars` is defined elsewhere in the file and
// presumably substitutes the ${...} placeholders -- TODO confirm.
enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
626 
// Builds (as a string, for use in a mixin) an `alloca(<size>)` call.
// The commented-out variant above notes this has to be a mixin due to
// `alloca`; `alloca` itself must be in scope at the mixin site.
template temp_alloc(string size) {
  enum temp_alloc = q{alloca(${size})}.cmacroFixVars!("size")(size);
}
630 
// Builds (as a string, for use in a mixin) a make_block_array() call whose
// memory argument is a temp_alloc!() expression sized by
// array_size_required!(count, size).
template temp_block_array(string count, string size) {
  enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
}
635 
636 /*
637 T** temp_block_array(T) (VorbisDecoder f, uint count, uint size) {
638   size *= T.sizeof;
639   auto mem = f.alloc.alloc(count*(void*).sizeof+size, f);
640   if (mem !is null) make_block_array(mem, count, size);
641   return cast(T**)mem;
642 }
643 */
644 
645 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
// Lays out `mem` as a table of `count` pointers immediately followed by
// `count` consecutive blocks of `size` bytes, and points table entry i at
// block i. Returns `mem` (the start of the pointer table).
// `mem` must hold at least count*((void*).sizeof+size) bytes.
private void* make_block_array (void* mem, int count, int size) {
  auto table = cast(void**)mem;
  // the first block starts right after the last table slot
  auto cursor = cast(char*)(table+count);
  for (int slot = 0; slot < count; ++slot, cursor += size) table[slot] = cursor;
  return table;
}
655 
// Allocates zero-filled setup memory for `sz` elements of type T through the
// decoder's allocator.
// Params:
//   f  = decoder whose allocator (`f.alloc`) services the request
//   sz = number of T elements wanted
// Returns: pointer to the zeroed storage, or null if the allocator fails or
//          the byte count does not fit in a uint.
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset;
  /*
  f.setup_memory_required += sz;
  if (f.alloc.alloc_buffer) {
    void* p = cast(char*)f.alloc.alloc_buffer+f.setup_offset;
    if (f.setup_offset+sz > f.temp_offset) return null;
    f.setup_offset += sz;
    return cast(T*)p;
  }
  */
  enum uint PAD = 8; // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  // Do the size arithmetic in 64 bits: element counts come from stream data,
  // and a wrapped 32-bit product would silently yield an undersized buffer.
  immutable ulong total = cast(ulong)sz*T.sizeof+PAD;
  if (total > uint.max) return null;
  immutable uint bytes = cast(uint)total;
  auto res = f.alloc.alloc(bytes, f);
  if (res !is null) memset(res, 0, bytes);
  return cast(T*)res;
}
674 
// Returns setup memory to the decoder's allocator; a null `p` is ignored.
private void setup_free (VorbisDecoder f, void* p) {
  //if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack
  if (p is null) return;
  f.alloc.free(p, f);
}
679 
// Allocates `sz` bytes (plus padding) of zero-filled temporary setup memory
// through the decoder's allocator.
// Returns: pointer to the zeroed storage, or null if the allocator fails or
//          the padded size does not fit in a uint.
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset;
  enum uint PAD = 8; // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  // Reject sizes whose padded value would wrap around in 32 bits instead of
  // allocating a tiny buffer for a huge request.
  if (sz > uint.max-PAD) return null;
  immutable uint bytes = sz+PAD;
  auto res = f.alloc.allocTemp(bytes, f);
  if (res !is null) memset(res, 0, bytes);
  return res;
}
688 
// Releases temporary setup memory of nominal size `sz`; a null `p` is ignored.
// A zero `sz` is treated as 1 before the padding is added.
// NOTE(review): setup_temp_malloc() requests sz+8 bytes, so for sz == 0 the
// size passed here (9) differs from the size allocated (8) -- confirm the
// allocator tolerates that or that callers never pass 0.
private void setup_temp_free (VorbisDecoder f, void* p, uint sz) {
  if (p is null) return;
  immutable uint payload = (sz ? sz : 1);
  f.alloc.freeTemp(p, payload+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
}
692 
// lookup table used by `crc32_update`; filled once by the module constructor below
immutable uint[256] crc_table;
shared static this () {
  enum uint CRC32_POLY = 0x04c11db7; // from spec
  // entry i is the byte i placed in the top 8 bits, then shifted left eight
  // times, xor-ing in the polynomial whenever a set bit is shifted out
  foreach (uint idx; 0..256) {
    uint reg = idx<<24;
    foreach (immutable _; 0..8) {
      immutable bool carry = (reg&0x80000000U) != 0;
      reg <<= 1;
      if (carry) reg ^= CRC32_POLY;
    }
    crc_table[idx] = reg;
  }
}
703 
// Fold one byte `b` into the running checksum `crc` using `crc_table`;
// returns the updated checksum.
uint crc32_update (uint crc, ubyte b) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // table slot = top byte of the running value xor the incoming byte
  return crc_table[(crc>>24)^b]^(crc<<8);
}
708 
// Reverse the order of all 32 bits of `n`.
// used in setup, and for huffman that doesn't go fast path
private uint bit_reverse (uint n) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // swap neighbouring bits, then pairs, nibbles, bytes and finally the two halves
  n = ((n>>1)&0x55555555)|((n<<1)&0xAAAAAAAA);
  n = ((n>>2)&0x33333333)|((n<<2)&0xCCCCCCCC);
  n = ((n>>4)&0x0F0F0F0F)|((n<<4)&0xF0F0F0F0);
  n = ((n>>8)&0x00FF00FF)|((n<<8)&0xFF00FF00);
  return (n<<16)|(n>>16);
}
718 
// Returns `x` multiplied by itself.
private float square (float x) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return (x*x);
}
723 
// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
// as required by the specification. fast(?) implementation from stb.h
// (in other words: the number of bits needed to store n; 0 for n == 0)
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4]; // the same function for 0..15
private int ilog (int n) {
  //static if (__VERSION__ > 2067) pragma(inline, true);
  if (n < 0) return 0; // signed n returns 0
  // find the 5-bit wide band n falls into, then look up what is left above the band's base
  if (n < (1<<4)) return 0+log2_4[n];
  if (n < (1<<9)) return 5+log2_4[n>>5];
  if (n < (1<<14)) return 10+log2_4[n>>10];
  if (n < (1<<19)) return 15+log2_4[n>>15];
  if (n < (1<<24)) return 20+log2_4[n>>20];
  if (n < (1<<29)) return 25+log2_4[n>>25];
  return 30+log2_4[n>>30];
}
744 
745 
// marker stored in place of a code length for a value that has no huffman encoding
enum int NO_CODE = 255;
748 
749 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
750 //
751 // these functions are only called at setup, and only a few times per file
// Unpack the 32-bit packed float format from the specification:
// bits 0..20 hold the mantissa, bits 21..30 the exponent (biased by 788)
// and bit 31 the sign. The result is (+/-)mantissa * 2^(exponent-788).
private float float32_unpack (uint x) {
  import core.math : ldexp;
  //static if (__VERSION__ > 2067) pragma(inline, true);
  immutable uint mantissa = x&0x1fffff;
  immutable bool negative = (x&0x80000000) != 0;
  immutable int exponent = cast(int)((x&0x7fe00000)>>21)-788;
  double magnitude = cast(double)mantissa;
  if (negative) magnitude = -magnitude;
  return cast(float)ldexp(cast(float)magnitude, exponent);
}
762 
763 // zlib & jpeg huffman tables assume that the output symbols
764 // can either be arbitrarily arranged, or have monotonically
765 // increasing frequencies--they rely on the lengths being sorted;
766 // this makes for a very simple generation algorithm.
767 // vorbis allows a huffman table with non-sorted lengths. This
768 // requires a more sophisticated construction, since symbols in
769 // order do not map to huffman codes "in order".
// Record one assigned huffman code.
// Non-sparse codebooks store the code at the symbol's own index. Sparse
// codebooks store code, length and symbol side by side at position `count`
// (`values` receives the symbol).
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
779 
// Assign huffman codewords to the `n` entries whose code lengths are given in
// `len` (NO_CODE marks an unused entry), storing them through `add_entry`.
// Returns true on success (also when no entry is used at all), false when the
// lengths cannot form a valid tree: either no free leaf is left for an entry,
// or a length does not fit the 32-slot availability table (such a length used
// to index past the end of `available` before the asserts could catch it).
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  int i, k, m = 0;
  // available[d] is the (left-aligned) code of the free leaf at depth d; 0 means 'empty'
  uint[32] available = 0;

  // find the first entry
  for (k = 0; k < n; ++k) if (len[k] < NO_CODE) break;
  if (k == n) { assert(c.sorted_entries == 0); return true; }
  // reject lengths that would index outside of available[]
  if (len[k] >= available.length) return false;
  // add to the list
  add_entry(c, 0, k, m++, len[k], values);
  // add all available leaves
  for (i = 1; i <= len[k]; ++i) available[i] = 1U<<(32-i);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (i = k+1; i < n; ++i) {
    immutable ubyte codelen = len[i]; // dmd bug: it reads 4 bytes without temp
    if (codelen == NO_CODE) continue;
    // reject lengths that would index outside of available[]
    if (codelen >= available.length) return false;
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    int z = codelen;
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    immutable uint res = available[z];
    available[z] = 0;
    add_entry(c, bit_reverse(res), i, m++, codelen, values);
    // propagate availability up the tree: every level between the leaf we
    // took and the requested length gains one free sibling
    for (int y = codelen; y > z; --y) {
      assert(available[y] == 0);
      available[y] = res+(1U<<(32-y));
    }
  }
  return true;
}
831 
// accelerated huffman table allows fast O(1) match of all symbols
// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
// Every table slot starts out as -1; each short enough code then claims all
// slots whose low bits equal that code.
private void compute_accelerated_huffman (Codebook* c) {
  c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE] = -1;
  auto total = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (total > 32767) total = 32767; // largest possible value we can encode!
  }
  foreach (uint i; 0..total) {
    immutable codelen = c.codeword_lengths[i];
    if (codelen > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;
    immutable uint first = (c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]);
    // set table entries for all bit combinations in the higher bits
    for (uint z = first; z < FAST_HUFFMAN_TABLE_SIZE; z += 1<<codelen) {
      c.fast_huffman.ptr[z] = cast(typeof(c.fast_huffman[0]))i; //k8
    }
  }
}
852 
// Three-way comparison of the two uints that `p` and `q` point to, in the
// shape qsort expects: -1, 0 or 1.
extern(C) int uint32_compare (const scope void* p, const scope void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
858 
// Decide whether an entry with code length `len` belongs in the sorted table:
// always for sparse codebooks, otherwise only when the entry is used and its
// code is longer than STB_VORBIS_FAST_HUFFMAN_LENGTH.
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
865 
// if the fast table above doesn't work, we want to binary
// search them... need to reverse the bits
// Fills `c.sorted_codewords` (bit-reversed, ascending, terminated by
// 0xffffffff) and `c.sorted_values`; for sparse codebooks it also rewrites
// `c.codeword_lengths` into sorted order.
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  // build a list of all the entries
  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
  // this is kind of a frivolous optimization--I don't see any performance improvement,
  // but it's like 4 extra lines of code, so.
  if (c.sparse) {
    foreach (uint i; 0..c.sorted_entries) c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
  } else {
    int kept = 0;
    foreach (uint i; 0..c.entries) {
      if (!include_in_sort(c, lengths[i])) continue;
      c.sorted_codewords[kept++] = bit_reverse(c.codewords[i]);
    }
    assert(kept == c.sorted_entries);
  }

  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  immutable total = (c.sparse ? c.sorted_entries : c.entries);
  // now we need to indicate how they correspond; we could either
  //   #1: sort a different data structure that says who they correspond to
  //   #2: for each sorted entry, search the original list to find who corresponds
  //   #3: for each original entry, find the sorted entry
  // #1 requires extra storage, #2 is slow, #3 can use binary search!
  foreach (uint i; 0..total) {
    immutable huff_len = (c.sparse ? lengths[values[i]] : lengths[i]);
    if (!include_in_sort(c, huff_len)) continue;
    immutable uint code = bit_reverse(c.codewords[i]);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    assert(c.sorted_codewords[lo] == code);
    if (c.sparse) {
      c.sorted_values[lo] = values[i];
      c.codeword_lengths[lo] = huff_len;
    } else {
      c.sorted_values[lo] = i;
    }
  }
}
915 
// only run while parsing the header (3 times)
// Returns nonzero when the six bytes at `data` spell "vorbis".
private int vorbis_validate (const(void)* data) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return ((cast(const(char)*)data)[0..6] == "vorbis");
}
922 
// called from setup only, once per code book
// (formula implied by specification)
// Returns the largest r for which r^dim <= entries: a float estimate of the
// dim-th root of `entries`, bumped by one when the estimate came out too low.
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  int r = cast(int)lrintf(floor(exp(cast(float)log(cast(float)entries)/dim)));
  // floor() to avoid _ftol() when non-CRT
  immutable nextPower = lrintf(floor(pow(cast(float)r+1, dim)));
  if (nextPower <= entries) ++r;
  assert(pow(cast(float)r+1, dim) > entries);
  assert(lrintf(floor(pow(cast(float)r, dim))) <= entries);
  return r;
}
934 
// called twice per file
// Fill the three tables for block size `n` with interleaved (cos, sin) pairs:
// A and B each receive n/4 pairs (n/2 floats), C receives n/8 pairs (n/4 floats).
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int quarter = n>>2, eighth = n>>3;
  foreach (int k; 0..quarter) {
    immutable int k2 = k*2;
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (int k; 0..eighth) {
    immutable int k2 = k*2;
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
951 
// Write the n/2 window coefficients for block size `n` to `window`:
// window[i] = sin(pi/2 * sin^2((i+0.5)/(n/2) * pi/2)).
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int n2 = n>>1;
  for (int i = 0; i < n2; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/n2*0.5*PI)));
  }
}
957 
// Fill `rev` with n/8 entries: entry i is the bit-reversed index i, shifted
// down so that only ilog(n)-4 bits remain, then multiplied by 4.
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int shift = 32-ld+3;
  immutable int count = n>>3;
  for (int i = 0; i < count; ++i) rev[i] = cast(ushort)((bit_reverse(i)>>shift)<<2); //k8
}
963 
// Allocate and fill the per-blocksize tables of slot `b` for block size `n`:
// the twiddle factors A/B/C, the window and the bit-reverse table.
// Returns true on success; reports STBVorbisError.outofmem through `error`
// as soon as an allocation fails.
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1, quarter = n>>2, eighth = n>>3;

  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  immutable bool twiddlesOk = (f.A[b] !is null && f.B[b] !is null && f.C[b] !is null);
  if (!twiddlesOk) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  f.window[b] = setup_malloc!float(f, half);
  if (f.window[b] is null) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (f.bit_reverse[b] is null) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
979 
// Among x[0..n], find the index of the largest value below x[n] (stored in
// `*plow`) and of the smallest value above x[n] (stored in `*phigh`).
// An output is left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  assert(n >= 0 && n <= ushort.max);
  int bestBelow = -1;    // below every possible ushort
  int bestAbove = 65536; // above every possible ushort
  foreach (ushort idx; 0..cast(ushort)n) {
    immutable int v = x[idx];
    if (v > bestBelow && v < x[n]) {
      *plow = idx;
      bestBelow = v;
    }
    if (v < bestAbove && v > x[n]) {
      *phigh = idx;
      bestAbove = v;
    }
  }
}
989 
// sort record ordered by `x` (see point_compare);
// `y` has been repurposed: it now carries the original index instead of a y value
struct Point {
  ushort x;
  ushort y;
}
994 
// Three-way comparison of two Points by their `x` field, in the shape qsort
// expects: -1, 0 or 1.
extern(C) int point_compare (const scope void *p, const scope void *q) {
  immutable lhs = (cast(const(Point)*)p).x;
  immutable rhs = (cast(const(Point)*)q).x;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
1000 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1001 
1002 // ///////////////////////////////////////////////////////////////////// //
// Read one byte from the decoder's input.
// Returns 0 once `f.eof` is set; a failed read sets `f.eof` and returns 0.
private ubyte get8 (VorbisDecoder f) {
  // must start out as 0: when f.eof is already set no read happens and this
  // value is returned as is (it used to be left uninitialized)
  ubyte b = 0;
  if (!f.eof) {
    if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  }
  return b;
}
1010 
// Read a little-endian 32-bit value from the decoder's input.
// Returns 0 once `f.eof` is set. On little-endian targets the four bytes are
// read in one go, and a short read sets `f.eof` and yields 0; elsewhere the
// value is assembled from four get8 calls.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  uint value = 0;
  version(LittleEndian) {
    if (f.rawRead((&value)[0..1]) != value.sizeof) { f.eof = true; value = 0; }
  } else {
    value = get8(f);
    value |= cast(uint)get8(f)<<8;
    value |= cast(uint)get8(f)<<16;
    value |= cast(uint)get8(f)<<24;
  }
  return value;
}
1025 
// Read exactly `n` bytes into `data`.
// Returns false when `f.eof` is already set, when `n` is negative, or when
// the read comes up short (which also sets `f.eof`); a zero-length read succeeds.
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n == 0) return true;
  immutable bool complete = (f.rawRead(data[0..n]) == n);
  if (!complete) f.eof = true;
  return complete;
}
1032 
// Skip `n` bytes of input via `f.rawSkip`; does nothing when `f.eof` is set
// or `n` is zero.
private void skip (VorbisDecoder f, int n) {
  if (!f.eof && n != 0) f.rawSkip(n);
}
1037 
// Seek the input to `loc` and clear `f.eof`. Offsets of 0x80000000 and above
// are not seeked to; they just set `f.eof`.
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  immutable bool outOfRange = (loc >= 0x80000000);
  f.eof = outOfRange;
  if (!outOfRange) f.rawSeek(loc);
}
1044 
1045 
// the four bytes "OggS" as a constant (byte values 0x4f, 0x67, 0x67, 0x53)
immutable char[4] ogg_page_header = "OggS"; //[ 0x4f, 0x67, 0x67, 0x53 ];
1047 
// Read four bytes and report whether they are the "OggS" capture pattern;
// false as well when the four bytes cannot be read.
private bool capture_pattern (VorbisDecoder f) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  char[4] magic = void; // only inspected after getn has filled it
  return (getn(f, magic.ptr, 4) && magic == "OggS");
}
1054 
// bit masks for the page header flag byte (the value kept in `f.page_flag`)
enum PAGEFLAG_continued_packet = 1<<0;
enum PAGEFLAG_first_page = 1<<1;
enum PAGEFLAG_last_page = 1<<2;
1058 
// Parse a page header whose capture pattern has already been consumed and
// set up the decoder's per-page state (flags, segment table, known sample
// position, first segment to read).
// Returns true on success; reports an error through `error` when the stream
// structure version is not 0 or the segment table cannot be read.
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position, as two 32-bit halves
  immutable uint granuleLo = get32(f);
  immutable uint granuleHi = get32(f);
  // @TODO: validate granuleLo, granuleHi as valid positions?
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  f.last_page = get32(f);
  // CRC32 (read and dropped here)
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);
  // assume we _don't_ know the sample position of any segment
  f.end_seg_with_known_loc = -2;
  immutable bool hasGranule = !(granuleLo == ~0U && granuleHi == ~0U);
  if (hasGranule) {
    // determine which packet is the last one that will complete:
    // walk back to the last segment shorter than 255 bytes
    int last = f.segment_count-1;
    while (last >= 0 && f.segments.ptr[last] >= 255) --last;
    // 'last' is now the index of the _last_ segment of a packet that ends
    if (last >= 0) {
      f.end_seg_with_known_loc = last;
      f.known_loc_for_packet = granuleLo;
    }
  }
  if (f.first_decode) {
    // remember the extent of this page: 27 fixed header bytes, the segment
    // table and the sum of all segment sizes
    int len = 0;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    len += 27+f.segment_count;
    ProbedPage p;
    p.page_start = f.first_audio_page_offset;
    p.page_end = p.page_start+len;
    p.last_decoded_sample = granuleLo;
    f.p_first = p;
  }
  f.next_seg = 0;
  return true;
}
1106 
// Read a complete page header: the capture pattern first, then the rest.
// Reports STBVorbisError.missing_capture_pattern when the pattern is absent.
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
1111 
// Begin reading a new packet: start new pages for as long as no segment is
// pending (`f.next_seg == -1`), then reset the per-packet read state.
// A page that claims to continue a packet is reported as an error.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid
  f.bytes_in_seg = 0;
  f.packet_bytes = 0;
  f.valid_bits = 0;
  f.last_seg = false;
  return true;
}
1124 
// Like start_packet, except that running out of input exactly at a page
// boundary just returns false without reporting an error.
private int maybe_start_packet (VorbisDecoder f) {
  if (f.next_seg != -1) return start_packet(f);

  immutable first = get8(f);
  if (f.eof) return false; // EOF at page boundary is not an error!
  // import std.stdio; debug writefln("CAPTURE %x %x", first, f.stpos);
  // the capture pattern "OggS", checked byte by byte
  if (first != 0x4f) return error(f, STBVorbisError.missing_capture_pattern);
  static immutable ubyte[3] rest = [0x67, 0x67, 0x53];
  foreach (immutable expected; rest) {
    if (get8(f) != expected) return error(f, STBVorbisError.missing_capture_pattern);
  }
  if (!start_page_no_capturepattern(f)) return false;
  if (f.page_flag&PAGEFLAG_continued_packet) {
    // set up enough state that we can read this packet if we want,
    // e.g. during recovery
    f.last_seg = false;
    f.bytes_in_seg = 0;
    return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  return start_packet(f);
}
1145 
// Advance to the next segment of the current packet and return its length;
// 0 means the packet has no further data. When the current page is used up
// the next page is started, and it has to be flagged as continuing a packet.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) {
      f.last_seg = true;
      return 0;
    }
    immutable bool continued = (f.page_flag&PAGEFLAG_continued_packet) != 0;
    if (!continued) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  auto len = f.segments.ptr[f.next_seg++];
  // a segment shorter than 255 bytes is the final one of its packet
  if (len < 255) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  // -1 makes the following call start a new page
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = len;
  return len;
}
1163 
// returned by the packet byte readers when the packet has no more data
enum int EOP = -1;
// value of `valid_bits` after a bit read ran past the end of the packet
enum int INVALID_BITS = -1;
1166 
// Read the next byte of the current packet, moving on to the next segment
// when the current one is used up. Returns EOP at the end of the packet.
// Does not touch the bit accumulator.
private int get8_packet_raw (VorbisDecoder f) {
  if (!f.bytes_in_seg) {  // CLANG!
    if (f.last_seg || !next_segment(f)) return EOP;
  }
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  --f.bytes_in_seg;
  ++f.packet_bytes;
  return get8(f);
}
1177 
// Read the next byte of the current packet (or EOP) and mark the bit
// accumulator as empty.
private int get8_packet (VorbisDecoder f) {
  immutable int octet = get8_packet_raw(f);
  f.valid_bits = 0;
  return octet;
}
1183 
// Read a little-endian 32-bit value from the current packet.
// Returns EOP (converted to uint) as soon as one of the four byte reads hits
// the end of the packet.
private uint get32_packet (VorbisDecoder f) {
  uint value = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    immutable uint octet = get8_packet(f);
    if (octet == EOP) return EOP;
    value += octet<<shift;
  }
  return value;
}
1195 
// Discard whatever is left of the current packet.
private void flush_packet (VorbisDecoder f) {
  int octet;
  do {
    octet = get8_packet_raw(f);
  } while (octet != EOP);
}
1199 
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
// Read `n` bits, least significant first, from the current packet.
// Returns 0 once the end of the packet has been hit; hitting it during this
// call sets `f.valid_bits` to INVALID_BITS.
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case
      immutable uint low = get_bits_main(f, 24);
      immutable uint high = get_bits_main(f, n-24);
      return low+(high<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // at least one more byte is needed here, so test the condition at the bottom
    do {
      immutable uint octet = get8_packet_raw(f);
      if (octet == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += octet<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint bits = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return bits;
}
1229 
// Read `n` bits via get_bits_main and return them in the smallest unsigned
// integer type that has at least `n` bits.
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  immutable uint raw = get_bits_main(f, n);
  static if (n <= 8) return cast(ubyte)raw;
  else static if (n <= 16) return cast(ushort)raw;
  else static if (n <= 32) return cast(uint)raw;
  else static if (n <= 64) return cast(ulong)raw;
  else static assert(0, "wtf?!");
}
1238 
// Read `n` bits via get_bits_main, add `add`, and return the sum in the
// smallest unsigned integer type that has at least `n` bits; assume no overflow.
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  immutable uint sum = get_bits_main(f, n)+add;
  static if (n <= 8) return cast(ubyte)sum;
  else static if (n <= 16) return cast(ushort)sum;
  else static if (n <= 32) return cast(uint)sum;
  else static if (n <= 64) return cast(ulong)sum;
  else static assert(0, "wtf?!");
}
1247 
// @OPTIMIZE: primary accumulator for huffman
// expand the buffer to as many bits as possible without reading off end of packet
//
// Statement mixin: while `f.valid_bits` is 24 or less it pulls bytes with
// get8_packet_raw and adds each one into `f.acc` above the bits already held.
// It stops early when the last segment is used up or a read returns EOP.
// Needs a decoder named `f` in the scope it is mixed into.
//
// it might be nice to allow f.valid_bits and f.acc to be stored in registers,
// e.g. cache them locally and decode locally
//private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    int phmz = void;
    do {
      if (f.last_seg && !f.bytes_in_seg) break;
      phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits <= 24);
  }
};
1266 
// packet type values; judging by the member names these are the three vorbis
// header packets (identification, comment, setup) -- NOTE(review): confirm
// against the code that uses them
enum VorbisPacket {
  id = 1,
  comment = 3,
  setup = 5,
}
1272 
// Slow path of the huffman decoder, for codes the fast table cannot resolve.
// Returns the decoded entry (the sorted index for sparse codebooks, the symbol
// otherwise), or -1 when the codebook has no codewords at all, when fewer
// bits are left than the matched code needs, or when nothing matches (that
// last case also reports STBVorbisError.invalid_stream).
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;

  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  bool useBinarySearch;
  if (c.entries > 8) useBinarySearch = (c.sorted_codewords !is null);
  else useBinarySearch = !c.codewords;

  if (useBinarySearch) {
    immutable uint code = bit_reverse(f.acc);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    // lo is now the sorted index
    if (!c.sparse) lo = c.sorted_values[lo];
    // lo is now sorted index if sparse, or symbol otherwise
    immutable int bits = c.codeword_lengths[lo];
    if (f.valid_bits < bits) {
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= bits;
    f.valid_bits -= bits;
    return lo;
  }

  // if small, linear search
  debug(stb_vorbis) assert(!c.sparse);
  foreach (uint i; 0..c.entries) {
    immutable int bits = c.codeword_lengths[i];
    if (bits == NO_CODE) continue;
    if (c.codewords[i] != (f.acc&((1<<bits)-1))) continue;
    if (f.valid_bits < bits) {
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= bits;
    f.valid_bits -= bits;
    return i;
  }

  error(f, STBVorbisError.invalid_stream);
  f.valid_bits = 0;
  return -1;
}
1324 
1325 
// Code-generating template: yields the statements that decode one huffman
// entry from codebook `c` into the variable named by `var`.
// The generated code refills the accumulator when fewer than
// STB_VORBIS_FAST_HUFFMAN_LENGTH bits are held, tries the fast table, and
// falls back to codebook_decode_scalar_raw when the table has no entry
// (negative value). `var` ends up as -1 when too few bits were left.
// Needs a decoder named `f` in the scope it is mixed into.
// NOTE(review): the ${...} placeholders are presumably filled in by
// cmacroFixVars (defined elsewhere) -- confirm how ${__temp_prefix__} is expanded.
template DECODE_RAW(string var, string c) {
  enum DECODE_RAW = q{
    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
    // fast huffman table lookup
    ${i} = f.acc&FAST_HUFFMAN_TABLE_MASK;
    ${i} = ${c}.fast_huffman.ptr[${i}];
    if (${i} >= 0) {
      auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
      f.acc >>= ${__temp_prefix__}n;
      f.valid_bits -= ${__temp_prefix__}n;
      if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
    } else {
      ${i} = codebook_decode_scalar_raw(f, ${c});
    }
  }.cmacroFixVars!("i", "c")(var, c);
}
1342 
// Same as DECODE_RAW, followed by a translation step for sparse codebooks:
// the decoded value is replaced by `sorted_values[value]`.
// NOTE(review): the translation is not guarded against a failed decode
// (value -1) -- confirm that users of this mixin cannot reach it in that state.
enum DECODE(string var, string c) = q{
  ${DECODE_RAW}
  if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
}.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
1347 
1348 
// decoder mixin used by the vector decode functions below: with
// STB_VORBIS_DIVIDES_IN_CODEBOOK it is DECODE (sparse values get translated),
// otherwise DECODE_RAW (the raw entry is used as is)
version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
  alias DECODE_VQ = DECODE;
} else {
  alias DECODE_VQ = DECODE_RAW;
}
1354 
1355 
1356 
// Expression-string helpers for reading codebook values inside mixins.
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
// In this port both element forms expand to the same `multiplicands[off]`
// lookup, and the base value is the constant 0.
enum CODEBOOK_ELEMENT(string c, string off) = "("~c~".multiplicands["~off~"])";
enum CODEBOOK_ELEMENT_FAST(string c, string off) = "("~c~".multiplicands["~off~"])";
enum CODEBOOK_ELEMENT_BASE(string c) = "(0)";
1362 
1363 
// Decode one entry from `c` for vector use.
// Returns the entry, or a negative value on failure. Failure is reported as
// STBVorbisError.invalid_stream unless it is a plain end of packet (no bytes
// left in the last segment).
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return -1;
  }
  int z = -1;
  mixin(DECODE_VQ!("z", "c"));
  debug(stb_vorbis) if (c.sparse) assert(z < c.sorted_entries);
  if (z >= 0) return z;
  // check for EOP
  immutable bool endOfPacket = (!f.bytes_in_seg && f.last_seg);
  if (!endOfPacket) error(f, STBVorbisError.invalid_stream);
  return z;
}
1379 
// Decode one vector from `c` and add it onto output[0..len]; `len` is capped
// at the codebook's dimensions. For codebooks with `sequence_p` set each
// element also includes the previous element plus `minimum_value`.
// Returns false when no entry could be decoded.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (len > c.dimensions) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      float running = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int divisor = 1;
      for (int i = 0; i < len; ++i, divisor *= c.lookup_values) {
        immutable int off = (z/divisor)%c.lookup_values;
        immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+running;
        output[i] += val;
        if (c.sequence_p) running = val+c.minimum_value;
      }
      return true;
    }
  }

  // the entry's values start at z*dimensions
  z *= c.dimensions;
  float running = mixin(CODEBOOK_ELEMENT_BASE!"c");
  for (int i = 0; i < len; ++i) {
    immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+running;
    output[i] += val;
    if (c.sequence_p) running = val+c.minimum_value;
  }

  return true;
}
1415 
// Decode one vector from `c` and add its elements onto every `step`-th slot
// of `output` (output[0], output[step], ...); `len` is capped at the
// codebook's dimensions. For codebooks with `sequence_p` set each element
// also includes the previous one.
// Returns false when no entry could be decoded.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  float running = mixin(CODEBOOK_ELEMENT_BASE!"c");
  if (len > c.dimensions) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      int divisor = 1;
      for (int i = 0; i < len; ++i, divisor *= c.lookup_values) {
        immutable int off = (z/divisor)%c.lookup_values;
        immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+running;
        output[i*step] += val;
        if (c.sequence_p) running = val;
      }
      return true;
    }
  }

  // the entry's values start at z*dimensions
  z *= c.dimensions;
  for (int i = 0; i < len; ++i) {
    immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+running;
    output[i*step] += val;
    if (c.sequence_p) running = val;
  }

  return true;
}
1445 
// Decode vectors from `c` until `total_decode` values have been produced,
// adding them onto `ch` channel buffers in interleaved order: consecutive
// values go to consecutive channels, and the sample position advances after
// the last channel. Channels whose buffer pointer is null are skipped.
//
//   c_inter_p, p_inter_p: in/out channel and sample position; they are only
//                         written back when the function returns true
//   len:                  number of samples per channel buffer
//
// Returns true on success and false at the end of the packet; a corrupt
// stream is reported as STBVorbisError.invalid_stream.
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective)
    // (the clamp used to subtract c_inter from the offset instead of adding
    // it, which let the writes below run 2*c_inter elements past the buffers)
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          // outputs.ptr[c_inter] already is the float* of the channel
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    // the entry's values start at z*dimensions
    z *= c.dimensions;
    if (c.sequence_p) {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1507 
//private int predict_point (int x, int x0, int x1, int y0, int y1)
// Code-generating template: yields a block that assigns to `dest` the y value
// at `x` on the line from (x0, y0) to (x1, y1), using integer arithmetic.
// The offset from y0 is computed from the absolute value of dy, so the
// division truncates towards y0 for rising and falling lines alike.
// The generated code divides by x1-x0.
// NOTE(review): the ${...} placeholders are presumably filled in by
// cmacroFixVars (defined elsewhere) -- confirm.
enum predict_point(string dest, string x, string x0, string x1, string y0, string y1) = q{{
  //import std.math : abs;
  int dy = ${y1}-${y0};
  int adx = ${x1}-${x0};
  // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  int err = /*abs(dy)*/(dy < 0 ? -dy : dy)*(${x}-${x0});
  int off = err/adx;
  /*return*/${dest} = (dy < 0 ? ${y0}-off : ${y0}+off);
}}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
1518 
// the following table is block-copied from the specification
// 256 strictly increasing float entries, from 1.0649863e-07 at index 0 up to
// 1.0 at index 255. draw_line indexes it with the current integer y of the
// line being drawn and feeds the looked-up value to LINE_OP.
// Do not "tidy" the literals: the values are meant to match the specification
// digit for digit.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
];
1586 
1587 
1588 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1589 // note that you must produce bit-identical output to decode correctly;
1590 // this specific sequence of operations is specified in the spec (it's
1591 // drawing integer-quantized frequency-space lines that the encoder
1592 // expects to be exactly the same)
1593 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1594 // have to divide in the setup? sigh.
// LINE_OP!(a, b) builds the statement text that draw_line mixes in for every
// sample: a plain store of `b` into `a` when STB_VORBIS_NO_DEFER_FLOOR is set,
// otherwise an in-place multiply of `a` by `b`.
version(STB_VORBIS_NO_DEFER_FLOOR) {
  template LINE_OP(string a, string b) {
    enum LINE_OP = a~" = "~b~";";
  }
} else {
  template LINE_OP(string a, string b) {
    enum LINE_OP = a~" *= "~b~";";
  }
}
1600 
// Optional lookup table that draw_line uses instead of the `dy/adx` division
// when both operands are small. Compiled in only with STB_VORBIS_DIVIDE_TABLE.
version(STB_VORBIS_DIVIDE_TABLE) {
  enum DIVTAB_NUMER = 32; // row count: the table is consulted only while ady < DIVTAB_NUMER
  enum DIVTAB_DENOM = 64; // row length: the table is consulted only while adx < DIVTAB_DENOM
  // indexed as integer_divide_table[ady][adx]
  // NOTE(review): nothing in this part of the file fills the table; presumably
  // it is initialised during decoder setup -- confirm.
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table; // 2KB
}
1606 
// nobranch abs trick
// Expands to the expression (v + s) ^ s with s = v >> 31.
// `v` is pasted three times, so pass only side-effect-free expressions.
// NOTE(review): the shift count 31 assumes `v` is a 32-bit signed int (s is
// then -1 for negative v and 0 otherwise) -- confirm at new call sites.
enum ABS(string v) = q{(((${v})+((${v})>>31))^((${v})>>31))}.cmacroFixVars!"v"(v);
1609 
// this is forceinline, but dmd inliner sux
// but hey, i have my k00l macrosystem!
//
// Statement macro (mixed in at the call site) replacing the C function below.
// For every integer x in [x0, min(x1, n)) it applies LINE_OP to output[x] with
// inverse_db_table[y]. y starts at y0; on each step an error term grows by
// ady (the remainder of |dy| after removing |base|*adx) and y advances by
// `base` = dy/adx, or by `sy` = base-1 / base+1 (falling / rising line) when
// the error term reaches adx.
//
// Things the expansion does that a caller must know:
//   - `x1` is assigned to (clamped to `n`), so the x1 argument must be an
//     lvalue and holds the clamped value afterwards;
//   - arguments are pasted textually, several of them more than once;
//   - outside the divide-table path, adx == 0 is a division by zero.
//     NOTE(review): presumably callers guarantee x1 > x0 -- confirm.
// All internal variables carry ${__temp_prefix__}.
// The commented-out tail inside the body is the same loop without the
// `x < x1` guard.
//void draw_line (float* ${output}, int ${x0}, int ${y0}, int ${x1}, int ${y1}, int ${n})
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${x1} > ${n}) ${x1} = ${n};
  if (${__temp_prefix__}x < ${x1}) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    }
  }
  /*
  mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
    ${__temp_prefix__}err += ${__temp_prefix__}ady;
    if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
      ${__temp_prefix__}err -= ${__temp_prefix__}adx;
      ${__temp_prefix__}y += ${__temp_prefix__}sy;
    } else {
      ${__temp_prefix__}y += ${__temp_prefix__}base;
    }
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  }
  */
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1668 
// Decodes `n` residue values from `book` into `target`, starting at `offset`.
//   rtype == 0: values are written with a stride of n/book.dimensions, one
//               codebook_decode_step call per starting position;
//   otherwise : vectors are written back to back, book.dimensions values per
//               codebook_decode call.
// Returns false as soon as a codebook call fails, true otherwise.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    int consumed = 0;
    while (consumed < n) {
      if (!codebook_decode(f, book, target+offset, n-consumed)) return false;
      consumed += book.dimensions;
      offset += book.dimensions;
    }
    return true;
  }

  int stride = n/book.dimensions;
  for (int first = 0; first < stride; ++first) {
    if (!codebook_decode_step(f, book, target+offset+first, n-offset-first, stride)) return false;
  }
  return true;
}
1682 
// Decodes residue configuration `rn` into `residue_buffers`.
//
//   f                decoder state (codebooks, residue configs, temp allocator)
//   residue_buffers  one float buffer per channel; for every channel i < ch
//                    that is not flagged in do_not_decode, the first n floats
//                    are zeroed before decoding starts
//   ch               number of channels to process
//   n                number of floats per channel buffer
//   rn               index into f.residue_config / f.residue_types
//   do_not_decode    per-channel flags; nonzero means "skip this channel"
//
// Decoding runs up to 8 passes over part_read partitions. The classification
// of each partition is read from the class book during pass 0 and reused by
// the later passes. Hitting EOP or a failed codebook decode stops decoding
// early; whatever has been accumulated so far stays in the buffers.
//
// Two layouts exist for the remembered classifications:
//   default                         part_classdata[ch][class_set] -> ubyte* taken from r.classdata
//   STB_VORBIS_DIVIDES_IN_RESIDUE   classifications[ch][partition] -> int, unpacked with % and /
// In the latter, `classifications` is an int**, so its rows are raw pointers
// and are indexed directly (raw pointers have no `.ptr` property).
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions; // partitions classified by one class-book codeword
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size;         // number of partitions to decode
  uint temp_alloc_point = temp_alloc_save(f);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  // residue type 2 with more than one channel: all channels are decoded as one
  // interleaved stream, so only row 0 of the classification storage is used
  if (rtype == 2 && ch != 1) {
    int j = void;
    // nothing to do when every channel is flagged as "do not decode"
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      if (ch == 2) {
        //stb_prof(13);
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          // split the interleaved position into (channel, sample) for two channels
          int c_inter = (z&1), p_inter = z>>1;
          if (pass == 0) {
            Codebook *cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          //stb_prof(5);
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(20); // accounts for X time
              version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              } else {
                // saves 1%
                //if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r.part_size)) goto done; // according to C source
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              }
              //stb_prof(7);
            } else {
              // no book for this class in this pass: just step over the partition
              zz += r.part_size;
              c_inter = zz&1;
              p_inter = zz>>1;
            }
          }
          //stb_prof(8);
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else if (ch == 1) {
        // unreachable: the enclosing condition requires ch != 1
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = 0, p_inter = z;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = 0;
              p_inter = zz;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else {
        // general channel count: same as the ch == 2 case, using % and / by ch
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = z%ch, p_inter = z/ch;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  // all other cases: every channel is classified and decoded on its own
  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        // read one classification codeword per active channel
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j][i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j][pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  //stb_prof(0);
  // release the classification storage and rewind the temp allocator
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1899 
1900 
1901 // the following were split out into separate functions while optimizing;
1902 // they could be pushed back up but eh. __forceinline showed no change;
1903 // they're probably already being inlined.
// Runs n>>2 blocks of four butterflies each over two windows of `e`:
// the "upper" window ends at e[i_off], the "lower" one k_off elements away.
// Every butterfly takes the pair at [-2p], [-2p-1] of both windows, stores the
// sums in the upper window and the differences, multiplied as a complex number
// by (A[0] + i*A[1]), in the lower window. A advances by 8 after every
// butterfly; both windows move down by 8 floats after every block.
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  float* upper = e+i_off;
  float* lower = upper+k_off;
  debug(stb_vorbis) assert((n&3) == 0);
  for (int blocksLeft = n>>2; blocksLeft > 0; --blocksLeft) {
    foreach (immutable pair; 0..4) {
      immutable int re = -2*pair;
      immutable int im = re-1;
      immutable float diffRe = upper[re]-lower[re];
      immutable float diffIm = upper[im]-lower[im];
      upper[re] += lower[re];
      upper[im] += lower[im];
      lower[re] = diffRe*A[0]-diffIm*A[1];
      lower[im] = diffIm*A[0]+diffRe*A[1];
      A += 8;
    }
    upper -= 8;
    lower -= 8;
  }
}
1945 
// Same butterfly as imdct_step3_iter0_loop, but with a caller-chosen twiddle
// stride: runs lim>>2 groups of four butterflies on the window ending at
// e[d0] and the window k_off elements away, advancing A by k1 after each
// butterfly and both windows by 8 floats after each group.
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* hiWin = e+d0;
  float* loWin = hiWin+k_off;
  for (int groupsLeft = lim>>2; groupsLeft > 0; --groupsLeft) {
    for (int back = 0; back < 8; back += 2) {
      float* h = hiWin-back;
      float* l = loWin-back;
      immutable float dRe = h[0]-l[0];
      immutable float dIm = h[-1]-l[-1];
      h[0] += l[0];
      h[-1] += l[-1];
      l[0] = dRe*A[0]-dIm*A[1];
      l[-1] = dIm*A[0]+dRe*A[1];
      A += k1;
    }
    hiWin -= 8;
    loWin -= 8;
  }
}
1991 
// Variant of the step-3 butterfly loop with the twiddles fixed for the whole
// call: four (re, im) twiddle pairs are read up front from A[0], A[a_off],
// A[2*a_off] and A[3*a_off] (plus the following element each), then the same
// four butterflies are applied n times, moving both windows down by k0 floats
// between repetitions.
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  float[4] twRe = void, twIm = void;
  foreach (immutable q; 0..4) {
    twRe[q] = A[a_off*q];
    twIm[q] = A[a_off*q+1];
  }

  float* top = e+i_off;
  float* bot = top+k_off;
  for (int rep = 0; rep < n; ++rep) {
    foreach (immutable q; 0..4) {
      immutable int re = -2*q;
      immutable int im = re-1;
      immutable float dRe = top[re]-bot[re];
      immutable float dIm = top[im]-bot[im];
      top[re] = top[re]+bot[re];
      top[im] = top[im]+bot[im];
      bot[re] = dRe*twRe[q]-dIm*twIm[q];
      bot[im] = dIm*twRe[q]+dRe*twIm[q];
    }
    top -= k0;
    bot -= k0;
  }
}
2037 
// this was forceinline
// Statement macro replacing the C function below. `z` is an expression that
// yields a float pointer; it is evaluated once into a temporary. The eight
// floats z[0], z[-1], ..., z[-7] are then rewritten in place with the sums and
// differences annotated on each store in the body (in those annotations "zK"
// means the original value of z[-K]).
// The statement order matters: every original value is read before the slot
// holding it is overwritten.
//void iter_54(float *z)
enum iter_54(string z) = q{{
  auto ${__temp_prefix__}z = (${z});
  float ${__temp_prefix__}k00, ${__temp_prefix__}k11, ${__temp_prefix__}k22, ${__temp_prefix__}k33;
  float ${__temp_prefix__}y0, ${__temp_prefix__}y1, ${__temp_prefix__}y2, ${__temp_prefix__}y3;

  ${__temp_prefix__}k00 = ${__temp_prefix__}z[ 0]-${__temp_prefix__}z[-4];
  ${__temp_prefix__}y0  = ${__temp_prefix__}z[ 0]+${__temp_prefix__}z[-4];
  ${__temp_prefix__}y2  = ${__temp_prefix__}z[-2]+${__temp_prefix__}z[-6];
  ${__temp_prefix__}k22 = ${__temp_prefix__}z[-2]-${__temp_prefix__}z[-6];

  ${__temp_prefix__}z[-0] = ${__temp_prefix__}y0+${__temp_prefix__}y2;   // z0+z4+z2+z6
  ${__temp_prefix__}z[-2] = ${__temp_prefix__}y0-${__temp_prefix__}y2;   // z0+z4-z2-z6

  // done with ${__temp_prefix__}y0, ${__temp_prefix__}y2

  ${__temp_prefix__}k33 = ${__temp_prefix__}z[-3]-${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-4] = ${__temp_prefix__}k00+${__temp_prefix__}k33; // z0-z4+z3-z7
  ${__temp_prefix__}z[-6] = ${__temp_prefix__}k00-${__temp_prefix__}k33; // z0-z4-z3+z7

  // done with ${__temp_prefix__}k33

  ${__temp_prefix__}k11 = ${__temp_prefix__}z[-1]-${__temp_prefix__}z[-5];
  ${__temp_prefix__}y1  = ${__temp_prefix__}z[-1]+${__temp_prefix__}z[-5];
  ${__temp_prefix__}y3  = ${__temp_prefix__}z[-3]+${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-1] = ${__temp_prefix__}y1+${__temp_prefix__}y3;   // z1+z5+z3+z7
  ${__temp_prefix__}z[-3] = ${__temp_prefix__}y1-${__temp_prefix__}y3;   // z1+z5-z3-z7
  ${__temp_prefix__}z[-5] = ${__temp_prefix__}k11-${__temp_prefix__}k22; // z1-z5+z2-z6
  ${__temp_prefix__}z[-7] = ${__temp_prefix__}k11+${__temp_prefix__}k22; // z1-z5-z2+z6
}}.cmacroFixVars!"z"(z);
2071 
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
  // Processes n blocks of 16 floats, walking downwards from e[i_off].
  // Each block gets two groups of four sum/difference butterflies between its
  // upper 8 floats and its lower 8 floats, followed by iter_54 on each half.
  // The only twiddle read from A is A[base_n>>3]; the remaining butterflies
  // use plain copies, a negation, or that single factor.
  const float tw = A[0+(base_n >> 3)];
  float* cur = e+i_off;
  float* stop = cur-16*n;

  for (; cur > stop; cur -= 16) {
    float* up = cur;   // block elements [0 .. -7]
    float* dn = cur-8; // block elements [-8 .. -15]

    // elements 0..-3 against -8..-11
    {
      immutable float a0 = up[ 0]-dn[ 0];
      immutable float a1 = up[-1]-dn[-1];
      immutable float b0 = up[-2]-dn[-2];
      immutable float b1 = up[-3]-dn[-3];
      up[ 0] += dn[ 0];
      up[-1] += dn[-1];
      up[-2] += dn[-2];
      up[-3] += dn[-3];
      dn[ 0] = a0;
      dn[-1] = a1;
      dn[-2] = (b0+b1)*tw;
      dn[-3] = (b1-b0)*tw;
    }

    // elements -4..-7 against -12..-15
    {
      immutable float a0 = up[-4]-dn[-4];
      immutable float a1 = up[-5]-dn[-5];
      immutable float b0 = up[-6]-dn[-6];
      immutable float b1 = up[-7]-dn[-7];
      up[-4] += dn[-4];
      up[-5] += dn[-5];
      up[-6] += dn[-6];
      up[-7] += dn[-7];
      dn[-4] = a1;
      dn[-5] = -a0;
      dn[-6] = (b1-b0)*tw;
      dn[-7] = (b0+b1) * -tw;
    }

    mixin(iter_54!"cur");
    mixin(iter_54!"cur-8");
  }
}
2114 
2115 private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
2116   import core.stdc.stdlib : alloca;
2117 
2118   int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
2119   int ld;
2120   // @OPTIMIZE: reduce register pressure by using fewer variables?
2121   int save_point = temp_alloc_save(f);
2122   float *buf2;
2123   buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
2124   float *u = null, v = null;
2125   // twiddle factors
2126   float *A = f.A.ptr[blocktype];
2127 
2128   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2129   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2130 
2131   // kernel from paper
2132 
2133 
2134   // merged:
2135   //   copy and reflect spectral data
2136   //   step 0
2137 
2138   // note that it turns out that the items added together during
2139   // this step are, in fact, being added to themselves (as reflected
2140   // by step 0). inexplicable inefficiency! this became obvious
2141   // once I combined the passes.
2142 
2143   // so there's a missing 'times 2' here (for adding X to itself).
2144   // this propogates through linearly to the end, where the numbers
2145   // are 1/2 too small, and need to be compensated for.
2146 
2147   {
2148     float* d, e, AA, e_stop;
2149     d = &buf2[n2-2];
2150     AA = A;
2151     e = &buffer[0];
2152     e_stop = &buffer[n2];
2153     while (e != e_stop) {
2154       d[1] = (e[0]*AA[0]-e[2]*AA[1]);
2155       d[0] = (e[0]*AA[1]+e[2]*AA[0]);
2156       d -= 2;
2157       AA += 2;
2158       e += 4;
2159     }
2160     e = &buffer[n2-3];
2161     while (d >= buf2) {
2162       d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
2163       d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
2164       d -= 2;
2165       AA += 2;
2166       e -= 4;
2167     }
2168   }
2169 
2170   // now we use symbolic names for these, so that we can
2171   // possibly swap their meaning as we change which operations
2172   // are in place
2173 
2174   u = buffer;
2175   v = buf2;
2176 
2177   // step 2    (paper output is w, now u)
2178   // this could be in place, but the data ends up in the wrong
2179   // place... _somebody_'s got to swap it, so this is nominated
2180   {
2181     float* AA = &A[n2-8];
2182     float* d0, d1, e0, e1;
2183     e0 = &v[n4];
2184     e1 = &v[0];
2185     d0 = &u[n4];
2186     d1 = &u[0];
2187     while (AA >= A) {
2188       float v40_20, v41_21;
2189 
2190       v41_21 = e0[1]-e1[1];
2191       v40_20 = e0[0]-e1[0];
2192       d0[1]  = e0[1]+e1[1];
2193       d0[0]  = e0[0]+e1[0];
2194       d1[1]  = v41_21*AA[4]-v40_20*AA[5];
2195       d1[0]  = v40_20*AA[4]+v41_21*AA[5];
2196 
2197       v41_21 = e0[3]-e1[3];
2198       v40_20 = e0[2]-e1[2];
2199       d0[3]  = e0[3]+e1[3];
2200       d0[2]  = e0[2]+e1[2];
2201       d1[3]  = v41_21*AA[0]-v40_20*AA[1];
2202       d1[2]  = v40_20*AA[0]+v41_21*AA[1];
2203 
2204       AA -= 8;
2205 
2206       d0 += 4;
2207       d1 += 4;
2208       e0 += 4;
2209       e1 += 4;
2210     }
2211   }
2212 
2213   // step 3
2214   ld = ilog(n)-1; // ilog is off-by-one from normal definitions
2215 
2216   // optimized step 3:
2217 
2218   // the original step3 loop can be nested r inside s or s inside r;
2219   // it's written originally as s inside r, but this is dumb when r
2220   // iterates many times, and s few. So I have two copies of it and
2221   // switch between them halfway.
2222 
2223   // this is iteration 0 of step 3
2224   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
2225   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);
2226 
2227   // this is iteration 1 of step 3
2228   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
2229   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
2230   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
2231   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);
2232 
2233   l = 2;
2234   for (; l < (ld-3)>>1; ++l) {
2235     int k0 = n>>(l+2), k0_2 = k0>>1;
2236     int lim = 1<<(l+1);
2237     foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
2238   }
2239 
2240   for (; l < ld-6; ++l) {
2241     int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
2242     int rlim = n>>(l+6);
2243     int lim = 1<<(l+1);
2244     int i_off;
2245     float *A0 = A;
2246     i_off = n2-1;
2247     foreach (immutable _; 0..rlim) {
2248       imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2249       A0 += k1*4;
2250       i_off -= 8;
2251     }
2252   }
2253 
2254   // iterations with count:
2255   //   ld-6,-5,-4 all interleaved together
2256   //       the big win comes from getting rid of needless flops
2257   //         due to the constants on pass 5 & 4 being all 1 and 0;
2258   //       combining them to be simultaneous to improve cache made little difference
2259   imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);
2260 
2261   // output is u
2262 
2263   // step 4, 5, and 6
2264   // cannot be in-place because of step 5
2265   {
2266     ushort *bitrev = f.bit_reverse.ptr[blocktype];
2267     // weirdly, I'd have thought reading sequentially and writing
2268     // erratically would have been better than vice-versa, but in
2269     // fact that's not what my testing showed. (That is, with
2270     // j = bitreverse(i), do you read i and write j, or read j and write i.)
2271     float *d0 = &v[n4-4];
2272     float *d1 = &v[n2-4];
2273     int k4;
2274     while (d0 >= v) {
2275       k4 = bitrev[0];
2276       d1[3] = u[k4+0];
2277       d1[2] = u[k4+1];
2278       d0[3] = u[k4+2];
2279       d0[2] = u[k4+3];
2280 
2281       k4 = bitrev[1];
2282       d1[1] = u[k4+0];
2283       d1[0] = u[k4+1];
2284       d0[1] = u[k4+2];
2285       d0[0] = u[k4+3];
2286 
2287       d0 -= 4;
2288       d1 -= 4;
2289       bitrev += 2;
2290     }
2291   }
2292   // (paper output is u, now v)
2293 
2294 
2295   // data must be in buf2
2296   debug(stb_vorbis) assert(v == buf2);
2297 
2298   // step 7   (paper output is v, now v)
2299   // this is now in place
2300   {
2301     float a02, a11, b0, b1, b2, b3;
2302     float* C = f.C.ptr[blocktype];
2303     float* d, e;
2304     d = v;
2305     e = v+n2-4;
2306     while (d < e) {
2307       a02 = d[0]-e[2];
2308       a11 = d[1]+e[3];
2309 
2310       b0 = C[1]*a02+C[0]*a11;
2311       b1 = C[1]*a11-C[0]*a02;
2312 
2313       b2 = d[0]+e[ 2];
2314       b3 = d[1]-e[ 3];
2315 
2316       d[0] = b2+b0;
2317       d[1] = b3+b1;
2318       e[2] = b2-b0;
2319       e[3] = b1-b3;
2320 
2321       a02 = d[2]-e[0];
2322       a11 = d[3]+e[1];
2323 
2324       b0 = C[3]*a02+C[2]*a11;
2325       b1 = C[3]*a11-C[2]*a02;
2326 
2327       b2 = d[2]+e[ 0];
2328       b3 = d[3]-e[ 1];
2329 
2330       d[2] = b2+b0;
2331       d[3] = b3+b1;
2332       e[0] = b2-b0;
2333       e[1] = b1-b3;
2334 
2335       C += 4;
2336       d += 4;
2337       e -= 4;
2338     }
2339   }
2340 
2341   // data must be in buf2
2342 
2343 
2344   // step 8+decode   (paper output is X, now buffer)
2345   // this generates pairs of data a la 8 and pushes them directly through
2346   // the decode kernel (pushing rather than pulling) to avoid having
2347   // to make another pass later
2348 
2349   // this cannot POSSIBLY be in place, so we refer to the buffers directly
2350   {
2351     float p0, p1, p2, p3;
2352     float* d0, d1, d2, d3;
2353     float* B = f.B.ptr[blocktype]+n2-8;
2354     float* e = buf2+n2-8;
2355     d0 = &buffer[0];
2356     d1 = &buffer[n2-4];
2357     d2 = &buffer[n2];
2358     d3 = &buffer[n-4];
2359     while (e >= v) {
2360       p3 =  e[6]*B[7]-e[7]*B[6];
2361       p2 = -e[6]*B[6]-e[7]*B[7];
2362 
2363       d0[0] =   p3;
2364       d1[3] =  -p3;
2365       d2[0] =   p2;
2366       d3[3] =   p2;
2367 
2368       p1 =  e[4]*B[5]-e[5]*B[4];
2369       p0 = -e[4]*B[4]-e[5]*B[5];
2370 
2371       d0[1] =   p1;
2372       d1[2] = - p1;
2373       d2[1] =   p0;
2374       d3[2] =   p0;
2375 
2376       p3 =  e[2]*B[3]-e[3]*B[2];
2377       p2 = -e[2]*B[2]-e[3]*B[3];
2378 
2379       d0[2] =   p3;
2380       d1[1] = - p3;
2381       d2[2] =   p2;
2382       d3[1] =   p2;
2383 
2384       p1 =  e[0]*B[1]-e[1]*B[0];
2385       p0 = -e[0]*B[0]-e[1]*B[1];
2386 
2387       d0[3] =   p1;
2388       d1[0] = - p1;
2389       d2[3] =   p0;
2390       d3[0] =   p0;
2391 
2392       B -= 8;
2393       e -= 8;
2394       d0 += 4;
2395       d2 += 4;
2396       d1 -= 4;
2397       d3 -= 4;
2398     }
2399   }
2400 
2401   temp_free(f, buf2);
2402   temp_alloc_restore(f, save_point);
2403 }
2404 
/// Picks the window table from `f.window` that belongs to a half-block of
/// `len` samples.
///
/// `len` is doubled and compared against the decoder's two block sizes;
/// slot 0 is tested first, so it is the one returned when both sizes match.
/// A length that matches neither block size is treated as a programming
/// error and halts via `assert(0)`.
private float *get_window (VorbisDecoder f, int len) {
  immutable int fullLength = len<<1;
  if (fullLength == f.blocksize_0) {
    return f.window.ptr[0];
  } else if (fullLength == f.blocksize_1) {
    return f.window.ptr[1];
  }
  assert(0);
}
2411 
// YTYPE is the element type of the `finalY` array handed to `do_floor`:
// `int` when STB_VORBIS_NO_DEFER_FLOOR is defined, `short` otherwise.
version(STB_VORBIS_NO_DEFER_FLOOR) alias YTYPE = int;
else alias YTYPE = short;
2417 
/// Applies the floor-1 curve of channel `i` to `target`.
///
/// The floor points are visited in `sorted_order`. A point is used when its
/// `step2_flag` entry is set (STB_VORBIS_NO_DEFER_FLOOR builds) or when its
/// `finalY` entry is non-negative (default build). Each pair of consecutive
/// used points with different X positions is handed to the `draw_line`
/// mixin, limited to `n>>1` samples. Whatever remains after the last used
/// point is processed by `LINE_OP` with the constant `inverse_db_table[ly]`.
///
/// Params:
///   f          = decoder state
///   map        = mapping that selects the floor for channel `i`
///   i          = channel index
///   n          = block size; only the first `n>>1` samples are touched
///   target     = buffer the curve is applied to
///   finalY     = decoded Y values of the floor points
///   step2_flag = per-point "used" flags (read only in NO_DEFER_FLOOR builds)
///
/// Returns: `true` on success; the result of `error()` with `invalid_stream`
/// when the selected floor is type 0 (floor 0 is not supported).
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  immutable int floorIdx = map.submap_floor.ptr[map.chan[i].mux];
  if (f.floor_types.ptr[floorIdx] == 0) return error(f, STBVorbisError.invalid_stream);

  int n2 = n>>1;
  Floor1* g = &f.floor_config[floorIdx].floor1;

  // (lx, ly) is the most recent used point; the curve starts at x == 0
  int lx = 0;
  int ly = finalY[0]*g.floor1_multiplier;

  foreach (immutable q; 1..g.values) {
    immutable int point = g.sorted_order.ptr[q];
    // skip points that were not marked as used by the floor decode
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      if (!step2_flag[point]) continue;
    } else {
      if (finalY[point] < 0) continue;
    }
    int hy = finalY[point]*g.floor1_multiplier;
    int hx = g.Xlist.ptr[point];
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }

  if (lx < n2) {
    // optimization of: draw_line(target, lx, ly, n, ly, n2);
    foreach (immutable j; lx..n2) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  }
  return true;
}
2448 
2449 // The meaning of "left" and "right"
2450 //
2451 // For a given frame:
2452 //     we compute samples from 0..n
2453 //     window_center is n/2
2454 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2455 //     all of the samples from left_end to right_start can be output without mixing; however,
2456 //        this interval is 0-length except when transitioning between short and long frames
2457 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2458 //        which we don't have, so those get saved in a buffer
2459 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2460 //        has to be the same as frame N+1's left_end-left_start (which they are by
2461 //        construction)
2462 
/// Starts the next audio packet and reports its mode and window bounds.
///
/// Packets whose first bit is non-zero are skipped byte by byte until their
/// end and the search continues with the following packet. For the accepted
/// packet the mode number is read, and for long blocks (`blockflag` set) two
/// more bits give the previous/next window flags.
///
/// Params:
///   f             = decoder state; its channel buffer start/end are reset to 0
///   p_left_start  = receives the start of the left window region
///   p_left_end    = receives the end of the left window region
///   p_right_start = receives the start of the right window region
///   p_right_end   = receives the end of the right window region
///   mode          = receives the index of the packet's mode
///
/// Returns: `true` when a packet header was decoded; `false` on end of
/// stream, when no packet could be started, or when the mode number is
/// missing (EOP) or out of range.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_start = f.channel_buffer_end = 0;

  // look for a packet whose type bit is 0; drain and skip every other packet
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    if (get_bits!1(f) == 0) break;
    /+if (f.push_mode) return error(f, STBVorbisError.bad_packet_type);+/
    while (get8_packet(f) != EOP) {}
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  immutable int modeIndex = get_bits_main(f, ilog(f.mode_count-1));
  if (modeIndex == EOP) return false;
  if (modeIndex >= f.mode_count) return false;
  *mode = modeIndex;

  Mode* m = f.mode_config.ptr+modeIndex;
  int n, prev, next;
  if (m.blockflag) {
    // long block: the neighbouring window flags are stored in the packet
    n = f.blocksize_1;
    prev = get_bits!1(f);
    next = get_bits!1(f);
  } else {
    n = f.blocksize_0;
    prev = 0;
    next = 0;
  }

  // WINDOWING
  immutable int window_center = n>>1;
  // a long block next to a short one uses a short-sized slope on that side
  immutable bool shortLeft = (m.blockflag && !prev);
  immutable bool shortRight = (m.blockflag && !next);

  *p_left_start = (shortLeft ? (n-f.blocksize_0)>>2 : 0);
  *p_left_end = (shortLeft ? (n+f.blocksize_0)>>2 : window_center);
  *p_right_start = (shortRight ? (n*3-f.blocksize_0)>>2 : window_center);
  *p_right_end = (shortRight ? (n*3+f.blocksize_0)>>2 : n);
  return true;
}
2512 
/// Decodes the body of an audio packet whose header has already been parsed
/// (see `vorbis_decode_packet`, which calls `vorbis_decode_initial` first).
///
/// Stages, in order: read the floor-1 data of every channel, decode the
/// residue of every submap, undo channel coupling, apply the floors, run the
/// inverse MDCT on every channel, flush the packet, and finally update the
/// decoder's sample-position bookkeeping.
///
/// Params:
///   f           = decoder state
///   len         = receives the number of samples to consider from this frame
///   m           = mode of the packet (selects block size and mapping)
///   left_start  = start of the left window region
///   left_end    = end of the left window region (not used by this function)
///   right_start = start of the right window region
///   right_end   = end of the right window region
///   p_left      = updated with the adjusted `left_start` when deferred
///                 sample discarding applies
///
/// Returns: `true` on success; the result of `error()` with `invalid_stream`
/// when a channel selects a type-0 floor.
private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
  import core.stdc.string : memcpy, memset;

  int[256] zero_channel;        // non-zero: nothing is decoded for this channel
  int[256] really_zero_channel; // copy of zero_channel taken before coupling is considered

  // WINDOWING
  int n = f.blocksize.ptr[m.blockflag];
  int n2 = n>>1;
  Mapping* map = &f.mapping[m.mapping];

  // FLOORS
  //stb_prof(1);
  foreach (immutable i; 0..f.vrchannels) {
    zero_channel[i] = false;
    immutable int floorIdx = map.submap_floor.ptr[map.chan[i].mux];
    if (f.floor_types.ptr[floorIdx] == 0) return error(f, STBVorbisError.invalid_stream);
    Floor1* g = &f.floor_config[floorIdx].floor1;

    // a single flag bit says whether this channel carries floor data at all
    if (!get_bits!1(f)) {
      zero_channel[i] = true;
      continue;
    }

    ubyte[256] step2_flag = void;
    immutable int[4] range_list = [ 256, 128, 86, 64 ];
    int range = range_list[g.floor1_multiplier-1];
    short* finalY = f.finalY.ptr[i];
    int offset = 2;

    // the first two Y values are stored as plain bit fields
    finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
    finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8

    // the remaining Y values are codebook-coded, partition by partition
    foreach (immutable j; 0..g.partitions) {
      int pclass = g.partition_class_list.ptr[j];
      int cdim = g.class_dimensions.ptr[pclass];
      int cbits = g.class_subclasses.ptr[pclass];
      int csub = (1<<cbits)-1;
      int cval = 0;
      if (cbits) {
        Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
        mixin(DECODE!("cval", "cc"));
      }
      foreach (immutable k; 0..cdim) {
        int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
        cval = cval>>cbits;
        if (book < 0) {
          finalY[offset++] = 0;
        } else {
          int temp;
          Codebook *cc = f.codebooks+book;
          mixin(DECODE!("temp", "cc"));
          finalY[offset++] = cast(short)temp; //k8
        }
      }
    }

    // ran out of packet data while reading the floor: treat the channel as
    // unused (behavior according to spec)
    if (f.valid_bits == INVALID_BITS) {
      zero_channel[i] = true;
      continue;
    }

    // turn the decoded differences into absolute Y values
    step2_flag[0] = step2_flag[1] = 1;
    foreach (immutable j; 2..g.values) {
      int low = g.neighbors.ptr[j].ptr[0];
      int high = g.neighbors.ptr[j].ptr[1];
      //neighbors(g.Xlist, j, &low, &high);
      int pred = void;
      mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
      int val = finalY[j];
      int highroom = range-pred;
      int lowroom = pred;
      int room;
      if (highroom < lowroom) room = highroom*2; else room = lowroom*2;

      if (!val) {
        step2_flag[j] = 0;
        finalY[j] = cast(short)pred; //k8
        continue;
      }

      step2_flag[low] = step2_flag[high] = 1;
      step2_flag[j] = 1;
      int decodedY;
      if (val >= room) {
        if (highroom > lowroom) decodedY = val-lowroom+pred; else decodedY = pred-val+highroom-1;
      } else {
        if (val&1) decodedY = pred-((val+1)>>1); else decodedY = pred+(val>>1);
      }
      finalY[j] = cast(short)decodedY; //k8
    }

    version(STB_VORBIS_NO_DEFER_FLOOR) {
      do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
    } else {
      // defer final floor computation until _after_ residue;
      // unused points are marked with -1 so do_floor can skip them
      foreach (immutable j; 0..g.values) {
        if (!step2_flag[j]) finalY[j] = -1;
      }
    }
  }
  //stb_prof(0);
  // at this point we've decoded all floors

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // remember which channels really had no floor, then re-enable any channel
  // whose coupling partner is active
  memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof*f.vrchannels);
  foreach (immutable i; 0..map.coupling_steps) {
    if (!(zero_channel[map.chan[i].magnitude] && zero_channel[map.chan[i].angle])) {
      zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = false;
    }
  }

  // RESIDUE DECODE
  foreach (immutable i; 0..map.submaps) {
    float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
    ubyte[256] do_not_decode = void;
    int ch = 0;
    // gather the channels that belong to submap i
    foreach (immutable j; 0..f.vrchannels) {
      if (map.chan[j].mux != i) continue;
      if (!zero_channel[j]) {
        do_not_decode[ch] = false;
        residue_buffers.ptr[ch] = f.channel_buffers.ptr[j];
      } else {
        do_not_decode[ch] = true;
        residue_buffers.ptr[ch] = null;
      }
      ++ch;
    }
    int r = map.submap_residue.ptr[i];
    decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // INVERSE COUPLING
  //stb_prof(14);
  foreach_reverse (immutable i; 0..map.coupling_steps) {
    float* mm = f.channel_buffers.ptr[map.chan[i].magnitude];
    float* a = f.channel_buffers.ptr[map.chan[i].angle];
    foreach (immutable j; 0..n2) {
      immutable float mv = mm[j];
      immutable float av = a[j];
      // one of the two outputs always keeps the magnitude unchanged
      float m2 = mv;
      float a2 = mv;
      if (mv > 0) {
        if (av > 0) a2 = mv-av; else m2 = mv+av;
      } else {
        if (av > 0) a2 = mv+av; else m2 = mv-av;
      }
      mm[j] = m2;
      a[j] = a2;
    }
  }

  // finish decoding the floors
  //stb_prof(15);
  foreach (immutable i; 0..f.vrchannels) {
    if (really_zero_channel[i]) {
      memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      continue;
    }
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      foreach (immutable j; 0..n2) f.channel_buffers.ptr[i].ptr[j] *= f.floor_buffers.ptr[i].ptr[j];
    } else {
      do_floor(f, map, i, n, f.channel_buffers.ptr[i], f.finalY.ptr[i], null);
    }
  }

  // INVERSE MDCT
  //stb_prof(16);
  foreach (immutable i; 0..f.vrchannels) {
    inverse_mdct(f.channel_buffers.ptr[i], n, f, m.blockflag);
  }
  //stb_prof(0);

  // this shouldn't be necessary, unless we exited on an error
  // and want to flush to get to the next packet
  flush_packet(f);

  if (f.first_decode) {
    // assume we start so first non-discarded sample is sample 0
    // this isn't to spec, but spec would require us to read ahead
    // and decode the size of all current frames--could be done,
    // but presumably it's not a commonly used feature
    f.current_loc = -n2; // start of first frame is positioned for discard
    // we might have to discard samples "from" the next frame too,
    // if we're lapping a large block then a small at the start?
    f.discard_samples_deferred = n-right_end;
    f.current_loc_valid = true;
    f.first_decode = false;
  } else if (f.discard_samples_deferred) {
    if (f.discard_samples_deferred >= right_start-left_start) {
      // the whole returnable region of this frame gets discarded
      f.discard_samples_deferred -= (right_start-left_start);
      left_start = right_start;
    } else {
      left_start += f.discard_samples_deferred;
      f.discard_samples_deferred = 0;
    }
    *p_left = left_start;
  }
  // (when recovering from a seek nothing extra is needed here: the position
  // update below already accounts for the discarded samples)

  // check if we have ogg information about the sample # for this packet
  if (f.last_seg_which == f.end_seg_with_known_loc) {
    // if we have a valid current loc, and this is final:
    if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
      uint current_end = f.known_loc_for_packet-(n-right_end);
      // then let's infer the size of the (probably) short final frame
      if (current_end < f.current_loc+right_end) {
        int frameLen;
        if (current_end < f.current_loc+(right_end-left_start)) {
          // negative truncation, that's impossible!
          frameLen = 0;
        } else {
          frameLen = current_end-f.current_loc;
        }
        frameLen += left_start;
        if (frameLen > right_end) frameLen = right_end; // this should never happen
        *len = frameLen;
        f.current_loc += frameLen;
        return true;
      }
    }
    // otherwise, just set our sample loc
    // guess that the ogg granule pos refers to the _middle_ of the
    // last frame?
    // set f.current_loc to the position of left_start
    f.current_loc = f.known_loc_for_packet-(n2-left_start);
    f.current_loc_valid = true;
  }
  if (f.current_loc_valid) f.current_loc += (right_start-left_start);

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  *len = right_end;  // ignore samples after the window goes to 0
  return true;
}
2755 
/// Decodes one complete audio packet: header first, then the body.
///
/// Params:
///   f       = decoder state
///   len     = receives the frame length reported by `vorbis_decode_packet_rest`
///   p_left  = receives the start of the left window region
///   p_right = receives the start of the right window region
///
/// Returns: 0 when `vorbis_decode_initial` fails, otherwise whatever
/// `vorbis_decode_packet_rest` returns.
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int modeIndex, leftEnd, rightEnd;
  immutable headerOk = vorbis_decode_initial(f, p_left, &leftEnd, p_right, &rightEnd, &modeIndex);
  if (!headerOk) return 0;
  Mode* packetMode = f.mode_config.ptr+modeIndex;
  return vorbis_decode_packet_rest(f, len, packetMode, *p_left, leftEnd, *p_right, rightEnd, p_left);
}
2761 
/// Overlaps the freshly decoded frame with the saved tail of the previous
/// one, saves this frame's tail for the next call, and reports how many
/// samples are ready.
///
/// `left` is where this frame's rising window starts, which is where mixing
/// with the previous frame's saved samples begins. `right` is where the
/// falling window starts: everything from there up to `len` is saved into
/// `previous_window`, and the returned data ends there.
///
/// Params:
///   f     = decoder state
///   len   = number of samples to consider from this frame
///   left  = start of the left window region
///   right = start of the right window region
///
/// Returns: the number of finished samples (`right-left`, with `right`
/// clamped to `len`), or 0 when there was no previous frame to overlap with.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  immutable prevLength = f.previous_length;

  // mix in the tail of the previous frame
  if (prevLength) {
    float *w = get_window(f, prevLength);
    foreach (immutable ch; 0..f.vrchannels) {
      auto cur = f.channel_buffers.ptr[ch];
      auto saved = f.previous_window.ptr[ch];
      foreach (immutable j; 0..prevLength) {
        cur[left+j] = cur[left+j]*w[j]+saved[j]*w[prevLength-1-j];
      }
    }
  }

  // last half of this data becomes previous window
  f.previous_length = len-right;

  // @OPTIMIZE: could avoid this copy by double-buffering the
  // output (flipping previous_window with channel_buffers), but
  // then previous_window would have to be 2x as large, and
  // channel_buffers couldn't be temp mem (although they're NOT
  // currently temp mem, they could be (unless we want to level
  // performance by spreading out the computation))
  foreach (immutable ch; 0..f.vrchannels) {
    auto cur = f.channel_buffers.ptr[ch];
    auto saved = f.previous_window.ptr[ch];
    uint j = 0;
    while (right+j < len) {
      saved[j] = cur[right+j];
      ++j;
    }
  }

  // there was no previous packet, so this data isn't valid...
  // this isn't entirely true, only the would-have-overlapped data
  // isn't valid, but this seems to be what the spec requires
  if (!prevLength) return 0;

  // truncate a short frame
  immutable int produced = (len < right ? len : right)-left;
  f.samples_output += produced;
  return produced;
}
2813 
/// Decodes one packet and pushes it through `vorbis_finish_frame`, ignoring
/// the sample count that call returns.
///
/// Returns: `true` when a packet was decoded, `false` otherwise.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int len, left, right;
  if (!vorbis_decode_packet(f, &len, &left, &right)) return false;
  vorbis_finish_frame(f, len, left, right);
  return true;
}
2822 
2823 /+ k8: i don't need that, so it's dead
2824 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2825   import core.stdc.string : memcmp;
2826 
2827   // make sure that we have the packet available before continuing...
2828   // this requires a full ogg parse, but we know we can fetch from f.stream
2829 
2830   // instead of coding this out explicitly, we could save the current read state,
2831   // read the next packet with get8() until end-of-packet, check f.eof, then
2832   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2833   // of state to restore (primarily the page segment table)
2834 
2835   int s = f.next_seg, first = true;
2836   ubyte *p = f.stream;
2837 
2838   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2839     for (; s < f.segment_count; ++s) {
2840       p += f.segments[s];
2841       if (f.segments[s] < 255) break; // stop at first short segment
2842     }
2843     // either this continues, or it ends it...
2844     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2845     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2846     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2847     first = false;
2848   }
2849   while (s == -1) {
2850     ubyte* q = void;
2851     int n = void;
2852     // check that we have the page header ready
2853     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2854     // validate the page
2855     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2856     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2857     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2858       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2859       // if no previous length, we're resynching, so we can come in on a continued-packet,
2860       // which we'll just drop
2861     } else {
2862       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2863     }
2864     n = p[26]; // segment counts
2865     q = p+27; // q points to segment table
2866     p = q+n; // advance past header
2867     // make sure we've read the segment table
2868     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2869     for (s = 0; s < n; ++s) {
2870       p += q[s];
2871       if (q[s] < 255) break;
2872     }
2873     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2874     if (s == n) s = -1; // set 'crosses page' flag
2875     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2876     first = false;
2877   }
2878   return true;
2879 }
2880 +/
2881 
2882 private int start_decoder (VorbisDecoder f) {
2883   import core.stdc.string : memcpy, memset;
2884 
2885   ubyte[6] header;
2886   ubyte x, y;
2887   int len, max_submaps = 0;
2888   int longest_floorlist = 0;
2889 
2890   // first page, first packet
2891 
2892   if (!start_page(f)) return false;
2893   // validate page flag
2894   if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
2895   if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
2896   if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
2897   // check for expected packet length
2898   if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
2899   if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
2900   // read packet
2901   // check packet header
2902   if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
2903   if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
2904   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
2905   // vorbis_version
2906   if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
2907   f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
2908   if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
2909   f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
2910   get32(f); // bitrate_maximum
2911   get32(f); // bitrate_nominal
2912   get32(f); // bitrate_minimum
2913   x = get8(f);
2914   {
2915     int log0 = x&15;
2916     int log1 = x>>4;
2917     f.blocksize_0 = 1<<log0;
2918     f.blocksize_1 = 1<<log1;
2919     if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
2920     if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
2921     if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
2922   }
2923 
2924   // framing_flag
2925   x = get8(f);
2926   if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);
2927 
2928   // second packet! (comments)
2929   if (!start_page(f)) return false;
2930 
2931   // read comments
2932   if (!start_packet(f)) return false;
2933 
2934   if (f.read_comments) {
2935     /+if (f.push_mode) {
2936       if (!is_whole_packet_present(f, true)) {
2937         // convert error in ogg header to write type
2938         if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2939         return false;
2940       }
2941     }+/
2942     if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
2943     foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2944     if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2945 
2946     // skip vendor id
2947     uint vidsize = get32_packet(f);
2948     //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
2949     if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
2950     while (vidsize--) get8_packet(f);
2951 
2952     // read comments section
2953     uint cmtcount = get32_packet(f);
2954     if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
2955     if (cmtcount > 0) {
2956       uint cmtsize = 32768; // this should be enough for everyone
2957       f.comment_data = setup_malloc!ubyte(f, cmtsize);
2958       if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
2959       auto cmtpos = 0;
2960       auto d = f.comment_data;
2961       while (cmtcount--) {
2962         uint linelen = get32_packet(f);
2963         //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
2964         if (linelen == EOP || linelen > ushort.max-2) break;
2965         if (linelen == 0) { continue; }
2966         if (cmtpos+2+linelen > cmtsize) break;
2967         cmtpos += linelen+2;
2968         *d++ = (linelen+2)&0xff;
2969         *d++ = ((linelen+2)>>8)&0xff;
2970         while (linelen--) {
2971           auto b = get8_packet(f);
2972           if (b == EOP) return error(f, STBVorbisError.outofmem);
2973           *d++ = cast(ubyte)b;
2974         }
2975         //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
2976         f.comment_size = cmtpos;
2977       }
2978     }
2979     flush_packet(f);
2980     f.comment_rewind();
2981   } else {
2982     // skip comments
2983     do {
2984       len = next_segment(f);
2985       skip(f, len);
2986       f.bytes_in_seg = 0;
2987     } while (len);
2988   }
2989 
2990   // third packet!
2991   if (!start_packet(f)) return false;
2992 
2993   /+if (f.push_mode) {
2994     if (!is_whole_packet_present(f, true)) {
2995       // convert error in ogg header to write type
2996       if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2997       return false;
2998     }
2999   }+/
3000 
3001   if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
3002   foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
3003   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
3004 
3005   // codebooks
3006   f.codebook_count = get_bits!8(f)+1;
3007   f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
3008   static assert((*f.codebooks).sizeof == Codebook.sizeof);
3009   if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
3010   memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
3011   foreach (immutable i; 0..f.codebook_count) {
3012     uint* values;
3013     int ordered, sorted_count;
3014     int total = 0;
3015     ubyte* lengths;
3016     Codebook* c = f.codebooks+i;
3017     x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
3018     x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
3019     x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
3020     x = get_bits!8(f);
3021     c.dimensions = (get_bits!8(f)<<8)+x;
3022     x = get_bits!8(f);
3023     y = get_bits!8(f);
3024     c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
3025     ordered = get_bits!1(f);
3026     c.sparse = (ordered ? 0 : get_bits!1(f));
3027 
3028     if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);
3029 
3030     if (c.sparse) {
3031       lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
3032     } else {
3033       lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3034     }
3035 
3036     if (lengths is null) return error(f, STBVorbisError.outofmem);
3037 
3038     if (ordered) {
3039       int current_entry = 0;
3040       int current_length = get_bits_add_no!5(f, 1);
3041       while (current_entry < c.entries) {
3042         int limit = c.entries-current_entry;
3043         int n = get_bits_main(f, ilog(limit));
3044         if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
3045         memset(lengths+current_entry, current_length, n);
3046         current_entry += n;
3047         ++current_length;
3048       }
3049     } else {
3050       foreach (immutable j; 0..c.entries) {
3051         int present = (c.sparse ? get_bits!1(f) : 1);
3052         if (present) {
3053           lengths[j] = get_bits_add_no!5(f, 1);
3054           ++total;
3055           if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
3056         } else {
3057           lengths[j] = NO_CODE;
3058         }
3059       }
3060     }
3061 
3062     if (c.sparse && total >= c.entries>>2) {
3063       // convert sparse items to non-sparse!
3064       if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
3065       c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3066       if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
3067       memcpy(c.codeword_lengths, lengths, c.entries);
3068       setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
3069       lengths = c.codeword_lengths;
3070       c.sparse = 0;
3071     }
3072 
3073     // compute the size of the sorted tables
3074     if (c.sparse) {
3075       sorted_count = total;
3076     } else {
3077       sorted_count = 0;
3078       version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
3079         foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
3080       }
3081     }
3082 
3083     c.sorted_entries = sorted_count;
3084     values = null;
3085 
3086     if (!c.sparse) {
3087       c.codewords = setup_malloc!uint(f, c.entries);
3088       if (!c.codewords) return error(f, STBVorbisError.outofmem);
3089     } else {
3090       if (c.sorted_entries) {
3091         c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
3092         if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
3093         c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3094         if (!c.codewords) return error(f, STBVorbisError.outofmem);
3095         values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
3096         if (!values) return error(f, STBVorbisError.outofmem);
3097       }
3098       uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
3099       if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
3100     }
3101 
3102     if (!compute_codewords(c, lengths, c.entries, values)) {
3103       if (c.sparse) setup_temp_free(f, values, 0);
3104       return error(f, STBVorbisError.invalid_setup);
3105     }
3106 
3107     if (c.sorted_entries) {
3108       // allocate an extra slot for sentinels
3109       c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
3110       if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
3111       // allocate an extra slot at the front so that c.sorted_values[-1] is defined
3112       // so that we can catch that case without an extra if
3113       c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
3114       if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
3115       ++c.sorted_values;
3116       c.sorted_values[-1] = -1;
3117       compute_sorted_huffman(c, lengths, values);
3118     }
3119 
3120     if (c.sparse) {
3121       setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
3122       setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3123       setup_temp_free(f, lengths, c.entries);
3124       c.codewords = null;
3125     }
3126 
3127     compute_accelerated_huffman(c);
3128 
3129     c.lookup_type = get_bits!4(f);
3130     if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
3131     if (c.lookup_type > 0) {
3132       ushort* mults;
3133       c.minimum_value = float32_unpack(get_bits!32(f));
3134       c.delta_value = float32_unpack(get_bits!32(f));
3135       c.value_bits = get_bits_add_no!4(f, 1);
3136       c.sequence_p = get_bits!1(f);
3137       if (c.lookup_type == 1) {
3138         c.lookup_values = lookup1_values(c.entries, c.dimensions);
3139       } else {
3140         c.lookup_values = c.entries*c.dimensions;
3141       }
3142       if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
3143       mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
3144       if (mults is null) return error(f, STBVorbisError.outofmem);
3145       foreach (immutable j; 0..cast(int)c.lookup_values) {
3146         int q = get_bits_main(f, c.value_bits);
3147         if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
3148         mults[j] = cast(ushort)q; //k8
3149       }
3150 
3151       version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
3152         if (c.lookup_type == 1) {
3153           int sparse = c.sparse; //len
3154           float last = 0;
3155           // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3156           if (sparse) {
3157             if (c.sorted_entries == 0) goto skip;
3158             c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
3159           } else {
3160             c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
3161           }
3162           if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3163           foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
3164             uint z = (sparse ? c.sorted_values[j] : j);
3165             uint div = 1;
3166             foreach (immutable k; 0..c.dimensions) {
3167               int off = (z/div)%c.lookup_values;
3168               float val = mults[off];
3169               val = val*c.delta_value+c.minimum_value+last;
3170               c.multiplicands[j*c.dimensions+k] = val;
3171               if (c.sequence_p) last = val;
3172               if (k+1 < c.dimensions) {
3173                  if (div > uint.max/cast(uint)c.lookup_values) {
3174                     setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3175                     return error(f, STBVorbisError.invalid_setup);
3176                  }
3177                  div *= c.lookup_values;
3178               }
3179             }
3180           }
3181           c.lookup_type = 2;
3182           goto skip;
3183         }
3184         //else
3185       }
3186       {
3187         float last = 0;
3188         c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
3189         if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3190         foreach (immutable j; 0..cast(int)c.lookup_values) {
3191           float val = mults[j]*c.delta_value+c.minimum_value+last;
3192           c.multiplicands[j] = val;
3193           if (c.sequence_p) last = val;
3194         }
3195       }
3196      //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
3197      skip: // this is versioned out in C
3198       setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3199     }
3200   }
3201 
3202   // time domain transfers (notused)
3203   x = get_bits_add_no!6(f, 1);
3204   foreach (immutable i; 0..x) {
3205     auto z = get_bits!16(f);
3206     if (z != 0) return error(f, STBVorbisError.invalid_setup);
3207   }
3208 
3209   // Floors
3210   f.floor_count = get_bits_add_no!6(f, 1);
3211   f.floor_config = setup_malloc!Floor(f, f.floor_count);
3212   if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
3213   foreach (immutable i; 0..f.floor_count) {
3214     f.floor_types[i] = get_bits!16(f);
3215     if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
3216     if (f.floor_types[i] == 0) {
3217       Floor0* g = &f.floor_config[i].floor0;
3218       g.order = get_bits!8(f);
3219       g.rate = get_bits!16(f);
3220       g.bark_map_size = get_bits!16(f);
3221       g.amplitude_bits = get_bits!6(f);
3222       g.amplitude_offset = get_bits!8(f);
3223       g.number_of_books = get_bits_add_no!4(f, 1);
3224       foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
3225       return error(f, STBVorbisError.feature_not_supported);
3226     } else {
3227       Point[31*8+2] p;
3228       Floor1 *g = &f.floor_config[i].floor1;
3229       int max_class = -1;
3230       g.partitions = get_bits!5(f);
3231       foreach (immutable j; 0..g.partitions) {
3232         g.partition_class_list[j] = get_bits!4(f);
3233         if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
3234       }
3235       foreach (immutable j; 0..max_class+1) {
3236         g.class_dimensions[j] = get_bits_add_no!3(f, 1);
3237         g.class_subclasses[j] = get_bits!2(f);
3238         if (g.class_subclasses[j]) {
3239           g.class_masterbooks[j] = get_bits!8(f);
3240           if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3241         }
3242         foreach (immutable k; 0..1<<g.class_subclasses[j]) {
3243           g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
3244           if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3245         }
3246       }
3247       g.floor1_multiplier = get_bits_add_no!2(f, 1);
3248       g.rangebits = get_bits!4(f);
3249       g.Xlist[0] = 0;
3250       g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
3251       g.values = 2;
3252       foreach (immutable j; 0..g.partitions) {
3253         int c = g.partition_class_list[j];
3254         foreach (immutable k; 0..g.class_dimensions[c]) {
3255           g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
3256           ++g.values;
3257         }
3258       }
3259       assert(g.values <= ushort.max);
3260       // precompute the sorting
3261       foreach (ushort j; 0..cast(ushort)g.values) {
3262         p[j].x = g.Xlist[j];
3263         p[j].y = j;
3264       }
3265       qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3266       foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
3267       // precompute the neighbors
3268       foreach (uint j; 2..g.values) {
3269         ushort low = void, hi = void;
3270         neighbors(g.Xlist.ptr, j, &low, &hi);
3271         assert(low <= ubyte.max);
3272         assert(hi <= ubyte.max);
3273         g.neighbors[j].ptr[0] = cast(ubyte)low;
3274         g.neighbors[j].ptr[1] = cast(ubyte)hi;
3275       }
3276       if (g.values > longest_floorlist) longest_floorlist = g.values;
3277     }
3278   }
3279 
3280   // Residue
3281   f.residue_count = get_bits_add_no!6(f, 1);
3282   f.residue_config = setup_malloc!Residue(f, f.residue_count);
3283   if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
3284   memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
3285   foreach (immutable i; 0..f.residue_count) {
3286     ubyte[64] residue_cascade;
3287     Residue* r = f.residue_config+i;
3288     f.residue_types[i] = get_bits!16(f);
3289     if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
3290     r.begin = get_bits!24(f);
3291     r.end = get_bits!24(f);
3292     if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
3293     r.part_size = get_bits_add_no!24(f, 1);
3294     r.classifications = get_bits_add_no!6(f, 1);
3295     r.classbook = get_bits!8(f);
3296     if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3297     foreach (immutable j; 0..r.classifications) {
3298       ubyte high_bits = 0;
3299       ubyte low_bits = get_bits!3(f);
3300       if (get_bits!1(f)) high_bits = get_bits!5(f);
3301       assert(high_bits*8+low_bits <= ubyte.max);
3302       residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
3303     }
3304     static assert(r.residue_books[0].sizeof == 16);
3305     r.residue_books = setup_malloc!(short[8])(f, r.classifications);
3306     if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
3307     foreach (immutable j; 0..r.classifications) {
3308       foreach (immutable k; 0..8) {
3309         if (residue_cascade[j]&(1<<k)) {
3310           r.residue_books[j].ptr[k] = get_bits!8(f);
3311           if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3312         } else {
3313           r.residue_books[j].ptr[k] = -1;
3314         }
3315       }
3316     }
3317     // precompute the classifications[] array to avoid inner-loop mod/divide
3318     // call it 'classdata' since we already have r.classifications
3319     r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
3320     if (!r.classdata) return error(f, STBVorbisError.outofmem);
3321     memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
3322     foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
3323       int classwords = f.codebooks[r.classbook].dimensions;
3324       int temp = j;
3325       r.classdata[j] = setup_malloc!ubyte(f, classwords);
3326       if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
3327       foreach_reverse (immutable k; 0..classwords) {
3328         assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
3329         r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
3330         temp /= r.classifications;
3331       }
3332     }
3333   }
3334 
3335   f.mapping_count = get_bits_add_no!6(f, 1);
3336   f.mapping = setup_malloc!Mapping(f, f.mapping_count);
3337   if (f.mapping is null) return error(f, STBVorbisError.outofmem);
3338   memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
3339   foreach (immutable i; 0..f.mapping_count) {
3340     Mapping* m = f.mapping+i;
3341     int mapping_type = get_bits!16(f);
3342     if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
3343     m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
3344     if (m.chan is null) return error(f, STBVorbisError.outofmem);
3345     m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
3346     if (m.submaps > max_submaps) max_submaps = m.submaps;
3347     if (get_bits!1(f)) {
3348       m.coupling_steps = get_bits_add_no!8(f, 1);
3349       foreach (immutable k; 0..m.coupling_steps) {
3350         m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3351         m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3352         if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3353         if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3354         if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
3355       }
3356     } else {
3357       m.coupling_steps = 0;
3358     }
3359 
3360     // reserved field
3361     if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
3362     if (m.submaps > 1) {
3363       foreach (immutable j; 0..f.vrchannels) {
3364         m.chan[j].mux = get_bits!4(f);
3365         if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
3366       }
3367     } else {
3368       // @SPECIFICATION: this case is missing from the spec
3369       foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
3370     }
3371     foreach (immutable j; 0..m.submaps) {
3372       get_bits!8(f); // discard
3373       m.submap_floor[j] = get_bits!8(f);
3374       m.submap_residue[j] = get_bits!8(f);
3375       if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
3376       if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
3377     }
3378   }
3379 
3380   // Modes
3381   f.mode_count = get_bits_add_no!6(f, 1);
3382   foreach (immutable i; 0..f.mode_count) {
3383     Mode* m = f.mode_config.ptr+i;
3384     m.blockflag = get_bits!1(f);
3385     m.windowtype = get_bits!16(f);
3386     m.transformtype = get_bits!16(f);
3387     m.mapping = get_bits!8(f);
3388     if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
3389     if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
3390     if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
3391   }
3392 
3393   flush_packet(f);
3394 
3395   f.previous_length = 0;
3396 
3397   foreach (immutable i; 0..f.vrchannels) {
3398     f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
3399     f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3400     f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
3401     if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3402     version(STB_VORBIS_NO_DEFER_FLOOR) {
3403       f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3404       if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3405     }
3406   }
3407 
3408   if (!init_blocksize(f, 0, f.blocksize_0)) return false;
3409   if (!init_blocksize(f, 1, f.blocksize_1)) return false;
3410   f.blocksize.ptr[0] = f.blocksize_0;
3411   f.blocksize.ptr[1] = f.blocksize_1;
3412 
3413   version(STB_VORBIS_DIVIDE_TABLE) {
3414     if (integer_divide_table[1].ptr[1] == 0) {
3415       foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
3416     }
3417   }
3418 
3419   // compute how much temporary memory is needed
3420 
3421   // 1.
3422   {
3423     uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
3424     uint classify_mem;
3425     int max_part_read = 0;
3426     foreach (immutable i; 0..f.residue_count) {
3427       Residue* r = f.residue_config+i;
3428       int n_read = r.end-r.begin;
3429       int part_read = n_read/r.part_size;
3430       if (part_read > max_part_read) max_part_read = part_read;
3431     }
3432     version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
3433       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
3434     } else {
3435       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
3436     }
3437     f.temp_memory_required = classify_mem;
3438     if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
3439   }
3440 
3441   f.first_decode = true;
3442 
3443   /+
3444   if (f.alloc.alloc_buffer) {
3445     debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3446     // check if there's enough temp memory so we don't error later
3447     if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
3448   }
3449   +/
3450 
3451   f.first_audio_page_offset = f.fileOffset();
3452 
3453   return true;
3454 }
3455 
3456 /+
3457 private int vorbis_search_for_page_pushdata (VorbisDecoder f, ubyte* data, int data_len) {
3458   import core.stdc.string : memcmp;
3459 
3460   foreach (immutable i; 0..f.page_crc_tests) f.scan.ptr[i].bytes_done = 0;
3461 
3462   // if we have room for more scans, search for them first, because
3463   // they may cause us to stop early if their header is incomplete
3464   if (f.page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3465     if (data_len < 4) return 0;
3466     data_len -= 3; // need to look for 4-byte sequence, so don't miss one that straddles a boundary
3467     foreach (immutable i; 0..data_len) {
3468       if (data[i] == 0x4f) {
3469         if (memcmp(data+i, ogg_page_header.ptr, 4) == 0) {
3470           // make sure we have the whole page header
3471           if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3472             // only read up to this page start, so hopefully we'll
3473             // have the whole page header start next time
3474             data_len = i;
3475             break;
3476           }
3477           // ok, we have it all; compute the length of the page
3478           auto len = 27+data[i+26];
3479           foreach (immutable j; 0..data[i+26]) len += data[i+27+j];
3480           // scan everything up to the embedded crc (which we must 0)
3481           uint crc = 0;
3482           foreach (immutable j; 0..22) crc = crc32_update(crc, data[i+j]);
3483           // now process 4 0-bytes
3484           foreach (immutable j; 22..26) crc = crc32_update(crc, 0);
3485           // len is the total number of bytes we need to scan
3486           auto n = f.page_crc_tests++;
3487           f.scan.ptr[n].bytes_left = len-/*j*/26;
3488           f.scan.ptr[n].crc_so_far = crc;
3489           f.scan.ptr[n].goal_crc = data[i+22]+(data[i+23]<<8)+(data[i+24]<<16)+(data[i+25]<<24);
3490           // if the last frame on a page is continued to the next, then
3491           // we can't recover the sample_loc immediately
3492           if (data[i+27+data[i+26]-1] == 255) {
3493             f.scan.ptr[n].sample_loc = ~0;
3494           } else {
3495             f.scan.ptr[n].sample_loc = data[i+6]+(data[i+7]<<8)+(data[i+8]<<16)+(data[i+9]<<24);
3496           }
3497           f.scan.ptr[n].bytes_done = i+26/*j*/;
3498           if (f.page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) break;
3499           // keep going if we still have room for more
3500         }
3501       }
3502     }
3503   }
3504 
3505   for (uint i = 0; i < f.page_crc_tests; ) {
3506     int nn = f.scan.ptr[i].bytes_done;
3507     int m = f.scan.ptr[i].bytes_left;
3508     if (m > data_len-nn) m = data_len-nn;
3509     // m is the bytes to scan in the current chunk
3510     uint crc = f.scan.ptr[i].crc_so_far;
3511     foreach (immutable j; 0..m) crc = crc32_update(crc, data[nn+j]);
3512     f.scan.ptr[i].bytes_left -= m;
3513     f.scan.ptr[i].crc_so_far = crc;
3514     if (f.scan.ptr[i].bytes_left == 0) {
3515       // does it match?
3516       if (f.scan.ptr[i].crc_so_far == f.scan.ptr[i].goal_crc) {
3517         // Houston, we have page
3518         data_len = nn+m; // consumption amount is wherever that scan ended
3519         f.page_crc_tests = -1; // drop out of page scan mode
3520         f.previous_length = 0; // decode-but-don't-output one frame
3521         f.next_seg = -1;       // start a new page
3522         f.current_loc = f.scan.ptr[i].sample_loc; // set the current sample location to the amount we'd have decoded had we decoded this page
3523         f.current_loc_valid = f.current_loc != ~0U;
3524         return data_len;
3525       }
3526       // delete entry
3527       f.scan.ptr[i] = f.scan.ptr[--f.page_crc_tests];
3528     } else {
3529       ++i;
3530     }
3531   }
3532 
3533   return data_len;
3534 }
3535 +/
3536 
// Scans forward from the current read position for the next Ogg page whose
// stored checksum matches the checksum computed over its bytes.
//
// Params:
//   f    = decoder whose input is scanned
//   end  = if non-null, receives the file offset just past the page found
//   last = if non-null, receives 1 when bit 0x04 of header byte 5 is set, else 0
//
// Returns: 1 with the read position rewound to the first byte of the page
//          found; 0 when the input ran out or the stream_len check tripped.
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  while (!f.eof) {
    // 0x4f is the first byte of the capture pattern; anything else is skipped
    if (get8(f) != 0x4f) continue;

    // position right after the candidate byte: scanning resumes here on a miss
    uint resume_at = f.fileOffset;
    // check if we're off the end of a file_section stream
    if (resume_at-25 > f.stream_len) return 0;

    // match the remaining three bytes of the capture pattern
    int matched = 1;
    while (matched < 4 && get8(f) == ogg_page_header[matched]) ++matched;
    if (f.eof) return 0;

    if (matched == 4) {
      // rebuild the fixed 27-byte page header: known pattern + bytes read now
      ubyte[27] header;
      header[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
      foreach (immutable idx; 4..27) header[idx] = get8(f);
      if (f.eof) return 0;

      // a nonzero byte 4 rejects the candidate outright
      if (header[4] == 0) {
        // remember the stored checksum, then checksum the header with that field zeroed
        uint goal = header[22]+(header[23]<<8)+(header[24]<<16)+(header[25]<<24);
        header[22..26] = 0;
        uint crc = 0;
        foreach (immutable idx; 0..27) crc = crc32_update(crc, header[idx]);

        // the segment table (header[26] entries) is checksummed and summed
        // to obtain the payload length
        uint len = 0;
        foreach (immutable _; 0..header[26]) {
          auto s = get8(f);
          crc = crc32_update(crc, s);
          len += s;
        }
        if (len && f.eof) return 0;

        // checksum the payload itself
        foreach (immutable _; 0..len) crc = crc32_update(crc, get8(f));

        // finished parsing probable page
        if (crc == goal) {
          // we could now check that it's either got the last
          // page flag set, OR it's followed by the capture
          // pattern, but I guess TECHNICALLY you could have
          // a file with garbage between each ogg page and recover
          // from it automatically? So even though that paranoia
          // might decrease the chance of an invalid decode by
          // another 2^32, not worth it since it would hose those
          // invalid-but-useful files?
          if (end) *end = f.fileOffset;
          if (last) *last = (header[5]&0x04 ? 1 : 0);
          // step back onto the first byte of the page
          set_file_offset(f, resume_at-1);
          return 1;
        }
      }
    }

    // not a valid page, so rewind and look for next one
    set_file_offset(f, resume_at);
  }
  return 0;
}
3591 
// Presumably the marker for "sample position not known".
// NOTE(review): this is the same bit pattern as the `~0U` tests applied to
// last_decoded_sample in the seek code below -- confirm which sites are meant
// to use this name.
enum SAMPLE_unknown = 0xffffffff;
3593 
// Overview of the seek search (implemented in seek_to_sample_coarse below):
//
// seeking is implemented with a binary search, which narrows down the range to
// 64K, before using a linear search (because finding the synchronization
// pattern can be expensive, and the chance we'd find the end page again is
// relatively high for small ranges)
//
// two initial interpolation-style probes are used at the start of the search
// to try to bound either side of the binary search sensibly, while still
// working in O(log n) time if they fail.
// Reads the page header at the current read position and fills in `z`.
//
// Params:
//   f = decoder positioned on the first byte of a page
//   z = receives page_start, page_end and last_decoded_sample
//
// Returns: 1 on success, with the read position restored to z.page_start;
//          0 if the first four bytes are not "OggS" (in that case only
//          z.page_start has been written and the read position is NOT restored).
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  ubyte[27] hdr;       // fixed-size part of the page header
  ubyte[255] segtable; // per-segment sizes that follow the fixed part

  // record where the page starts
  z.page_start = f.fileOffset;

  // fixed header first; bail out unless it opens with the capture pattern
  getn(f, hdr.ptr, 27);
  if (!(hdr[0] == 'O' && hdr[1] == 'g' && hdr[2] == 'g' && hdr[3] == 'S')) return 0;

  // byte 26 gives the number of segment-table entries
  immutable nsegs = hdr[26];
  getn(f, segtable.ptr, nsegs);

  // payload size is the sum of the segment sizes
  uint payload = 0;
  foreach (immutable lace; segtable[0..nsegs]) payload += lace;

  // fixed header + segment table + payload
  z.page_end = z.page_start+27+nsegs+payload;

  // bytes 6..9, little-endian: the last-decoded sample recorded on this page
  z.last_decoded_sample = hdr[6]+(hdr[7]<<8)+(hdr[8]<<16)+(hdr[9]<<24);

  // restore file state to where we were
  set_file_offset(f, z.page_start);
  return 1;
}
3628 
// rarely used function to seek back to the preceding page while finding the start of a packet
// Params:
//   f            = decoder to reposition
//   limit_offset = file offset the wanted page has to start before
//
// Returns: 1 with the read position at the start of a page that begins before
//          `limit_offset` and ends at or after it; 0 if no such page is found.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  uint page_end;

  // start scanning 64K before the limit, but never before the first audio page
  uint scan_from = f.first_audio_page_offset;
  if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset) scan_from = limit_offset-65536;

  set_file_offset(f, scan_from);

  // walk page by page; vorbis_find_page leaves the read position on the page start
  while (vorbis_find_page(f, &page_end, null)) {
    immutable bool reachesLimit = (page_end >= limit_offset && f.fileOffset < limit_offset);
    if (reachesLimit) return 1;
    set_file_offset(f, page_end);
  }

  return 0;
}
3649 
3650 // implements the search logic for finding a page and starting decoding. if
3651 // the function succeeds, current_loc_valid will be true and current_loc will
3652 // be less than or equal to the provided sample number (the closer the
3653 // better).
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  // Returns 1 on success. Failures go through error(f, ...) with seek_without_length,
  // seek_invalid or seek_failed; the `error:` label at the bottom additionally calls
  // f.seekStart before reporting seek_failed.
  //
  // left/right bracket the search range, mid is the page examined in the current
  // iteration (and, during the second probe, the page found by the previous one).
  ProbedPage left, right, mid;
  int i, start_seg_with_known_loc, end_pos, page_start;
  uint delta, stream_length, padding;
  double offset, bytes_per_sample;
  // number of completed search iterations: iterations 0 and 1 use the
  // interpolation probes, later ones bisect the byte range
  int probe = 0;

  // find the last page and validate the target sample
  stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  padding = ((f.blocksize_1-f.blocksize_0)>>2);
  // aim `padding` samples early, clamping at zero so the unsigned value cannot wrap
  if (sample_number < padding) sample_number = 0; else sample_number -= padding;

  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) goto error;
  }

  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // starting from the start is handled differently
  if (sample_number <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  // narrow [left, right] until the two pages are adjacent in the file
  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe < 2) {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(sample_number-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side)
          // `error` here is a local double (estimated miss in bytes), not the error() function;
          // it is pushed to at least 8000 in magnitude and then applied twice over
          double error = (cast(double)sample_number-mid.last_decoded_sample)*bytes_per_sample;
          if (error >= 0 && error <  8000) error =  8000;
          if (error <  0 && error > -8000) error = -8000;
          offset += error*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      } else {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      }

      // the offset chosen above is arbitrary, so resynchronise on a page boundary
      if (!vorbis_find_page(f, null, null)) goto error;
    }

    // read page info, skipping forward over pages whose last_decoded_sample is ~0U
    for (;;) {
      if (!get_seek_page_info(f, &mid)) goto error;
      if (mid.last_decoded_sample != ~0U) break;
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
    }

    // if we've just found the last page again then we're in a tricky file,
    // and we're close enough.
    if (mid.page_start == right.page_start) break;

    // keep the half of the range that still contains the target
    if (sample_number < mid.last_decoded_sample) right = mid; else left = mid;

    ++probe;
  }

  // seek back to start of the last packet
  page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  for (;;) {
    // step i back from end_pos while the preceding segment entry is 255
    // (in Ogg lacing, 255 marks a segment whose packet continues)
    for (i = end_pos; i > 0; --i) if (f.segments.ptr[i-1] != 255) break;
    start_seg_with_known_loc = i;
    // done once a start was found inside this page, or the page does not begin
    // with a continued packet
    if (start_seg_with_known_loc > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) goto error;
    page_start = f.fileOffset;
    if (!start_page(f)) goto error;
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = start_seg_with_known_loc;

  // skip the payload of every segment that precedes the chosen start segment
  for (i = 0; i < start_seg_with_known_loc; ++i) skip(f, f.segments.ptr[i]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  // landing past the (padding-adjusted) target counts as a failed seek
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;

error:
  // try to restore the file to a valid state
  f.seekStart;
  return error(f, STBVorbisError.seek_failed);
}
3780 
3781 // the same as vorbis_decode_initial, but without advancing
// Runs vorbis_decode_initial with the given out-parameters and then undoes its
// consumption of input, so the same packet header can be decoded again.
//
// Returns: 0 if vorbis_decode_initial fails (nothing is rewound), otherwise 1.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // either 1 or 2 bytes were read, figure out which so we can rewind:
  // 1 bit + the mode number, plus 2 more bits when the mode has its blockflag set
  immutable int header_bits = 1+ilog(f.mode_count-1)+(f.mode_config.ptr[*mode].blockflag ? 2 : 0);
  immutable int rewind_bytes = (header_bits+7)/8;

  // give the bytes back to the segment/packet counters and move the input back
  f.bytes_in_seg += rewind_bytes;
  f.packet_bytes -= rewind_bytes;
  skip(f, -rewind_bytes);

  // step the segment cursor back by one; -1 is mapped to the last segment index
  if (f.next_seg != -1) --f.next_seg; else f.next_seg = f.segment_count-1;

  // drop any bits still sitting in the bit buffer
  f.valid_bits = 0;

  return 1;
}
3798 
3799 // ////////////////////////////////////////////////////////////////////////// //
3800 // utility and supporting functions for getting s16 samples
// individual playback flag bits
enum {
  PLAYBACK_MONO  = 1<<0,
  PLAYBACK_LEFT  = 1<<1,
  PLAYBACK_RIGHT = 1<<2,
}

// flag combinations used as table entries below
enum {
  L = PLAYBACK_LEFT|PLAYBACK_MONO,
  C = PLAYBACK_LEFT|PLAYBACK_RIGHT|PLAYBACK_MONO,
  R = PLAYBACK_RIGHT|PLAYBACK_MONO,
}

// Seven rows of six flag bytes; row k has k meaningful entries and is padded with 0.
// NOTE(review): presumably indexed as [channel count][channel index] -- confirm at the use sites.
immutable byte[6][7] channel_position = [
  [ 0, 0, 0, 0, 0, 0 ],
  [ C, 0, 0, 0, 0, 0 ],
  [ L, R, 0, 0, 0, 0 ],
  [ L, C, R, 0, 0, 0 ],
  [ L, R, L, R, 0, 0 ],
  [ L, C, R, L, R, 0 ],
  [ L, C, R, L, R, C ],
];
3818 
3819 
// Code-generating "macros" for float -> int sample conversion. Both variants
// are meant to be used via `mixin(...)`: `declfcvar!"temp"` declares whatever
// scratch variable the conversion needs, and `FAST_SCALED_FLOAT_TO_INT!(x, s)`
// yields statements that declare `int v` holding the converted value of `x`.
// NOTE(review): `cmacroFixVars` is defined elsewhere; presumably it replaces
// the `${name}` placeholders with the given strings -- confirm.
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // no scratch variable needed: expands to an empty block statement
  enum declfcvar(string name) = "{}";
  // portable variant: only a shift of "15" is accepted
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    static assert(s == "15");
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // overlays a float and an int so the float's bit pattern can be read back
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  // declares an uninitialized `float_conv` with the given name; the generated
  // conversion code below refers to it as `temp`
  enum declfcvar(string name) = "float_conv "~name~" = void;";
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);
  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  // float constant added to the sample before its bits are reinterpreted
  enum MAGIC(string SHIFT) = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  // integer constant subtracted from the reinterpreted bits
  enum ADDEND(string SHIFT) = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  // generated code: `temp.f = (x)+MAGIC; int v = temp.i-ADDEND;`
  enum FAST_SCALED_FLOAT_TO_INT(string x, string s) = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
    .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
}
3842 
// Converts `len` float samples from `src` into signed 16-bit samples in
// `dest`, saturating values that fall outside the short range.
private void copy_samples (short* dest, float* src, int len) {
  mixin(declfcvar!"temp");
  for (int idx = 0; idx < len; ++idx) {
    // the mixin declares `int v` with the scaled sample
    mixin(FAST_SCALED_FLOAT_TO_INT!("src[idx]", "15"));
    if (v < -32768) v = -32768;
    else if (v > 32767) v = 32767;
    dest[idx] = cast(short)v;
  }
}
3853 
// Mixes every source channel whose `channel_position` flags intersect `mask`
// into one output channel of `len` signed 16-bit samples. Work is done in
// chunks of BUFFER_SIZE samples through a small float accumulator.
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] mix = void; // cleared at the top of every chunk
  int chunk = BUFFER_SIZE;
  mixin(declfcvar!"temp");
  for (uint o = 0; o < len; o += BUFFER_SIZE) {
    mix[] = 0.0f;
    if (o+chunk > len) chunk = len-o; // final, shorter chunk
    foreach (immutable ch; 0..num_c) {
      if ((channel_position[num_c].ptr[ch]&mask) == 0) continue;
      foreach (immutable k; 0..chunk) mix.ptr[k] += data[ch][d_offset+o+k];
    }
    foreach (immutable k; 0..chunk) {
      // the mixin declares `int v` with the scaled sample
      mixin(FAST_SCALED_FLOAT_TO_INT!("mix[k]", "15"));
      if (v < -32768) v = -32768;
      else if (v > 32767) v = 32767;
      output[o+k] = cast(short)v;
    }
  }
}
3874 
// Mixes `num_c` source channels down to interleaved stereo: `len` frames are
// written to `output` as left/right pairs of signed 16-bit samples. Each source
// channel goes to the left and/or right side according to `channel_position`.
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  enum FRAMES = BUFFER_SIZE>>1; // stereo frames handled per chunk
  float[BUFFER_SIZE] mix = void; // cleared at the top of every chunk
  int frames = FRAMES;
  mixin(declfcvar!"temp");
  // `o` counts source frames; the output position is twice that
  for (uint o = 0; o < len; o += FRAMES) {
    immutable int out_base = o<<1;
    mix[] = 0.0f;
    if (o+frames > len) frames = len-o; // final, shorter chunk
    foreach (immutable ch; 0..num_c) {
      immutable int side = channel_position[num_c].ptr[ch]&(PLAYBACK_LEFT|PLAYBACK_RIGHT);
      immutable bool to_left = (side&PLAYBACK_LEFT) != 0;
      immutable bool to_right = (side&PLAYBACK_RIGHT) != 0;
      if (!to_left && !to_right) continue;
      foreach (immutable k; 0..frames) {
        immutable float s = data[ch][d_offset+o+k];
        if (to_left) mix.ptr[k*2+0] += s;
        if (to_right) mix.ptr[k*2+1] += s;
      }
    }
    foreach (immutable k; 0..frames<<1) {
      // the mixin declares `int v` with the scaled sample
      mixin(FAST_SCALED_FLOAT_TO_INT!("mix[k]", "15"));
      if (v < -32768) v = -32768;
      else if (v > 32767) v = 32767;
      output[out_base+k] = cast(short)v;
    }
  }
}
3909 
// Converts `samples` float samples per channel into planar 16-bit buffers.
// When the channel counts differ, the target has at most two channels and the
// source at most six, the source is mixed down via compute_samples();
// otherwise channels are copied one-to-one and any extra output channels are
// zero-filled.
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  import core.stdc.string : memset;

  immutable bool needs_mix = (buf_c != data_c && buf_c <= 2 && data_c <= 6);
  if (needs_mix) {
    // masks handed to compute_samples(), indexed by [output channel count][output channel]
    static immutable int[2][3] masks = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
    for (int oc = 0; oc < buf_c; ++oc) {
      compute_samples(masks[buf_c].ptr[oc], buffer[oc]+b_offset, data_c, data, d_offset, samples);
    }
    return;
  }

  immutable int shared_c = (data_c < buf_c ? data_c : buf_c);
  for (int oc = 0; oc < shared_c; ++oc) {
    copy_samples(buffer[oc]+b_offset, data[oc]+d_offset, samples);
  }
  for (int oc = shared_c; oc < buf_c; ++oc) {
    memset(buffer[oc]+b_offset, 0, short.sizeof*samples);
  }
}
3922 
// Converts `len` frames of float data into interleaved signed 16-bit samples.
//
//   buf_c    - number of interleaved output channels
//   buffer   - receives len*buf_c shorts
//   data_c   - number of source channels in `data`
//   data     - per-channel float buffers, read starting at `d_offset`
//
// When the channel counts differ, the target has at most two channels and the
// source at most six, the source is mixed down; otherwise channels are copied
// one-to-one and extra output channels are filled with silence.
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    if (buf_c == 2) {
      // one call fills both interleaved channels
      compute_stereo_samples(buffer, data_c, data, d_offset, len);
    } else if (buf_c == 1) {
      // A single interleaved channel is just a planar mono buffer. The stereo
      // mixer must not be used here: it writes 2*len shorts and would overrun
      // a buffer sized for len mono samples.
      compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
    }
    // buf_c <= 0: no output channels, nothing to write
  } else {
    //check_endianness();
    mixin(declfcvar!"temp");
    immutable int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable j; 0..len) {
      foreach (immutable i; 0..limit) {
        float f = data[i][d_offset+j];
        // the mixin declares `int v` with the scaled sample
        mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));
        if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
        *buffer++ = cast(short)v; //k8
      }
      // output channels without a source channel are silent
      foreach (immutable i; limit..buf_c) *buffer++ = 0;
    }
  }
}
3942 } // @nogc
3943 
3944 
3945 public class VorbisDecoder {
3946   // return # of bytes read, 0 on eof, -1 on error
3947   // if called with `buf is null`, do `close()`
3948   alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3949 
3950   //TODO
3951   static struct Allocator {
3952   static nothrow @nogc: // because
3953     void* alloc (uint sz, VorbisDecoder vb) {
3954       import core.stdc.stdlib : malloc;
3955       return malloc(sz);
3956     }
3957     void free (void* p, VorbisDecoder vb) {
3958       import core.stdc.stdlib : free;
3959       free(p);
3960     }
3961     void* allocTemp (uint sz, VorbisDecoder vb) {
3962       import core.stdc.stdlib : malloc;
3963       return malloc(sz);
3964     }
3965     void freeTemp (void* p, uint sz, VorbisDecoder vb) {
3966       import core.stdc.stdlib : free;
3967       free(p);
3968     }
3969     uint tempSave (VorbisDecoder vb) { return 0; }
3970     void tempRestore (uint pos, VorbisDecoder vb) {}
3971   }
3972 
3973 nothrow @nogc:
3974 private:
  bool isOpened; // true while a stream is attached; cleared by rawClose()/close()
  readCB stmread; // callback used for all reads; called with a null buffer to close
  uint stlastofs = uint.max; // offset stflRead() believes the FILE is at; a mismatch triggers fseek
  uint stst; // offset of the stream start
  uint stpos; // current read offset, clamped to stst..stend by rawSkip()/rawSeek()
  uint stend; // offset of the stream end
  bool stclose; // when set, stflRead() fcloses `stfl` on a close request
  FILE* stfl; // C stream read by stflRead()
3983 
3984 private:
3985   //ubyte* stream;
3986   //ubyte* stream_start;
3987   //ubyte* stream_end;
3988   //uint stream_len;
3989 
3990   /+bool push_mode;+/
3991 
3992   uint first_audio_page_offset;
3993 
3994   ProbedPage p_first, p_last;
3995 
3996   // memory management
3997   Allocator alloc;
3998   int setup_offset;
3999   int temp_offset;
4000 
4001   // run-time results
4002   bool eof = true;
4003   STBVorbisError error;
4004 
4005   // header info
4006   int[2] blocksize;
4007   int blocksize_0, blocksize_1;
4008   int codebook_count;
4009   Codebook* codebooks;
4010   int floor_count;
4011   ushort[64] floor_types; // varies
4012   Floor* floor_config;
4013   int residue_count;
4014   ushort[64] residue_types; // varies
4015   Residue* residue_config;
4016   int mapping_count;
4017   Mapping* mapping;
4018   int mode_count;
4019   Mode[64] mode_config;  // varies
4020 
4021   uint total_samples;
4022 
4023   // decode buffer
4024   float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
4025   float*[STB_VORBIS_MAX_CHANNELS] outputs;
4026 
4027   float*[STB_VORBIS_MAX_CHANNELS] previous_window;
4028   int previous_length;
4029 
4030   version(STB_VORBIS_NO_DEFER_FLOOR) {
4031     float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
4032   } else {
4033     short*[STB_VORBIS_MAX_CHANNELS] finalY;
4034   }
4035 
4036   uint current_loc; // sample location of next frame to decode
4037   int current_loc_valid;
4038 
4039   // per-blocksize precomputed data
4040 
4041   // twiddle factors
4042   float*[2] A, B, C;
4043   float*[2] window;
4044   ushort*[2] bit_reverse;
4045 
4046   // current page/packet/segment streaming info
4047   uint serial; // stream serial number for verification
4048   int last_page;
4049   int segment_count;
4050   ubyte[255] segments;
4051   ubyte page_flag;
4052   ubyte bytes_in_seg;
4053   ubyte first_decode;
4054   int next_seg;
4055   int last_seg;  // flag that we're on the last segment
4056   int last_seg_which; // what was the segment number of the last seg?
4057   uint acc;
4058   int valid_bits;
4059   int packet_bytes;
4060   int end_seg_with_known_loc;
4061   uint known_loc_for_packet;
4062   int discard_samples_deferred;
4063   uint samples_output;
4064 
4065   // push mode scanning
4066   /+
4067   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
4068   CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
4069   +/
4070 
4071   // sample-access
4072   int channel_buffer_start;
4073   int channel_buffer_end;
4074 
4075 private: // k8: 'cause i'm evil
4076   // user-accessible info
4077   uint sample_rate;
4078   int vrchannels;
4079 
4080   uint setup_memory_required;
4081   uint temp_memory_required;
4082   uint setup_temp_memory_required;
4083 
4084   bool read_comments;
4085   ubyte* comment_data;
4086   uint comment_size;
4087 
4088   // functions to get comment data
4089   uint comment_data_pos;
4090 
4091 private:
4092   int rawRead (void[] buf) {
4093     static if (__VERSION__ > 2067) pragma(inline, true);
4094     if (isOpened && buf.length > 0 && stpos < stend) {
4095       if (stend-stpos < buf.length) buf = buf[0..stend-stpos];
4096       auto rd = stmread(buf, stpos, this);
4097       if (rd > 0) stpos += rd;
4098       return rd;
4099     }
4100     return 0;
4101   }
4102   void rawSkip (int n) { static if (__VERSION__ > 2067) pragma(inline, true);
4103   	if (isOpened) {
4104 		stpos += n;
4105 		if(stpos < stst)
4106 			stpos = stst;
4107 		else if(stpos > stend)
4108 			stpos = stend;
4109 	}
4110   }
4111   void rawSeek (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { stpos = stst+(n < 0 ? 0 : n); if (stpos > stend) stpos = stend; } }
4112   void rawClose () { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { isOpened = false; stmread(null, 0, this); } }
4113 
4114 final:
4115 private:
4116   void doInit () {
4117     import core.stdc.string : memset;
4118     /*
4119     if (z) {
4120       alloc = *z;
4121       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
4122       temp_offset = alloc.alloc_buffer_length_in_bytes;
4123     }
4124     */
4125     eof = false;
4126     error = STBVorbisError.no_error;
4127     /+stream = null;+/
4128     codebooks = null;
4129     /+page_crc_tests = -1;+/
4130   }
4131 
4132   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
4133     if (buf !is null) {
4134       if (vb.stlastofs != ofs) {
4135       	// { import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
4136         import core.stdc.stdio : fseek, SEEK_SET;
4137         vb.stlastofs = ofs;
4138         fseek(vb.stfl, ofs, SEEK_SET);
4139       }
4140       import core.stdc.stdio : fread;
4141       auto rd = cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
4142       if(rd > 0)
4143       	vb.stlastofs += rd;
4144       return rd;
4145     } else {
4146       if (vb.stclose) {
4147         import core.stdc.stdio : fclose;
4148         if (vb.stfl !is null) fclose(vb.stfl);
4149       }
4150       vb.stfl = null;
4151       return 0;
4152     }
4153   }
4154 
4155 public:
  // creates a decoder with no stream attached; use open() to attach one
  this () {}
  // releases everything via close()
  ~this () { close(); }
4158 
4159   this (int asize, readCB rcb) {
4160   	assert(rcb !is null);
4161 	stend = (asize > 0 ? asize : 0);
4162 	stmread = rcb;
4163 	isOpened = true;
4164 	eof = false;
4165 	read_comments = true;
4166 	if (start_decoder(this)) {
4167 		vorbis_pump_first_frame(this);
4168 		return;
4169 	}
4170   }
  // opens a decoder on an already opened C stream; see open(FILE*, bool)
  this (FILE* fl, bool doclose=true) { open(fl, doclose); }
  // opens a decoder on the named file; see open(const(char)[])
  this (const(char)[] filename) { open(filename); }

  // true when no stream is attached (never opened, open failed, or closed)
  @property bool closed () { return !isOpened; }
4175 
4176   void open (FILE *fl, bool doclose=true) {
4177     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
4178     close();
4179     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
4180     stclose = doclose;
4181     stst = stpos = cast(uint)ftell(fl);
4182     fseek(fl, 0, SEEK_END);
4183     stend = cast(uint)ftell(fl);
4184     stlastofs = stlastofs.max;
4185     stclose = false;
4186     stfl = fl;
4187     import std.functional : toDelegate;
4188     stmread = toDelegate(&stflRead);
4189     isOpened = true;
4190     eof = false;
4191     read_comments = true;
4192     if (start_decoder(this)) {
4193       vorbis_pump_first_frame(this);
4194       return;
4195     }
4196     auto err = error;
4197     close();
4198     error = err;
4199   }
4200 
4201   void open (const(char)[] filename) {
4202     import core.stdc.stdio : fopen;
4203     import std.internal.cstring; // sorry
4204     close();
4205     FILE* fl = fopen(filename.tempCString, "rb");
4206     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
4207     open(fl, true);
4208   }
4209 
4210   /+
4211   void openPushdata(void* data, int data_len, // the memory available for decoding
4212                     int* data_used)           // only defined on success
4213   {
4214     close();
4215     eof = false;
4216     stream = cast(ubyte*)data;
4217     stream_end = stream+data_len;
4218     push_mode = true;
4219     if (!start_decoder(this)) {
4220       auto err = error;
4221       if (eof) err = STBVorbisError.need_more_data; else close();
4222       error = err;
4223       return;
4224     }
4225     *data_used = stream-(cast(ubyte*)data);
4226     error = STBVorbisError.no_error;
4227   }
4228   +/
4229 
4230   void close () {
4231     import core.stdc.string : memset;
4232 
4233     setup_free(this, this.comment_data);
4234     if (this.residue_config) {
4235       foreach (immutable i; 0..this.residue_count) {
4236         Residue* r = this.residue_config+i;
4237         if (r.classdata) {
4238           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
4239           setup_free(this, r.classdata);
4240         }
4241         setup_free(this, r.residue_books);
4242       }
4243     }
4244 
4245     if (this.codebooks) {
4246       foreach (immutable i; 0..this.codebook_count) {
4247         Codebook* c = this.codebooks+i;
4248         setup_free(this, c.codeword_lengths);
4249         setup_free(this, c.multiplicands);
4250         setup_free(this, c.codewords);
4251         setup_free(this, c.sorted_codewords);
4252         // c.sorted_values[-1] is the first entry in the array
4253         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
4254       }
4255       setup_free(this, this.codebooks);
4256     }
4257     setup_free(this, this.floor_config);
4258     setup_free(this, this.residue_config);
4259     if (this.mapping) {
4260       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
4261       setup_free(this, this.mapping);
4262     }
4263     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
4264       setup_free(this, this.channel_buffers.ptr[i]);
4265       setup_free(this, this.previous_window.ptr[i]);
4266       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
4267       setup_free(this, this.finalY.ptr[i]);
4268     }
4269     foreach (immutable i; 0..2) {
4270       setup_free(this, this.A.ptr[i]);
4271       setup_free(this, this.B.ptr[i]);
4272       setup_free(this, this.C.ptr[i]);
4273       setup_free(this, this.window.ptr[i]);
4274       setup_free(this, this.bit_reverse.ptr[i]);
4275     }
4276 
4277     rawClose();
4278     isOpened = false;
4279     stmread = null;
4280     stlastofs = uint.max;
4281     stst = 0;
4282     stpos = 0;
4283     stend = 0;
4284     stclose = false;
4285     stfl = null;
4286 
4287     sample_rate = 0;
4288     vrchannels = 0;
4289 
4290     setup_memory_required = 0;
4291     temp_memory_required = 0;
4292     setup_temp_memory_required = 0;
4293 
4294     read_comments = 0;
4295     comment_data = null;
4296     comment_size = 0;
4297 
4298     comment_data_pos = 0;
4299 
4300     /+
4301     stream = null;
4302     stream_start = null;
4303     stream_end = null;
4304     +/
4305 
4306     //stream_len = 0;
4307 
4308     /+push_mode = false;+/
4309 
4310     first_audio_page_offset = 0;
4311 
4312     p_first = p_first.init;
4313     p_last = p_last.init;
4314 
4315     setup_offset = 0;
4316     temp_offset = 0;
4317 
4318     eof = true;
4319     error = STBVorbisError.no_error;
4320 
4321     blocksize[] = 0;
4322     blocksize_0 = 0;
4323     blocksize_1 = 0;
4324     codebook_count = 0;
4325     codebooks = null;
4326     floor_count = 0;
4327     floor_types[] = 0;
4328     floor_config = null;
4329     residue_count = 0;
4330     residue_types[] = 0;
4331     residue_config = null;
4332     mapping_count = 0;
4333     mapping = null;
4334     mode_count = 0;
4335     mode_config[] = Mode.init;
4336 
4337     total_samples = 0;
4338 
4339     channel_buffers[] = null;
4340     outputs[] = null;
4341 
4342     previous_window[] = null;
4343     previous_length = 0;
4344 
4345     version(STB_VORBIS_NO_DEFER_FLOOR) {
4346       floor_buffers[] = null;
4347     } else {
4348       finalY[] = null;
4349     }
4350 
4351     current_loc = 0;
4352     current_loc_valid = 0;
4353 
4354     A[] = null;
4355     B[] = null;
4356     C[] = null;
4357     window[] = null;
4358     bit_reverse = null;
4359 
4360     serial = 0;
4361     last_page = 0;
4362     segment_count = 0;
4363     segments[] = 0;
4364     page_flag = 0;
4365     bytes_in_seg = 0;
4366     first_decode = 0;
4367     next_seg = 0;
4368     last_seg = 0;
4369     last_seg_which = 0;
4370     acc = 0;
4371     valid_bits = 0;
4372     packet_bytes = 0;
4373     end_seg_with_known_loc = 0;
4374     known_loc_for_packet = 0;
4375     discard_samples_deferred = 0;
4376     samples_output = 0;
4377 
4378     /+
4379     page_crc_tests = -1;
4380     scan[] = CRCscan.init;
4381     +/
4382 
4383     channel_buffer_start = 0;
4384     channel_buffer_end = 0;
4385   }
4386 
4387   @property const pure {
4388     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4389 
4390     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4391     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4392     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4393 
4394     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4395     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4396     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4397   }
4398 
4399   // will clear last error
4400   @property int lastError () {
4401     int e = error;
4402     error = STBVorbisError.no_error;
4403     return e;
4404   }
4405 
4406   // PUSHDATA API
4407   /+
4408   void flushPushdata () {
4409     if (push_mode) {
4410       previous_length = 0;
4411       page_crc_tests = 0;
4412       discard_samples_deferred = 0;
4413       current_loc_valid = false;
4414       first_decode = false;
4415       samples_output = 0;
4416       channel_buffer_start = 0;
4417       channel_buffer_end = 0;
4418     }
4419   }
4420 
4421   // return value: number of bytes we used
4422   int decodeFramePushdata(
4423            void* data, int data_len, // the memory available for decoding
4424            int* channels,            // place to write number of float* buffers
4425            float*** output,          // place to write float** array of float* buffers
4426            int* samples              // place to write number of output samples
4427        )
4428   {
4429     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4430 
4431     if (this.page_crc_tests >= 0) {
4432       *samples = 0;
4433       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4434     }
4435 
4436     this.stream = cast(ubyte*)data;
4437     this.stream_end = this.stream+data_len;
4438     this.error = STBVorbisError.no_error;
4439 
4440     // check that we have the entire packet in memory
4441     if (!is_whole_packet_present(this, false)) {
4442       *samples = 0;
4443       return 0;
4444     }
4445 
4446     int len, left, right;
4447 
4448     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4449       // save the actual error we encountered
4450       STBVorbisError error = this.error;
4451       if (error == STBVorbisError.bad_packet_type) {
4452         // flush and resynch
4453         this.error = STBVorbisError.no_error;
4454         while (get8_packet(this) != EOP) if (this.eof) break;
4455         *samples = 0;
4456         return this.stream-data;
4457       }
4458       if (error == STBVorbisError.continued_packet_flag_invalid) {
4459         if (this.previous_length == 0) {
4460           // we may be resynching, in which case it's ok to hit one
4461           // of these; just discard the packet
4462           this.error = STBVorbisError.no_error;
4463           while (get8_packet(this) != EOP) if (this.eof) break;
4464           *samples = 0;
4465           return this.stream-data;
4466         }
4467       }
4468       // if we get an error while parsing, what to do?
4469       // well, it DEFINITELY won't work to continue from where we are!
4470       flushPushdata();
4471       // restore the error that actually made us bail
4472       this.error = error;
4473       *samples = 0;
4474       return 1;
4475     }
4476 
4477     // success!
4478     len = vorbis_finish_frame(this, len, left, right);
4479     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4480 
4481     if (channels) *channels = this.vrchannels;
4482     *samples = len;
4483     *output = this.outputs.ptr;
4484     return this.stream-data;
4485   }
4486   +/
4487 
4488   public uint fileOffset () {
4489     if (/+push_mode ||+/ !isOpened) return 0;
4490     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4491     return (stpos > stst ? stpos-stst : 0);
4492   }
4493 
4494   public uint stream_len () { return stend-stst; }
4495 
4496   // DATA-PULLING API
4497   public int seekFrame (uint sample_number) {
4498     uint max_frame_samples;
4499 
4500     /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/
4501 
4502     // fast page-level search
4503     if (!seek_to_sample_coarse(this, sample_number)) return 0;
4504 
4505     assert(this.current_loc_valid);
4506     assert(this.current_loc <= sample_number);
4507 
4508     import std.stdio;
4509 
4510     // linear search for the relevant packet
4511     max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
4512     while (this.current_loc < sample_number) {
4513       int left_start, left_end, right_start, right_end, mode, frame_samples;
4514       if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
4515       // calculate the number of samples returned by the next frame
4516       frame_samples = right_start-left_start;
4517       if (this.current_loc+frame_samples > sample_number) {
4518         return 1; // the next frame will contain the sample
4519       } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
4520         // there's a chance the frame after this could contain the sample
4521         vorbis_pump_first_frame(this);
4522       } else {
4523         // this frame is too early to be relevant
4524         this.current_loc += frame_samples;
4525         this.previous_length = 0;
4526         maybe_start_packet(this);
4527         flush_packet(this);
4528       }
4529     }
4530     // the next frame will start with the sample
4531     assert(this.current_loc == sample_number);
4532 
4533     return 1;
4534   }
4535 
4536   public int seek (uint sample_number) {
4537     if (!seekFrame(sample_number)) return 0;
4538     if (sample_number != this.current_loc) {
4539       int n;
4540       uint frame_start = this.current_loc;
4541       getFrameFloat(&n, null);
4542       assert(sample_number > frame_start);
4543       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4544       this.channel_buffer_start += (sample_number-frame_start);
4545     }
4546     return 1;
4547   }
4548 
4549   public bool seekStart () {
4550     /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
4551     set_file_offset(this, first_audio_page_offset);
4552     previous_length = 0;
4553     first_decode = true;
4554     next_seg = -1;
4555     return vorbis_pump_first_frame(this);
4556   }
4557 
  // Returns the total number of samples in the stream, or 0 when it cannot be
  // determined. The value is computed once by locating the last page and
  // reading its granule position, then cached in `total_samples`; the file
  // offset is restored before returning. Failure to find the last page sets
  // `cant_find_last_page`.
  public uint streamLengthInSamples () {
    uint restore_offset, previous_safe;
    uint end, last_page_loc;

    /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
    // total_samples == 0 means "not computed yet"
    if (!this.total_samples) {
      uint last;
      uint lo, hi;
      char[6] header;

      // first, store the current decode position so we can restore it
      restore_offset = fileOffset;

      // now we want to seek back 64K from the end (the last page must
      // be at most a little less than 64K, but let's allow a little slop)
      if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
        previous_safe = this.stream_len-65536;
      } else {
        previous_safe = this.first_audio_page_offset;
      }

      set_file_offset(this, previous_safe);
      // previous_safe is now our candidate 'earliest known place that seeking
      // to will lead to the final page'

      if (!vorbis_find_page(this, &end, &last)) {
        // if we can't find a page, we're hosed!
        this.error = STBVorbisError.cant_find_last_page;
        // NOTE(review): presumably the same value as SAMPLE_unknown, which the
        // return statement below tests for -- confirm
        this.total_samples = 0xffffffff;
        goto done;
      }

      // check if there are more pages
      last_page_loc = fileOffset;

      // stop when the last_page flag is set, not when we reach eof;
      // this allows us to stop short of a 'file_section' end without
      // explicitly checking the length of the section
      while (!last) {
        set_file_offset(this, end);
        if (!vorbis_find_page(this, &end, &last)) {
          // the last page we found didn't have the 'last page' flag set. whoops!
          break;
        }
        // NOTE(review): previous_safe is not read again in this function
        previous_safe = last_page_loc+1;
        last_page_loc = fileOffset;
      }

      set_file_offset(this, last_page_loc);

      // parse the header
      // (the first 6 bytes are read and not examined further)
      getn(this, cast(ubyte*)header, 6);
      // extract the absolute granule position
      lo = get32(this);
      hi = get32(this);
      // all bits set in both halves: treated as "no position available"
      if (lo == 0xffffffff && hi == 0xffffffff) {
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = SAMPLE_unknown;
        goto done;
      }
      if (hi) lo = 0xfffffffe; // saturate
      this.total_samples = lo;

      // remember the last page for later use
      this.p_last.page_start = last_page_loc;
      this.p_last.page_end = end;
      this.p_last.last_decoded_sample = lo;

     done:
      // put the read position back where the caller had it
      set_file_offset(this, restore_offset);
    }
    return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
  }
4630 
4631   public float streamLengthInSeconds () {
4632     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4633   }
4634 
4635   public int getFrameFloat (int* channels, float*** output) {
4636     int len, right, left;
4637     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4638 
4639     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4640       channel_buffer_start = channel_buffer_end = 0;
4641       return 0;
4642     }
4643 
4644     len = vorbis_finish_frame(this, len, left, right);
4645     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4646 
4647     channel_buffer_start = left;
4648     channel_buffer_end = left+len;
4649 
4650     if (channels) *channels = this.vrchannels;
4651     if (output) *output = this.outputs.ptr;
4652     return len;
4653   }
4654 
4655   /+
4656   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4657     VorbisDecoder this;
4658     stb_vorbis_ctx p = void;
4659     if (data is null) return null;
4660     vorbis_init(&p, alloc);
4661     p.stream = cast(ubyte*)data;
4662     p.stream_end = cast(ubyte*)data+len;
4663     p.stream_start = cast(ubyte*)p.stream;
4664     p.stream_len = len;
4665     p.push_mode = false;
4666     if (start_decoder(&p)) {
4667       this = vorbis_alloc(&p);
4668       if (this) {
4669         *this = p;
4670         vorbis_pump_first_frame(this);
4671         return this;
4672       }
4673     }
4674     if (error) *error = p.error;
4675     vorbis_deinit(&p);
4676     return null;
4677   }
4678   +/
4679 
4680   // s16 samples API
4681   int getFrameShort (int num_c, short** buffer, int num_samples) {
4682     float** output;
4683     int len = getFrameFloat(null, &output);
4684     if (len > num_samples) len = num_samples;
4685     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4686     return len;
4687   }
4688 
4689   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4690     float** output;
4691     int len;
4692     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4693     len = getFrameFloat(null, &output);
4694     if (len) {
4695       if (len*num_c > num_shorts) len = num_shorts/num_c;
4696       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4697     }
4698     return len;
4699   }
4700 
4701   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4702     float** outputs;
4703     int len = num_shorts/channels;
4704     int n = 0;
4705     int z = this.vrchannels;
4706     if (z > channels) z = channels;
4707     while (n < len) {
4708       int k = channel_buffer_end-channel_buffer_start;
4709       if (n+k >= len) k = len-n;
4710       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4711       buffer += k*channels;
4712       n += k;
4713       channel_buffer_start += k;
4714       if (n == len) break;
4715       if (!getFrameFloat(null, &outputs)) break;
4716     }
4717     return n;
4718   }
4719 
4720   int getSamplesShort (int channels, short** buffer, int len) {
4721     float** outputs;
4722     int n = 0;
4723     int z = this.vrchannels;
4724     if (z > channels) z = channels;
4725     while (n < len) {
4726       int k = channel_buffer_end-channel_buffer_start;
4727       if (n+k >= len) k = len-n;
4728       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4729       n += k;
4730       channel_buffer_start += k;
4731       if (n == len) break;
4732       if (!getFrameFloat(null, &outputs)) break;
4733     }
4734     return n;
4735   }
4736 
4737   /+
4738   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4739     import core.stdc.stdlib : malloc, realloc;
4740 
4741     int data_len, offset, total, limit, error;
4742     short* data;
4743     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4744     if (v is null) return -1;
4745     limit = v.vrchannels*4096;
4746     *channels = v.vrchannels;
4747     if (sample_rate) *sample_rate = v.sample_rate;
4748     offset = data_len = 0;
4749     total = limit;
4750     data = cast(short*)malloc(total*(*data).sizeof);
4751     if (data is null) {
4752       stb_vorbis_close(v);
4753       return -2;
4754     }
4755     for (;;) {
4756       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4757       if (n == 0) break;
4758       data_len += n;
4759       offset += n*v.vrchannels;
4760       if (offset+limit > total) {
4761         short *data2;
4762         total *= 2;
4763         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4764         if (data2 is null) {
4765           import core.stdc.stdlib : free;
4766           free(data);
4767           stb_vorbis_close(v);
4768           return -2;
4769         }
4770         data = data2;
4771       }
4772     }
4773     *output = data;
4774     stb_vorbis_close(v);
4775     return data_len;
4776   }
4777 
4778   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4779     import core.stdc.stdlib : malloc, realloc;
4780 
4781     int data_len, offset, total, limit, error;
4782     short* data;
4783     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4784     if (v is null) return -1;
4785     limit = v.vrchannels*4096;
4786     *channels = v.vrchannels;
4787     if (sample_rate) *sample_rate = v.sample_rate;
4788     offset = data_len = 0;
4789     total = limit;
4790     data = cast(short*)malloc(total*(*data).sizeof);
4791     if (data is null) {
4792       stb_vorbis_close(v);
4793       return -2;
4794     }
4795     for (;;) {
4796       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4797       if (n == 0) break;
4798       data_len += n;
4799       offset += n*v.vrchannels;
4800       if (offset+limit > total) {
4801         short *data2;
4802         total *= 2;
4803         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4804         if (data2 is null) {
4805           import core.stdc.stdlib : free;
4806           free(data);
4807           stb_vorbis_close(v);
4808           return -2;
4809         }
4810         data = data2;
4811       }
4812     }
4813     *output = data;
4814     stb_vorbis_close(v);
4815     return data_len;
4816   }
4817 
4818   public int stb_vorbis_get_samples_float_interleaved (VorbisDecoder this, int channels, float* buffer, int num_floats) {
4819     float** outputs;
4820     int len = num_floats/channels;
4821     int n = 0;
4822     int z = this.vrchannels;
4823     if (z > channels) z = channels;
4824     while (n < len) {
4825       int k = this.channel_buffer_end-this.channel_buffer_start;
4826       if (n+k >= len) k = len-n;
4827       foreach (immutable j; 0..k) {
4828         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4829         foreach (immutable i; z..channels) *buffer++ = 0;
4830       }
4831       n += k;
4832       this.channel_buffer_start += k;
4833       if (n == len) break;
4834       if (!stb_vorbis_get_frame_float(this, null, &outputs)) break;
4835     }
4836     return n;
4837   }
4838   +/
4839 
4840   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4841     import core.stdc.string : memcpy, memset;
4842     float** outputs;
4843     int n = 0;
4844     int z = vrchannels;
4845     if (z > achans) z = achans;
4846     while (n < num_samples) {
4847       int k = channel_buffer_end-channel_buffer_start;
4848       if (n+k >= num_samples) k = num_samples-n;
4849       if (k) {
4850         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4851         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4852       }
4853       n += k;
4854       channel_buffer_start += k;
4855       if (n == num_samples) break;
4856       if (!getFrameFloat(null, &outputs)) break;
4857     }
4858     return n;
4859   }
4860 
4861 private: // k8: 'cause i'm evil
4862   private enum cmt_len_size = 2;
4863   nothrow /*@trusted*/ @nogc {
4864     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4865 
4866     // 0: error
4867     // includes length itself
4868     private uint comment_get_line_len () const pure {
4869       if (comment_data_pos >= comment_size) return 0;
4870       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4871       uint len = comment_data[comment_data_pos];
4872       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4873       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4874     }
4875 
4876     public bool comment_rewind () {
4877       comment_data_pos = 0;
4878       for (;;) {
4879         auto len = comment_get_line_len();
4880         if (!len) { comment_data_pos = comment_size; return false; }
4881         if (len != cmt_len_size) return true;
4882         comment_data_pos += len;
4883       }
4884     }
4885 
4886     // true: has something to read after skip
4887     public bool comment_skip () {
4888       comment_data_pos += comment_get_line_len();
4889       for (;;) {
4890         auto len = comment_get_line_len();
4891         if (!len) { comment_data_pos = comment_size; return false; }
4892         if (len != cmt_len_size) break;
4893         comment_data_pos += len;
4894       }
4895       return true;
4896     }
4897 
4898     public const(char)[] comment_line () {
4899       auto len = comment_get_line_len();
4900       if (len < cmt_len_size) return null;
4901       if (len == cmt_len_size) return "";
4902       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4903     }
4904 
4905     public const(char)[] comment_name () {
4906       auto line = comment_line();
4907       if (line.length == 0) return line;
4908       uint epos = 0;
4909       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4910       return (epos < line.length ? line[0..epos] : "");
4911     }
4912 
4913     public const(char)[] comment_value () {
4914       auto line = comment_line();
4915       if (line.length == 0) return line;
4916       uint epos = 0;
4917       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4918       return (epos < line.length ? line[epos+1..$] : line);
4919     }
4920   }
4921 }
4922 
4923 
4924 // ////////////////////////////////////////////////////////////////////////// //
4925 private:
4926 // cool helper to translate C defines
template cmacroFixVars(T...) {
  /**
   * 64-bit implementation of fasthash
   *
   * Params:
   *   buf =  data buffer
   *   len =  number of bytes to hash, starting at `buf`
   *   seed = the seed
   *
   * Returns:
   *   32-bit or 64-bit hash (matching `size_t.sizeof`)
   */
  size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
    enum ulong BlockMul = 0x880355f21e6d1965UL;

    // xorshift / multiply / xorshift scrambling step
    static ulong mix (ulong v) pure nothrow @safe @nogc {
      enum ulong MixMul = 0x2127599bf4325c37UL;
      v ^= v>>23;
      v *= MixMul;
      v ^= v>>47;
      return v;
    }

    // assemble eight bytes into a 64-bit word, lowest byte first
    static ulong loadLE64 (const(ubyte)* p) pure nothrow @trusted @nogc {
      ulong w = 0;
      foreach (immutable uint b; 0..8) w |= cast(ulong)p[b]<<(b*8);
      return w;
    }

    auto bytes = cast(const(ubyte)*)buf;
    ulong acc = seed;

    // whole 8-byte blocks
    for (size_t blocksLeft = len/8; blocksLeft != 0; --blocksLeft) {
      ulong word;
      version(HasUnalignedOps) {
        // direct loads are a run-time only shortcut; CTFE assembles the bytes
        if (__ctfe) {
          word = loadLE64(bytes);
        } else {
          word = *cast(ulong*)bytes;
        }
      } else {
        word = loadLE64(bytes);
      }
      bytes += 8;
      acc = (acc^mix(word))*BlockMul;
    }

    acc ^= len*BlockMul;

    // 0..7 trailing bytes; the accumulator is updated even when there are none
    ulong tail = 0;
    foreach (immutable size_t i; 0..(len&7)) tail ^= cast(ulong)bytes[i]<<(i*8);
    acc = (acc^mix(tail))*BlockMul;

    acc = mix(acc);
    static if (size_t.sizeof == 4) {
      // 32-bit hash
      // the following trick converts the 64-bit hashcode to Fermat
      // residue, which shall retain information from both the higher
      // and lower parts of hashcode.
      return cast(size_t)(acc-(acc>>32));
    } else {
      return acc;
    }
  }

  /**
   * Expand `${name}` placeholders in macro text.
   *
   * Every `${X}` where `X` is the i-th template argument is replaced by
   * `names[i]`. `${__temp_prefix__}` is replaced by an identifier prefix
   * derived from a hash of the (trimmed) text, the same one for every
   * occurrence inside a single call. Trailing whitespace and leading blank
   * lines are dropped; the indentation of the first non-blank line is kept.
   *
   * Params:
   *   s = macro text
   *   names = replacements, one per template argument, in the same order
   *
   * Returns:
   *   the expanded text
   */
  string cmacroFixVars (string s, string[] names...) {
    assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");
    enum tmpPfxName = "__temp_prefix__";
    string output;
    string uniquePfx; // generated on first use

    // trim trailing spaces
    while (s.length > 0 && s[$-1] <= ' ') s = s[0..$-1];

    // skip empty lines (for pretty printing): start at the beginning of the
    // line that holds the first visible character
    uint cur = 0;
    {
      uint lineStart = 0;
      for (uint scan = 0; scan < s.length && s[scan] <= ' '; ++scan) {
        if (s[scan] == '\n') lineStart = scan+1;
      }
      cur = lineStart;
    }

    // does `name}` start at the current position?
    bool atVar (string name) {
      return (s.length-cur >= name.length+1 && s[cur+name.length] == '}' && s[cur..cur+name.length] == name);
    }

    while (cur+2 < s.length) {
      // copy literal text up to the next "${"
      uint mark = cur;
      while (mark+2 < s.length && !(s[mark] == '$' && s[mark+1] == '{')) ++mark;
      if (mark > cur) {
        // too short to hold a placeholder: the rest is flushed after the loop
        if (s.length-mark < 3) break;
        output ~= s[cur..mark];
        cur = mark;
      }
      assert(s[cur] == '$' && s[cur+1] == '{');
      cur += 2;
      bool found = false;
      if (atVar(tmpPfxName)) {
        if (uniquePfx.length == 0) {
          // generate temporary prefix
          auto hash = hashOf(s.ptr, s.length);
          immutable char[16] hexChars = "0123456789abcdef";
          uniquePfx = "_temp_macro_var_";
          foreach (immutable _; 0..size_t.sizeof*2) {
            uniquePfx ~= hexChars[hash&0x0f];
            hash >>= 4;
          }
          uniquePfx ~= "_";
        }
        cur += tmpPfxName.length+1;
        output ~= uniquePfx;
        found = true;
      } else {
        foreach (immutable nidx, string oname; T) {
          static assert(oname.length > 0);
          if (atVar(oname)) {
            found = true;
            cur += oname.length+1;
            output ~= names[nidx];
            break;
          }
        }
      }
      assert(found, "unknown variable in macro");
    }
    if (cur < s.length) output ~= s[cur..$];
    return output;
  }
}
5063 
5064 // ////////////////////////////////////////////////////////////////////////// //
5065 /* Version history
5066     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5067     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5068                            avoid discarding last frame of audio data
5069     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5070                            some more crash fixes when out of memory or with corrupt files
5071     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5072                            some crash fixes when out of memory or with corrupt files
5073     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5074     1.04    - 2014/08/27 - fix missing const-correct case in API
5075     1.03    - 2014/08/07 - Warning fixes
5076     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5077     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5078     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5079                            (API change) report sample rate for decode-full-file funcs
5080     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5081     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5082     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5083     0.99993 - remove assert that fired on legal files with empty tables
5084     0.99992 - rewind-to-start
5085     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5086     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5087     0.9998 - add a full-decode function with a memory source
5088     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5089     0.9996 - query length of vorbis stream in samples/seconds
5090     0.9995 - bugfix to another optimization that only happened in certain files
5091     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5092     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5093     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5094     0.9991 - performance improvement of IMDCT
5095     0.999 - (should have been 0.9990) performance improvement of IMDCT
5096     0.998 - no-CRT support from Casey Muratori
5097     0.997 - bugfixes for bugs found by Terje Mathisen
5098     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5099     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5100     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5101     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5102     0.992 - fixes for MinGW warning
5103     0.991 - turn fast-float-conversion on by default
5104     0.990 - fix push-mode seek recovery if you seek into the headers
5105     0.98b - fix to bad release of 0.98
5106     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5107     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5108     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5109     0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
5111     0.93 - fixed all-zero-floor case (was decoding garbage)
5112     0.92 - fixed a memory leak
5113     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5114     0.90 - first public release
5115 */
5116 
5117 /*
5118 ------------------------------------------------------------------------------
5119 This software is available under 2 licenses -- choose whichever you prefer.
5120 ------------------------------------------------------------------------------
5121 ALTERNATIVE A - MIT License
5122 Copyright (c) 2017 Sean Barrett
5123 Permission is hereby granted, free of charge, to any person obtaining a copy of
5124 this software and associated documentation files (the "Software"), to deal in
5125 the Software without restriction, including without limitation the rights to
5126 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5127 of the Software, and to permit persons to whom the Software is furnished to do
5128 so, subject to the following conditions:
5129 The above copyright notice and this permission notice shall be included in all
5130 copies or substantial portions of the Software.
5131 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5132 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5133 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5134 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5135 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5136 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5137 SOFTWARE.
5138 ------------------------------------------------------------------------------
5139 ALTERNATIVE B - Public Domain (www.unlicense.org)
5140 This is free and unencumbered software released into the public domain.
5141 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5142 software, either in source code form or as a compiled binary, for any purpose,
5143 commercial or non-commercial, and by any means.
5144 In jurisdictions that recognize copyright laws, the author or authors of this
5145 software dedicate any and all copyright interest in the software to the public
5146 domain. We make this dedication for the benefit of the public at large and to
5147 the detriment of our heirs and successors. We intend this dedication to be an
5148 overt act of relinquishment in perpetuity of all present and future rights to
5149 this software under copyright law.
5150 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5151 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5152 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5153 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5154 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5155 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5156 ------------------------------------------------------------------------------
5157 */