1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 // stolen by adam and module renamed.
57 /++
58 	Port of stb_vorbis to D. Provides .ogg audio file reading capabilities. See [arsd.simpleaudio] for code that can use this to actually load and play the file.
59 +/
60 module arsd.vorbis;
61 
62 import core.stdc.stdio : FILE;
63 
// Windows-only definition of the C symbol `lrintf`.
// NOTE(review): this converts with cast(int), i.e. it truncates toward zero,
// whereas C99 lrintf() rounds using the current rounding mode; results differ
// for fractional inputs (0.75f yields 0 here) -- confirm callers tolerate it.
version(Windows)
	extern(C) int lrintf(float f) { return cast(int) f; }
66 
67 @system:
68 
69 nothrow /*@trusted*/:
70 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
71 // import it here, as druntime has no `@nogc` on it (for a reason)
72 private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(const scope void*, const scope void*) compar);
73 
74 
75 //////////////////////////////////////////////////////////////////////////////
76 //
77 //  HEADER BEGINS HERE
78 //
79 
80 ///////////   THREAD SAFETY
81 
82 // Individual VorbisDecoder handles are not thread-safe; you cannot decode from
83 // them from multiple threads at the same time. However, you can have multiple
// VorbisDecoder handles and decode from them independently in multiple threads.
85 
86 
87 ///////////   MEMORY ALLOCATION
88 
89 // normally stb_vorbis uses malloc() to allocate memory at startup,
90 // and alloca() to allocate temporary memory during a frame on the
91 // stack. (Memory consumption will depend on the amount of setup
92 // data in the file and how you set the compile flags for speed
93 // vs. size. In my test files the maximal-size usage is ~150KB.)
94 //
95 // You can modify the wrapper functions in the source (setup_malloc,
96 // setup_temp_malloc, temp_malloc) to change this behavior, or you
97 // can use a simpler allocation model: you pass in a buffer from
98 // which stb_vorbis will allocate _all_ its memory (including the
99 // temp memory). "open" may fail with a VORBIS_outofmem if you
100 // do not pass in enough data; there is no way to determine how
101 // much you do need except to succeed (at which point you can
102 // query get_info to find the exact amount required. yes I know
103 // this is lame).
104 //
105 // If you pass in a non-null buffer of the type below, allocation
106 // will occur from it as described above. Otherwise just pass null
107 // to use malloc()/alloca()
108 
// Describes a caller-supplied memory buffer (see the MEMORY ALLOCATION notes
// just above this declaration).
public struct stb_vorbis_alloc {
  ubyte* alloc_buffer; // start of the buffer
  int alloc_buffer_length_in_bytes; // length of `alloc_buffer`, in bytes
}
113 
114 
115 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
116 
117 /*
118 public struct stb_vorbis_info {
119   uint sample_rate;
120   int channels;
121 
122   uint setup_memory_required;
123   uint setup_temp_memory_required;
124   uint temp_memory_required;
125 
126   int max_frame_size;
127 }
128 */
129 
130 
131 /* ************************************************************************** *
132 // get general information about the file
133 stb_vorbis_info stb_vorbis_get_info (VorbisDecoder f);
134 
135 // get the last error detected (clears it, too)
136 int stb_vorbis_get_error (VorbisDecoder f);
137 
138 // close an ogg vorbis file and free all memory in use
139 void stb_vorbis_close (VorbisDecoder f);
140 
141 // this function returns the offset (in samples) from the beginning of the
142 // file that will be returned by the next decode, if it is known, or -1
143 // otherwise. after a flush_pushdata() call, this may take a while before
144 // it becomes valid again.
145 // NOT WORKING YET after a seek with PULLDATA API
146 int stb_vorbis_get_sample_offset (VorbisDecoder f);
147 
148 // returns the current seek point within the file, or offset from the beginning
149 // of the memory buffer. In pushdata mode it returns 0.
150 uint stb_vorbis_get_file_offset (VorbisDecoder f);
151 
152 
153 ///////////   PUSHDATA API
154 
155 // this API allows you to get blocks of data from any source and hand
156 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
157 // you how much it used, and you have to give it the rest next time;
158 // and stb_vorbis may not have enough data to work with and you will
159 // need to give it the same data again PLUS more. Note that the Vorbis
160 // specification does not bound the size of an individual frame.
161 
162 // create a vorbis decoder by passing in the initial data block containing
//    the ogg&vorbis headers (you don't need to parse them, just provide
//    the first N bytes of the file--you're told if it's not enough, see below)
// on success, returns a VorbisDecoder, does not set error, returns the amount of
//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
// on failure, returns null and sets *error, does not change *datablock_memory_consumed
168 // if returns null and *error is VORBIS_need_more_data, then the input block was
169 //       incomplete and you need to pass in a larger block from the start of the file
170 VorbisDecoder stb_vorbis_open_pushdata (
171               ubyte* datablock, int datablock_length_in_bytes,
172               int* datablock_memory_consumed_in_bytes,
173               int* error,
174               stb_vorbis_alloc* alloc_buffer
175             );
176 
177 // decode a frame of audio sample data if possible from the passed-in data block
178 //
179 // return value: number of bytes we used from datablock
180 //
181 // possible cases:
182 //     0 bytes used, 0 samples output (need more data)
183 //     N bytes used, 0 samples output (resynching the stream, keep going)
184 //     N bytes used, M samples output (one frame of data)
185 // note that after opening a file, you will ALWAYS get one N-bytes, 0-sample
186 // frame, because Vorbis always "discards" the first frame.
187 //
188 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
189 // instead only datablock_length_in_bytes-3 or less. This is because it wants
190 // to avoid missing parts of a page header if they cross a datablock boundary,
191 // without writing state-machiney code to record a partial detection.
192 //
193 // The number of channels returned are stored in *channels (which can be
194 // null--it is always the same as the number of channels reported by
195 // get_info). *output will contain an array of float* buffers, one per
196 // channel. In other words, (*output)[0][0] contains the first sample from
197 // the first channel, and (*output)[1][0] contains the first sample from
198 // the second channel.
199 int stb_vorbis_decode_frame_pushdata (
200       VorbisDecoder f, ubyte* datablock, int datablock_length_in_bytes,
201       int* channels,   // place to write number of float * buffers
202       float*** output, // place to write float ** array of float * buffers
203       int* samples     // place to write number of output samples
204     );
205 
206 // inform stb_vorbis that your next datablock will not be contiguous with
207 // previous ones (e.g. you've seeked in the data); future attempts to decode
208 // frames will cause stb_vorbis to resynchronize (as noted above), and
209 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
210 // will begin decoding the _next_ frame.
211 //
212 // if you want to seek using pushdata, you need to seek in your file, then
213 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
214 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
215 // if you don't like the result, seek your file again and repeat.
216 void stb_vorbis_flush_pushdata (VorbisDecoder f);
217 
218 
219 //////////   PULLING INPUT API
220 
221 // This API assumes stb_vorbis is allowed to pull data from a source--
222 // either a block of memory containing the _entire_ vorbis stream, or a
223 // FILE* that you or it create, or possibly some other reading mechanism
224 // if you go modify the source to replace the FILE* case with some kind
225 // of callback to your code. (But if you don't support seeking, you may
226 // just want to go ahead and use pushdata.)
227 
228 // decode an entire file and output the data interleaved into a malloc()ed
229 // buffer stored in *output. The return value is the number of samples
230 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
231 // When you're done with it, just free() the pointer returned in *output.
232 int stb_vorbis_decode_filename (const(char)* filename, int* channels, int* sample_rate, short** output);
233 int stb_vorbis_decode_memory (const(ubyte)* mem, int len, int* channels, int* sample_rate, short** output);
234 
235 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
236 // this must be the entire stream!). on failure, returns null and sets *error
237 VorbisDecoder stb_vorbis_open_memory (const(ubyte)* data, int len, int* error, stb_vorbis_alloc* alloc_buffer);
238 
239 // create an ogg vorbis decoder from a filename via fopen(). on failure,
240 // returns null and sets *error (possibly to VORBIS_file_open_failure).
241 VorbisDecoder stb_vorbis_open_filename (const(char)* filename, int* error, stb_vorbis_alloc* alloc_buffer);
242 
243 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
244 // the _current_ seek point (ftell). on failure, returns null and sets *error.
245 // note that stb_vorbis must "own" this stream; if you seek it in between
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
247 // perform stb_vorbis_seek_*() operations on this file, it will assume it
248 // owns the _entire_ rest of the file after the start point. Use the next
249 // function, stb_vorbis_open_file_section(), to limit it.
250 VorbisDecoder stb_vorbis_open_file (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer);
251 
252 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
253 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
254 // on failure, returns null and sets *error. note that stb_vorbis must "own"
255 // this stream; if you seek it in between calls to stb_vorbis, it will become
256 // confused.
257 VorbisDecoder stb_vorbis_open_file_section (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer, uint len);
258 
259 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
260 // after calling seek_frame(), the next call to get_frame_*() will include
261 // the specified sample. after calling stb_vorbis_seek(), the next call to
262 // stb_vorbis_get_samples_* will start with the specified sample. If you
263 // do not need to seek to EXACTLY the target sample when using get_samples_*,
264 // you can also use seek_frame().
265 int stb_vorbis_seek_frame (VorbisDecoder f, uint sample_number);
266 int stb_vorbis_seek (VorbisDecoder f, uint sample_number);
267 
268 // this function is equivalent to stb_vorbis_seek(f, 0)
269 int stb_vorbis_seek_start (VorbisDecoder f);
270 
271 // these functions return the total length of the vorbis stream
272 uint stb_vorbis_stream_length_in_samples (VorbisDecoder f);
273 float stb_vorbis_stream_length_in_seconds (VorbisDecoder f);
274 
275 // decode the next frame and return the number of samples. the number of
276 // channels returned are stored in *channels (which can be null--it is always
277 // the same as the number of channels reported by get_info). *output will
278 // contain an array of float* buffers, one per channel. These outputs will
279 // be overwritten on the next call to stb_vorbis_get_frame_*.
280 //
281 // You generally should not intermix calls to stb_vorbis_get_frame_*()
282 // and stb_vorbis_get_samples_*(), since the latter calls the former.
283 int stb_vorbis_get_frame_float (VorbisDecoder f, int* channels, float*** output);
284 
285 // decode the next frame and return the number of *samples* per channel.
286 // Note that for interleaved data, you pass in the number of shorts (the
287 // size of your array), but the return value is the number of samples per
288 // channel, not the total number of samples.
289 //
290 // The data is coerced to the number of channels you request according to the
291 // channel coercion rules (see below). You must pass in the size of your
292 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
293 // The maximum buffer size needed can be gotten from get_info(); however,
294 // the Vorbis I specification implies an absolute maximum of 4096 samples
295 // per channel.
296 int stb_vorbis_get_frame_short_interleaved (VorbisDecoder f, int num_c, short* buffer, int num_shorts);
297 int stb_vorbis_get_frame_short (VorbisDecoder f, int num_c, short** buffer, int num_samples);
298 
299 // Channel coercion rules:
300 //    Let M be the number of channels requested, and N the number of channels present,
301 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
302 //    and stereo R be the sum of all R and center channels (channel assignment from the
303 //    vorbis spec).
304 //        M    N       output
305 //        1    k      sum(Ck) for all k
306 //        2    *      stereo L, stereo R
307 //        k    l      k > l, the first l channels, then 0s
308 //        k    l      k <= l, the first k channels
309 //    Note that this is not _good_ surround etc. mixing at all! It's just so
310 //    you get something useful.
311 
312 // gets num_samples samples, not necessarily on a frame boundary--this requires
313 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
314 // Returns the number of samples stored per channel; it may be less than requested
315 // at the end of the file. If there are no more samples in the file, returns 0.
316 int stb_vorbis_get_samples_float_interleaved (VorbisDecoder f, int channels, float* buffer, int num_floats);
317 int stb_vorbis_get_samples_float (VorbisDecoder f, int channels, float** buffer, int num_samples);
318 
319 // gets num_samples samples, not necessarily on a frame boundary--this requires
320 // buffering so you have to supply the buffers. Applies the coercion rules above
321 // to produce 'channels' channels. Returns the number of samples stored per channel;
322 // it may be less than requested at the end of the file. If there are no more
323 // samples in the file, returns 0.
324 int stb_vorbis_get_samples_short_interleaved (VorbisDecoder f, int channels, short* buffer, int num_shorts);
325 int stb_vorbis_get_samples_short (VorbisDecoder f, int channels, short** buffer, int num_samples);
326 */
327 
328 ////////   ERROR CODES
329 
// Status/error codes; error() stores one of these in the decoder's `error`
// field. Explicit initializers start the numeric groups at 1, 10, 20 and 30;
// members without an initializer continue from the previous value.
public enum STBVorbisError {
  no_error, // 0

  need_more_data = 1,    // not a real error

  invalid_api_mixing,    // can't mix API modes
  outofmem,              // not enough memory
  feature_not_supported, // uses floor 0
  too_many_channels,     // STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure,     // fopen() failed
  seek_without_length,   // can't seek in unknown-length file

  unexpected_eof = 10,   // file is truncated?
  seek_invalid,          // seek past EOF

  // decoding errors (corrupt/invalid stream) -- you probably
  // don't care about the exact details of these

  // vorbis errors:
  invalid_setup = 20,
  invalid_stream, // 21

  // ogg errors:
  missing_capture_pattern = 30,
  invalid_stream_structure_version, // 31
  continued_packet_flag_invalid, // 32
  incorrect_stream_serial_number, // 33
  invalid_first_page, // 34
  bad_packet_type, // 35
  cant_find_last_page, // 36
  seek_failed, // 37
}
362 //
363 //  HEADER ENDS HERE
364 //
365 //////////////////////////////////////////////////////////////////////////////
366 
367 
368 // global configuration settings (e.g. set these in the project/makefile),
369 // or just set them in this file at the top (although ideally the first few
370 // should be visible when the header file is compiled too, although it's not
371 // crucial)
372 
373 // STB_VORBIS_NO_INTEGER_CONVERSION
374 //     does not compile the code for converting audio sample data from
375 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
376 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
377 
378 // STB_VORBIS_NO_FAST_SCALED_FLOAT
379 //      does not use a fast float-to-int trick to accelerate float-to-int on
380 //      most platforms which requires endianness be defined correctly.
381 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
382 
383 // STB_VORBIS_MAX_CHANNELS [number]
384 //     globally define this to the maximum number of channels you need.
385 //     The spec does not put a restriction on channels except that
386 //     the count is stored in a byte, so 255 is the hard limit.
387 //     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB. Plus any other memory usage
389 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
390 //     6 (5.1 audio), or 2 (stereo only).
391 enum STB_VORBIS_MAX_CHANNELS = 16; // enough for anyone?
392 
393 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
394 //     after a flush_pushdata(), stb_vorbis begins scanning for the
395 //     next valid page, without backtracking. when it finds something
396 //     that looks like a page, it streams through it and verifies its
397 //     CRC32. Should that validation fail, it keeps scanning. But it's
398 //     possible that _while_ streaming through to check the CRC32 of
399 //     one candidate page, it sees another candidate page. This #define
400 //     determines how many "overlapping" candidate pages it can search
401 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
402 //     garbage pages could be as big as 64KB, but probably average ~16KB.
403 //     So don't hose ourselves by scanning an apparent 64KB page and
404 //     missing a ton of real ones in the interim; so minimum of 2
405 enum STB_VORBIS_PUSHDATA_CRC_COUNT = 4;
406 
407 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
408 //     sets the log size of the huffman-acceleration table.  Maximum
409 //     supported value is 24. with larger numbers, more decodings are O(1),
410 //     but the table size is larger so worse cache missing, so you'll have
411 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
412 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
413 
414 // STB_VORBIS_FAST_BINARY_LENGTH [number]
415 //     sets the log size of the binary-search acceleration table. this
416 //     is used in similar fashion to the fast-huffman size to set initial
417 //     parameters for the binary search
418 
419 // STB_VORBIS_FAST_HUFFMAN_INT
420 //     The fast huffman tables are much more efficient if they can be
421 //     stored as 16-bit results instead of 32-bit results. This restricts
422 //     the codebooks to having only 65535 possible outcomes, though.
423 //     (At least, accelerated by the huffman table.)
424 //version = STB_VORBIS_FAST_HUFFMAN_INT;
// Default selection: unless STB_VORBIS_FAST_HUFFMAN_INT is set, the version
// identifier STB_VORBIS_FAST_HUFFMAN_SHORT is defined, which makes
// Codebook.fast_huffman an array of short instead of int.
version(STB_VORBIS_FAST_HUFFMAN_INT) {} else version = STB_VORBIS_FAST_HUFFMAN_SHORT;
426 
427 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
428 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
429 //     back on binary searching for the correct one. This requires storing
430 //     extra tables with the huffman codes in sorted order. Defining this
431 //     symbol trades off space for speed by forcing a linear search in the
432 //     non-fast case, except for "sparse" codebooks.
433 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
434 
435 // STB_VORBIS_DIVIDES_IN_RESIDUE
436 //     stb_vorbis precomputes the result of the scalar residue decoding
437 //     that would otherwise require a divide per chunk. you can trade off
438 //     space for time by defining this symbol.
439 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
440 
441 // STB_VORBIS_DIVIDES_IN_CODEBOOK
442 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
443 //     stored, or with all elements being chosen from a small range of values,
444 //     and all values possible in all elements. By default, stb_vorbis expands
445 //     this latter kind out to look like the former kind for ease of decoding,
446 //     because otherwise an integer divide-per-vector-element is required to
447 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
448 //     trade off storage for speed.
449 //version = STB_VORBIS_DIVIDES_IN_CODEBOOK;
450 
451 version(STB_VORBIS_CODEBOOK_SHORTS) static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
452 
453 // STB_VORBIS_DIVIDE_TABLE
454 //     this replaces small integer divides in the floor decode loop with
455 //     table lookups. made less than 1% difference, so disabled by default.
456 //version = STB_VORBIS_DIVIDE_TABLE;
457 
458 // STB_VORBIS_NO_DEFER_FLOOR
459 //     Normally we only decode the floor without synthesizing the actual
460 //     full curve. We can instead synthesize the curve immediately. This
461 //     requires more memory and is very likely slower, so I don't think
462 //     you'd ever want to do it except for debugging.
463 //version = STB_VORBIS_NO_DEFER_FLOOR;
464 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
465 
466 
467 // ////////////////////////////////////////////////////////////////////////// //
468 private:
// compile-time range checks for the configuration constants defined earlier
static assert(STB_VORBIS_MAX_CHANNELS <= 256, "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24, "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");

// log2 of the largest block size, and the block size itself (1<<13 == 8192)
enum MAX_BLOCKSIZE_LOG = 13; // from specification
enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG);
474 
475 
// element type behind Codebook.multiplicands
alias codetype = float;
477 
478 // @NOTE
479 //
480 // Some arrays below are tagged "//varies", which means it's actually
481 // a variable-sized piece of data, but rather than malloc I assume it's
482 // small enough it's better to just allocate it all together with the
483 // main thing
484 //
485 // Most of the variables are specified with the smallest size I could pack
486 // them into. It might give better performance to make them all full-sized
487 // integers. It should be safe to freely rearrange the structures or change
488 // the sizes larger--nothing relies on silently truncating etc., nor the
489 // order of variables.
490 
// entry count of Codebook.fast_huffman: 2 to the power STB_VORBIS_FAST_HUFFMAN_LENGTH
enum FAST_HUFFMAN_TABLE_SIZE = (1<<STB_VORBIS_FAST_HUFFMAN_LENGTH);
// table size minus one, i.e. the low STB_VORBIS_FAST_HUFFMAN_LENGTH bits set
enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE-1);
493 
// Per-codebook data and lookup tables.
// NOTE(review): the code that fills these fields is outside this chunk; the
// field notes below are limited to what the declarations themselves show.
struct Codebook {
  int dimensions, entries;
  ubyte* codeword_lengths; // presumably one length per entry -- confirm
  float minimum_value;
  float delta_value;
  ubyte value_bits;
  ubyte lookup_type;
  ubyte sequence_p;
  ubyte sparse;
  uint lookup_values;
  codetype* multiplicands; // `codetype` is an alias for float
  uint *codewords;
  // FAST_HUFFMAN_TABLE_SIZE-entry table; the element width is picked at
  // compile time by the STB_VORBIS_FAST_HUFFMAN_SHORT version identifier
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  uint* sorted_codewords;
  int* sorted_values;
  int sorted_entries;
}
515 
// Floor type 0 parameters.
// NOTE(review): the file header lists floor 0 as unsupported, so this is
// presumably kept only to give the Floor union a member -- confirm.
struct Floor0 {
  ubyte order;
  ushort rate;
  ushort bark_map_size;
  ubyte amplitude_bits;
  ubyte amplitude_offset;
  ubyte number_of_books;
  ubyte[16] book_list; // varies
}
525 
// Floor type 1 parameters. Arrays tagged "varies" are fixed-capacity storage
// for variable-length data (see the @NOTE comment earlier in the file).
// The three arrays sized 31*8+2 hold 250 entries each.
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list; // varies
  ubyte[16] class_dimensions; // varies
  ubyte[16] class_subclasses; // varies
  ubyte[16] class_masterbooks; // varies
  short[8][16] subclass_books; // varies
  ushort[31*8+2] Xlist; // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors;
  ubyte floor1_multiplier;
  ubyte rangebits;
  int values; // presumably the number of Xlist entries in use -- confirm
}
540 
// Overlapping storage for either floor kind. Which member is valid is not
// recorded here; it must be tracked by whoever owns the union.
union Floor {
  Floor0 floor0;
  Floor1 floor1;
}
545 
// Residue configuration record.
struct Residue {
  uint begin, end;
  uint part_size;
  ubyte classifications;
  ubyte classbook;
  ubyte** classdata;
  //int16 (*residue_books)[8];
  short[8]* residue_books; // pointer to rows of 8 shorts (D form of the C declaration above)
}
555 
// Three byte-sized per-channel values; Mapping.chan points at these records.
struct MappingChannel {
  ubyte magnitude;
  ubyte angle;
  ubyte mux;
}
561 
// Mapping configuration record. The two submap arrays are fixed-capacity
// (15 entries) storage for variable-length data.
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan; // presumably one record per channel -- confirm
  ubyte submaps;
  ubyte[15] submap_floor; // varies
  ubyte[15] submap_residue; // varies
}
569 
// Mode configuration record.
struct Mode {
  ubyte blockflag;
  ubyte mapping; // presumably an index selecting a Mapping -- confirm
  ushort windowtype;
  ushort transformtype;
}
576 
// Bookkeeping for one in-progress CRC check.
// NOTE(review): presumably one of these exists per candidate page tracked by
// the STB_VORBIS_PUSHDATA_CRC_COUNT scan -- confirm against the pushdata code.
struct CRCscan {
  uint goal_crc;   // expected crc if match
  int bytes_left;  // bytes left in packet
  uint crc_so_far; // running crc
  int bytes_done;  // bytes processed in _current_ chunk
  uint sample_loc; // granule pos encoded in page
}
584 
// Start/end positions of a page together with a sample number.
// NOTE(review): presumably filled in by the seeking code -- confirm.
struct ProbedPage {
  uint page_start, page_end;
  uint last_decoded_sample;
}
589 
// Stores `e` in the decoder's `error` field and always returns 0.
private int error (VorbisDecoder f, STBVorbisError e) {
  f.error = e;
  // the branch below only repeats the store; it exists as a spot to put a
  // debugger breakpoint for everything except EOF and need_more_data
  immutable bool worthBreaking = !f.eof && e != STBVorbisError.need_more_data;
  if (worthBreaking) {
    // import std.stdio; debug writeln(e);
    f.error = e; // breakpoint for debugging
  }
  return 0;
}
598 
599 // these functions are used for allocating temporary memory
600 // while decoding. if you can afford the stack space, use
601 // alloca(); otherwise, provide a temp buffer and it will
602 // allocate out of those.
// Forwards to `f.alloc.tempSave(f)` and returns its result.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return f.alloc.tempSave(f);
}
// Forwards `p` to `f.alloc.tempRestore(p, f)`.
// NOTE(review): `p` is presumably a value obtained from temp_alloc_save() -- confirm.
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  f.alloc.tempRestore(p, f);
}
// No-op: the body is empty, so nothing is released per pointer here.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc {}
606 /*
607 T* temp_alloc(T) (VorbisDecoder f, uint count) nothrow @nogc {
608   auto res = f.alloc.alloc(count*T.sizeof, f);
609   return cast(T*)res;
610 }
611 */
612 
613 /+
614 enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
615 
616 // has to be a mixin, due to `alloca`
617 template temp_alloc(string size) {
618   enum temp_alloc = q{(f.alloc.alloc_buffer ? setup_temp_malloc(f, (${size})) : alloca(${size}))}.cmacroFixVars!("size")(size);
619 }
620 
621 // has to be a mixin, due to `alloca`
622 template temp_block_array(string count, string size) {
623   enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
624     .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
625 }
626 +/
// Text template for the byte size of a block array: `count` pointer slots
// plus `count` payloads of `size` bytes, i.e. count*((void*).sizeof+size).
// NOTE(review): cmacroFixVars is defined outside this chunk; presumably it
// substitutes the ${count}/${size} placeholders and yields a string -- confirm.
enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
628 
// Text template that expands to an `alloca(size)` call.
// Unlike the disabled variant kept in the /+ +/ block above, this one never
// consults f.alloc.alloc_buffer.
// NOTE(review): the result is presumably meant to be mixed in at the call
// site so that alloca runs in the caller's frame -- confirm.
template temp_alloc(string size) {
  enum temp_alloc = q{alloca(${size})}.cmacroFixVars!("size")(size);
}
632 
// Text template that expands to a make_block_array() call whose memory
// argument is the temp_alloc text for array_size_required(count, size).
// NOTE(review): depends on cmacroFixVars, which is not visible here; the
// ${tam} placeholder is presumably replaced by that temp_alloc text -- confirm.
template temp_block_array(string count, string size) {
  enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
}
637 
638 /*
639 T** temp_block_array(T) (VorbisDecoder f, uint count, uint size) {
640   size *= T.sizeof;
641   auto mem = f.alloc.alloc(count*(void*).sizeof+size, f);
642   if (mem !is null) make_block_array(mem, count, size);
643   return cast(T**)mem;
644 }
645 */
646 
// Lays out a pointer table at the start of `mem`: the first `count` slots are
// pointers, and slot i is set to point at byte offset i*size of the area that
// begins right after those slots. `mem` must be large enough for both parts.
// Returns `mem` (as the pointer table).
private void* make_block_array (void* mem, int count, int size) {
  void** table = cast(void**)mem;
  char* cursor = cast(char*)(table+count); // first byte past the pointer slots
  for (int slot = 0; slot < count; ++slot, cursor += size) {
    table[slot] = cursor;
  }
  return table;
}
657 
// Allocates zero-filled storage for `sz` elements of type T through
// `f.alloc.alloc`. Returns null when the allocator returns null, or when the
// requested byte count (plus padding) does not fit in a uint.
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset;
  // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  enum uint Padding = 8;
  // size math is done in 64 bits: a uint multiply/add would wrap for huge
  // element counts and hand back a block smaller than the caller expects
  immutable ulong payload = cast(ulong)sz*T.sizeof;
  if (payload > uint.max-Padding) return null;
  immutable uint total = cast(uint)payload+Padding;
  /*
  f.setup_memory_required += sz;
  if (f.alloc.alloc_buffer) {
    void* p = cast(char*)f.alloc.alloc_buffer+f.setup_offset;
    if (f.setup_offset+sz > f.temp_offset) return null;
    f.setup_offset += sz;
    return cast(T*)p;
  }
  */
  auto res = f.alloc.alloc(total, f);
  if (res !is null) memset(res, 0, total);
  return cast(T*)res;
}
676 
// Hands a non-null `p` to `f.alloc.free`; a null pointer is ignored.
private void setup_free (VorbisDecoder f, void* p) {
  //if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack
  if (p is null) return;
  f.alloc.free(p, f);
}
681 
// Requests `sz`+8 bytes from `f.alloc.allocTemp` and zero-fills them.
// Returns the block, or null if the allocator returned null.
// NOTE(review): `sz+8` is uint arithmetic and would wrap for sz within 8 of
// uint.max -- confirm callers never pass such sizes.
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset;
  // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  auto block = f.alloc.allocTemp(sz+8, f);
  if (block is null) return block;
  memset(block, 0, sz+8);
  return block;
}
690 
// Returns a non-null `p` to `f.alloc.freeTemp`; a null pointer is ignored.
// The size passed on is `sz`+8, with a zero `sz` treated as 1.
// NOTE(review): setup_temp_malloc(f, 0) requests 8 bytes while this frees 9
// for sz == 0 -- confirm the allocator tolerates the mismatch.
private void setup_temp_free (VorbisDecoder f, void* p, uint sz) {
  if (p is null) return;
  // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  f.alloc.freeTemp(p, (sz ? sz : 1)+8, f);
}
694 
// CRC-32 lookup table (polynomial 0x04c11db7, processed most-significant bit
// first); filled once by the module constructor below.
immutable uint[256] crc_table;
shared static this () {
  enum CRC32_POLY = 0x04c11db7; // from spec
  foreach (uint idx; 0..256) {
    uint reg = idx<<24;
    // eight shift steps; the polynomial is folded in whenever the top bit falls out
    for (int step = 0; step < 8; ++step) {
      immutable bool carry = (reg >= (1U<<31));
      reg <<= 1;
      if (carry) reg ^= CRC32_POLY;
    }
    crc_table[idx] = reg;
  }
}
705 
// Fold one byte into a running CRC using crc_table and return the new CRC.
uint crc32_update (uint crc, ubyte b) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  immutable uint slot = (crc>>24)^b; // top CRC byte combined with the input byte
  return crc_table[slot]^(crc<<8);
}
710 
// Reverse the order of all 32 bits of `n`.
// Used in setup, and for huffman that doesn't go fast path.
private uint bit_reverse (uint n) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // swap neighbouring groups of 1, 2, 4 and 8 bits, then the two 16-bit halves
  n = ((n>>1)&0x55555555)|((n&0x55555555)<<1);
  n = ((n>>2)&0x33333333)|((n&0x33333333)<<2);
  n = ((n>>4)&0x0F0F0F0F)|((n&0x0F0F0F0F)<<4);
  n = ((n>>8)&0x00FF00FF)|((n&0x00FF00FF)<<8);
  return (n<<16)|(n>>16);
}
720 
// Return x multiplied by itself.
private float square (float x) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  immutable float squared = x*x;
  return squared;
}
725 
// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
// as required by the specification. fast(?) implementation from stb.h
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
//
// log2_4[v] is that same weird log2 for the 4-bit values 0..15.
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4];

// Returns 0 for n <= 0, otherwise the position of the highest set bit counted from 1.
private int ilog (int n) {
  //static if (__VERSION__ > 2067) pragma(inline, true);
  if (n < 0) return 0; // signed n returns 0
  // walk the 5-bit tiers: once n fits below bit (shift+4), n>>shift is a valid
  // 4-bit index into log2_4
  for (int shift = 0; shift < 30; shift += 5) {
    if (n < (1<<(shift+4))) return shift+log2_4[n>>shift];
  }
  return 30+log2_4[n>>30];
}
746 
747 
// Sentinel codeword length: marks a value that has no huffman encoding.
enum int NO_CODE = 255;
750 
751 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
752 //
753 // these functions are only called at setup, and only a few times per file
// Decode the packed float format (from the specification): bits 0..20 hold the
// mantissa, bits 21..30 the exponent biased by 788, bit 31 the sign.
private float float32_unpack (uint x) {
  import core.math : ldexp;
  //static if (__VERSION__ > 2067) pragma(inline, true);
  immutable uint mantissa = x&0x1fffff;
  immutable bool negative = (x&0x80000000) != 0;
  immutable int exponent = cast(int)((x>>21)&0x3ff)-788;
  double res = cast(double)mantissa;
  if (negative) res = -res;
  return cast(float)ldexp(cast(float)res, exponent);
}
764 
// zlib & jpeg huffman tables assume that the output symbols
// can either be arbitrarily arranged, or have monotonically
// increasing frequencies--they rely on the lengths being sorted;
// this makes for a very simple generation algorithm.
// vorbis allows a huffman table with non-sorted lengths. This
// requires a more sophisticated construction, since symbols in
// order do not map to huffman codes "in order".
//
// Record one codeword. Dense codebooks store it at index `symbol`; sparse
// codebooks store codeword, length and symbol at the running index `count`.
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
781 
// Assign a huffman codeword to every entry of `len[0..n]` whose length is not
// NO_CODE, recording each one through add_entry.
// Returns true on success (including the case of no coded entries at all) and
// false when an entry cannot be given a free leaf.
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  // available[d] holds the free leaf code at depth d; 0 means 'empty'
  uint[32] available = 0;
  int stored = 0;

  // find the first entry
  int first = 0;
  while (first < n && len[first] >= NO_CODE) ++first;
  if (first == n) { assert(c.sorted_entries == 0); return true; }
  // add to the list
  add_entry(c, 0, first, stored++, len[first], values);
  // add all available leaves
  for (int depth = 1; depth <= len[first]; ++depth) available[depth] = 1U<<(32-depth);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (int i = first+1; i < n; ++i) {
    // keep the length in a temp: dmd bug, it reads 4 bytes without one
    immutable ubyte codeLen = len[i];
    if (codeLen == NO_CODE) continue;
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    int z = codeLen;
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    immutable uint res = available[z];
    assert(z >= 0 && z < 32);
    available[z] = 0;
    add_entry(c, bit_reverse(res), i, stored++, codeLen, values);
    // propagate availability up the tree
    if (z != codeLen) {
      assert(codeLen >= 0 && codeLen < 32);
      for (int y = codeLen; y > z; --y) {
        assert(available[y] == 0);
        available[y] = res+(1<<(32-y));
      }
    }
  }
  return true;
}
833 
// accelerated huffman table allows fast O(1) match of all symbols
// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
//
// Every table slot starts as -1; each short enough codeword then claims all
// slots whose low bits equal that codeword.
private void compute_accelerated_huffman (Codebook* c) {
  foreach (ref slot; c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE]) slot = -1;
  auto len = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (len > 32767) len = 32767; // largest possible value we can encode!
  }
  foreach (uint i; 0..len) {
    if (c.codeword_lengths[i] > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;
    uint z = (c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]);
    // set table entries for all bit combinations in the higher bits
    for (; z < FAST_HUFFMAN_TABLE_SIZE; z += 1<<c.codeword_lengths[i]) {
      c.fast_huffman.ptr[z] = cast(typeof(c.fast_huffman[0]))i; //k8
    }
  }
}
854 
// qsort-style comparator for two uint values: returns -1, 0 or 1.
extern(C) int uint32_compare (const scope void* p, const scope void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
860 
// Decide whether a codeword of length `len` belongs in the sorted table:
// always for sparse codebooks; for dense ones only when it has a code and is
// longer than STB_VORBIS_FAST_HUFFMAN_LENGTH.
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
867 
// if the fast table above doesn't work, we want to binary
// search them... need to reverse the bits
//
// Fills c.sorted_codewords (bit-reversed, sorted ascending, terminated by
// 0xffffffff) and c.sorted_values; for sparse codebooks it also rewrites
// c.codeword_lengths in sorted order.
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  // build a list of all the entries
  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
  // this is kind of a frivolous optimization--I don't see any performance improvement,
  // but it's like 4 extra lines of code, so.
  if (c.sparse) {
    foreach (uint i; 0..c.sorted_entries) c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
  } else {
    int filled = 0;
    foreach (uint i; 0..c.entries) {
      if (!include_in_sort(c, lengths[i])) continue;
      c.sorted_codewords[filled++] = bit_reverse(c.codewords[i]);
    }
    assert(filled == c.sorted_entries);
  }

  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  auto len = (c.sparse ? c.sorted_entries : c.entries);
  // now we need to indicate how they correspond; we could either
  //   #1: sort a different data structure that says who they correspond to
  //   #2: for each sorted entry, search the original list to find who corresponds
  //   #3: for each original entry, find the sorted entry
  // #1 requires extra storage, #2 is slow, #3 can use binary search!
  foreach (uint i; 0..len) {
    auto huff_len = (c.sparse ? lengths[values[i]] : lengths[i]);
    if (!include_in_sort(c, huff_len)) continue;
    immutable uint code = bit_reverse(c.codewords[i]);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    assert(c.sorted_codewords[lo] == code);
    if (c.sparse) {
      c.sorted_values[lo] = values[i];
      c.codeword_lengths[lo] = huff_len;
    } else {
      c.sorted_values[lo] = i;
    }
  }
}
917 
// only run while parsing the header (3 times)
// Returns nonzero when the six bytes at `data` spell "vorbis".
private int vorbis_validate (const(void)* data) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  auto bytes = cast(const(char)*)data;
  return (bytes[0..6] == "vorbis");
}
924 
// called from setup only, once per code book
// (formula implied by specification)
//
// Returns the largest r for which r^dim <= entries. The floating-point
// estimate is nudged up by one when it lands one short.
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  // floor() to avoid _ftol() when non-CRT
  auto floorPow (float base) { return lrintf(floor(pow(base, dim))); }
  int r = cast(int)lrintf(floor(exp(cast(float)log(cast(float)entries)/dim)));
  if (floorPow(cast(float)r+1) <= entries) r += 1;
  assert(pow(cast(float)r+1, dim) > entries);
  assert(floorPow(cast(float)r) <= entries);
  return r;
}
936 
// called twice per file
// Fills the cos/sin tables A and B (n/2 floats each) and C (n/4 floats) for a
// block of size n; entries are written as (even, odd) pairs.
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int n4 = n>>2, n8 = n>>3;
  foreach (immutable int k; 0..n4) {
    immutable int k2 = 2*k;
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (immutable int k; 0..n8) {
    immutable int k2 = 2*k;
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
953 
// Fill window[0..n/2] with sin(0.5*PI*sin^2((i+0.5)/(n/2)*0.5*PI)).
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int n2 = n>>1;
  for (int i = 0; i < n2; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/n2*0.5*PI)));
  }
}
959 
// Fill rev[0..n/8] with the bit-reversed index table for a block of size n.
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int n8 = n>>3;
  for (int i = 0; i < n8; ++i) {
    rev[i] = cast(ushort)((bit_reverse(i)>>(32-ld+3))<<2); //k8
  }
}
965 
// Allocate and compute the per-blocksize tables (twiddle factors A/B/C, window,
// bit-reverse table) for slot `b` and block size `n`.
// Returns true on success; on allocation failure returns the result of
// error(f, STBVorbisError.outofmem).
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1, quarter = n>>2, eighth = n>>3;

  // all three twiddle tables are requested before any of them is checked
  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  immutable bool twiddleOk = (f.A[b] !is null && f.B[b] !is null && f.C[b] !is null);
  if (!twiddleOk) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  f.window[b] = setup_malloc!float(f, half);
  if (f.window[b] is null) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (f.bit_reverse[b] is null) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
981 
// Among x[0..n], find the index of the largest value below x[n] (stored to
// *plow) and of the smallest value above x[n] (stored to *phigh). An output is
// left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  int bestBelow = -1;
  int bestAbove = 65536;
  assert(n >= 0 && n <= ushort.max);
  foreach (ushort idx; 0..cast(ushort)n) {
    if (x[idx] < x[n] && x[idx] > bestBelow) {
      *plow = idx;
      bestBelow = x[idx];
    }
    if (x[idx] > x[n] && x[idx] < bestAbove) {
      *phigh = idx;
      bestAbove = x[idx];
    }
  }
}
991 
// Pair of 16-bit values used when sorting by x.
// this has been repurposed so y is now the original index instead of y
struct Point {
  ushort x;
  ushort y;
}
996 
// qsort-style comparator ordering Point values by their x field: returns -1, 0 or 1.
extern(C) int point_compare (const scope void *p, const scope void *q) {
  immutable ushort lhs = (cast(const(Point)*)p).x;
  immutable ushort rhs = (cast(const(Point)*)q).x;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
1002 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1003 
1004 // ///////////////////////////////////////////////////////////////////// //
// Read one byte from the decoder's input.
// Returns 0 and sets f.eof when the read comes up short; returns 0 without
// touching the input when f.eof is already set.
private ubyte get8 (VorbisDecoder f) {
  // must start at 0: with `= void` the eof-already-set path returned garbage
  ubyte b = 0;
  if (!f.eof) {
    if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  }
  return b;
}
1012 
// Read a 32-bit little-endian value from the decoder's input.
// Returns 0 when f.eof is already set; on little-endian builds a short read
// sets f.eof and also yields 0.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  uint x = 0;
  version(LittleEndian) {
    // the in-memory layout already matches, so read the value in one go
    if (f.rawRead((&x)[0..1]) != x.sizeof) { f.eof = true; x = 0; }
  } else {
    // assemble from single bytes, least significant first
    for (int shift = 0; shift < 32; shift += 8) x |= cast(uint)get8(f)<<shift;
  }
  return x;
}
1027 
// Read exactly `n` bytes into `data`.
// Returns false when f.eof is set, n is negative, or the read comes up short
// (which also sets f.eof); a zero-length request succeeds without reading.
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n > 0 && f.rawRead(data[0..n]) != n) {
    f.eof = true;
    return false;
  }
  return true;
}
1034 
// Skip `n` bytes of input; does nothing when f.eof is set or n is 0.
private void skip (VorbisDecoder f, int n) {
  if (!f.eof && n != 0) f.rawSkip(n);
}
1039 
// Seek the input to absolute offset `loc` and clear f.eof.
// Offsets of 0x80000000 and above are not seeked to; f.eof is set instead.
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  f.eof = false;
  if (loc < 0x80000000) {
    f.rawSeek(loc);
  } else {
    f.eof = true;
  }
}
1046 
1047 
// The four-byte Ogg page capture pattern "OggS" (0x4f, 0x67, 0x67, 0x53).
immutable char[4] ogg_page_header = ['O', 'g', 'g', 'S'];
1049 
// Read four bytes from the input and report whether they are "OggS".
// Returns false if the bytes cannot be read.
private bool capture_pattern (VorbisDecoder f) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  char[4] sign = void;
  return (getn(f, sign.ptr, 4) && sign == "OggS");
}
1056 
// Bit flags tested against the page header flag byte.
enum : int {
  PAGEFLAG_continued_packet = 1,
  PAGEFLAG_first_page = 2,
  PAGEFLAG_last_page = 4,
}
1060 
// Parse an Ogg page header whose "OggS" capture pattern has already been
// consumed, filling in the page flag, page sequence number and segment table.
// Returns true on success; otherwise the result of error() for a bad stream
// structure version or a truncated segment table.
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position
  immutable uint loc0 = get32(f);
  immutable uint loc1 = get32(f);
  // @TODO: validate loc0, loc1 as valid positions?
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  immutable uint seqno = get32(f);
  f.last_page = seqno;
  // CRC32
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);
  // assume we _don't_ know any the sample position of any segments
  f.end_seg_with_known_loc = -2;
  if (loc0 != ~0U || loc1 != ~0U) {
    // determine which packet is the last one that will complete:
    // scan backwards for a segment shorter than 255 bytes
    int last = f.segment_count-1;
    while (last >= 0 && f.segments.ptr[last] >= 255) --last;
    // 'last' is now the index of the _last_ segment of a packet that ends
    if (last >= 0) {
      f.end_seg_with_known_loc = last;
      f.known_loc_for_packet = loc0;
    }
  }
  if (f.first_decode) {
    // page length = segment payloads + 27 fixed header bytes + segment table
    int len = 0;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    len += 27+f.segment_count;
    ProbedPage p;
    p.page_start = f.first_audio_page_offset;
    p.page_end = p.page_start+len;
    p.last_decoded_sample = loc0;
    f.p_first = p;
  }
  f.next_seg = 0;
  return true;
}
1108 
// Consume the capture pattern, then parse the rest of the page header.
// Reports STBVorbisError.missing_capture_pattern if the pattern is not there.
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
1113 
// Begin reading a new packet: while no segment is pending (next_seg == -1),
// start a new page, rejecting pages flagged as continuing a packet; then reset
// the per-packet read state.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid
  f.bytes_in_seg = 0;
  f.packet_bytes = 0;
  f.valid_bits = 0;
  f.last_seg = false;
  return true;
}
1126 
// Like start_packet, but running out of input exactly at a page boundary is
// reported as a plain `false` rather than as an error.
private int maybe_start_packet (VorbisDecoder f) {
  if (f.next_seg == -1) {
    immutable ubyte lead = get8(f);
    if (f.eof) return false; // EOF at page boundary is not an error!
    // import std.stdio; debug writefln("CAPTURE %x %x", x, f.stpos);
    // "OggS", checked byte by byte; reading stops at the first mismatch
    if (lead != 0x4f || get8(f) != 0x67 || get8(f) != 0x67 || get8(f) != 0x53) {
      return error(f, STBVorbisError.missing_capture_pattern);
    }
    if (!start_page_no_capturepattern(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) {
      // set up enough state that we can read this packet if we want,
      // e.g. during recovery
      f.last_seg = false;
      f.bytes_in_seg = 0;
      return error(f, STBVorbisError.continued_packet_flag_invalid);
    }
  }
  return start_packet(f);
}
1147 
// Advance to the next segment of the current packet and return its length.
// Returns 0 once the packet's last segment has been consumed or when the next
// page cannot be started. A segment shorter than 255 bytes marks the packet's end.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) {
      f.last_seg = 1;
      return 0;
    }
    // the packet spills onto this page, so the page must be flagged as a continuation
    if ((f.page_flag&PAGEFLAG_continued_packet) == 0) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  auto len = f.segments.ptr[f.next_seg++];
  immutable bool packetEnds = (len < 255);
  if (packetEnds) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  // page exhausted: the next call has to start a new page
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = len;
  return len;
}
1165 
// Returned by the packet readers when the packet has no more bytes.
enum int EOP = -1;
// Stored in valid_bits once the bit reader has run past the end of the packet.
enum int INVALID_BITS = -1;
1168 
// Read the next byte of the current packet, moving on to the next segment when
// the current one is used up. Returns EOP when the packet has no more bytes.
private int get8_packet_raw (VorbisDecoder f) {
  if (!f.bytes_in_seg) {  // CLANG!
    // either this was the packet's last segment, or no further segment can be had
    if (f.last_seg || !next_segment(f)) return EOP;
  }
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  --f.bytes_in_seg;
  ++f.packet_bytes;
  return get8(f);
}
1179 
// Read one packet byte (or EOP) and discard any bits buffered in the accumulator.
private int get8_packet (VorbisDecoder f) {
  immutable int octet = get8_packet_raw(f);
  f.valid_bits = 0;
  return octet;
}
1185 
// Read a 32-bit little-endian value from the current packet.
// Returns EOP (converted to uint) as soon as any of the four bytes is missing.
private uint get32_packet (VorbisDecoder f) {
  uint x = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    immutable uint b = get8_packet(f);
    if (b == EOP) return EOP;
    x += b<<shift;
  }
  return x;
}
1197 
// Read and discard the remaining bytes of the current packet.
private void flush_packet (VorbisDecoder f) {
  for (;;) {
    if (get8_packet_raw(f) == EOP) break;
  }
}
1201 
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
//
// Read `n` bits from the packet, least significant bit first.
// Returns 0 once the packet has been overrun; valid_bits is then INVALID_BITS.
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case
      immutable uint low = get_bits_main(f, 24);
      immutable uint high = get_bits_main(f, n-24);
      return low+(high<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // at least one byte is needed here, since valid_bits < n
    do {
      immutable uint octet = get8_packet_raw(f);
      if (octet == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += octet<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint bits = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return bits;
}
1231 
// Read `n` bits via get_bits_main and return them cast to the smallest
// unsigned integer type with at least `n` bits.
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else alias Result = ulong;
  return cast(Result)get_bits_main(f, n);
}
1240 
// Read `n` bits via get_bits_main, add `add`, and return the sum cast to the
// smallest unsigned integer type with at least `n` bits; assume no overflow.
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else alias Result = ulong;
  return cast(Result)(get_bits_main(f, n)+add);
}
1249 
// @OPTIMIZE: primary accumulator for huffman
// expand the buffer to as many bits as possible without reading off end of packet
// it might be nice to allow f.valid_bits and f.acc to be stored in registers,
// e.g. cache them locally and decode locally
//private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
//
// String mixin; expects a VorbisDecoder named `f` in scope. When at most 24
// bits are buffered, it appends packet bytes to f.acc until more than 24 bits
// are buffered or the packet ends.
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    for (;;) {
      if (f.last_seg && !f.bytes_in_seg) break;
      immutable int phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
      if (f.valid_bits > 24) break;
    }
  }
};
1268 
// Packet type codes for the three Vorbis header packets.
enum VorbisPacket : int {
  id      = 1,
  comment = 3,
  setup   = 5,
}
1274 
// Slow-path huffman decode of one codeword from the bit accumulator.
// Returns the sorted index (sparse codebooks) or the symbol (dense codebooks),
// or -1 when the codebook has no tables, too few bits are buffered, or no
// codeword matches (the last case also reports STBVorbisError.invalid_stream).
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;
  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  bool useBinarySearch = void;
  if (c.entries > 8) useBinarySearch = (c.sorted_codewords !is null);
  else useBinarySearch = (c.codewords is null);

  if (!useBinarySearch) {
    // if small, linear search
    debug(stb_vorbis) assert(!c.sparse);
    foreach (uint i; 0..c.entries) {
      immutable clen = c.codeword_lengths[i];
      if (clen == NO_CODE) continue;
      if (c.codewords[i] != (f.acc&((1<<clen)-1))) continue;
      // the codeword matches; it only counts if all of its bits were really buffered
      if (f.valid_bits < clen) {
        f.valid_bits = 0;
        return -1;
      }
      f.acc >>= clen;
      f.valid_bits -= clen;
      return i;
    }
    error(f, STBVorbisError.invalid_stream);
    f.valid_bits = 0;
    return -1;
  }

  // binary search
  immutable uint code = bit_reverse(f.acc);
  int x = 0, span = c.sorted_entries;
  while (span > 1) {
    // invariant: sc[x] <= code < sc[x+span]
    immutable int half = span>>1;
    immutable int mid = x+half;
    if (c.sorted_codewords[mid] <= code) {
      x = mid;
      span -= half;
    } else {
      span = half;
    }
  }
  // x is now the sorted index
  if (!c.sparse) x = c.sorted_values[x];
  // x is now sorted index if sparse, or symbol otherwise
  immutable int len = c.codeword_lengths[x];
  if (f.valid_bits < len) {
    f.valid_bits = 0;
    return -1;
  }
  f.acc >>= len;
  f.valid_bits -= len;
  return x;
}
1326 
1327 
// String-mixin generator: decodes one huffman codeword of codebook expression
// `c` into the integer variable named by `var`. Expects a VorbisDecoder named
// `f` in scope. The variable ends up negative when decoding fails.
template DECODE_RAW(string var, string c) {
  enum DECODE_RAW = q{
    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
    // fast huffman table lookup, indexed by the low accumulator bits
    ${i} = ${c}.fast_huffman.ptr[f.acc&FAST_HUFFMAN_TABLE_MASK];
    if (${i} < 0) {
      // not in the fast table: fall back to the slow path
      ${i} = codebook_decode_scalar_raw(f, ${c});
    } else {
      auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
      f.acc >>= ${__temp_prefix__}n;
      f.valid_bits -= ${__temp_prefix__}n;
      if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
    }
  }.cmacroFixVars!("i", "c")(var, c);
}
1344 
// String-mixin generator: DECODE_RAW followed, for sparse codebooks, by the
// translation of the decoded index through sorted_values.
template DECODE(string var, string c) {
  enum DECODE = q{
    ${DECODE_RAW}
    if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
  }.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
}
1349 
1350 
// Decoder used for vector codebooks: the translating DECODE when
// STB_VORBIS_DIVIDES_IN_CODEBOOK is set, plain DECODE_RAW otherwise.
version(STB_VORBIS_DIVIDES_IN_CODEBOOK) alias DECODE_VQ = DECODE;
else alias DECODE_VQ = DECODE_RAW;
1356 
1357 
1358 
// Source-text generators for codebook multiplicand access.
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition; here both expand to the same text.
template CODEBOOK_ELEMENT(string c, string off) {
  enum CODEBOOK_ELEMENT = "("~c~".multiplicands["~off~"])";
}
alias CODEBOOK_ELEMENT_FAST = CODEBOOK_ELEMENT;
template CODEBOOK_ELEMENT_BASE(string c) {
  enum CODEBOOK_ELEMENT_BASE = "(0)";
}
1364 
1365 
// Decode one codeword index for a vector codebook.
// Returns a negative value on failure; STBVorbisError.invalid_stream is
// reported unless the failure is simply the end of the packet.
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return -1;
  }
  int z = -1;
  mixin(DECODE_VQ!("z", "c"));
  debug(stb_vorbis) if (c.sparse) assert(z < c.sorted_entries);
  // check for EOP: a failure with the packet fully consumed is not a stream error
  if (z < 0 && (f.bytes_in_seg || !f.last_seg)) error(f, STBVorbisError.invalid_stream);
  return z;
}
1381 
// Decode one codebook vector and add it to output[0..len]; `len` is capped at
// the codebook's dimension count. Returns false when no codeword could be decoded.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (len > c.dimensions) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int div = 1;
      for (int i = 0; i < len; ++i) {
        int off = (z/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i] += val;
        if (c.sequence_p) last = val+c.minimum_value;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // from here on z is the offset of the vector's first multiplicand
  z *= c.dimensions;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  if (!c.sequence_p) {
    for (int i = 0; i < len; ++i) output[i] += mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    return true;
  }
  // sequence mode: every element builds on the previous one
  for (int i = 0; i < len; ++i) {
    float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i] += val;
    last = val+c.minimum_value;
  }
  return true;
}
1417 
// Decode one codebook vector and add its elements to output[0], output[step],
// output[2*step], ...; `len` is capped at the codebook's dimension count.
// Returns false when no codeword could be decoded.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  int z = codebook_decode_start(f, c);
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  if (z < 0) return false;
  if (len > c.dimensions) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      int div = 1;
      for (int i = 0; i < len; ++i) {
        int off = (z/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i*step] += val;
        if (c.sequence_p) last = val;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // from here on z is the offset of the vector's first multiplicand
  z *= c.dimensions;
  for (int i = 0; i < len; ++i) {
    float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i*step] += val;
    if (c.sequence_p) last = val;
  }

  return true;
}
1447 
// Decode codebook vectors until `total_decode` values have been produced,
// adding them round-robin to the `ch` channel buffers in `outputs` (null
// buffers are skipped). *c_inter_p / *p_inter_p hold the current channel and
// the current position within a channel; both are updated on success.
// `len` is the length of each channel buffer.
// Returns false at end of packet; otherwise failures go through error().
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective)
    // The clamp must subtract that same offset. Subtracting
    // (p_inter*ch-c_inter) left `effective` 2*c_inter too large and let
    // the loops below write past the end of the buffers.
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          // outputs holds raw float pointers, so index them directly (no .ptr)
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    z *= c.dimensions;
    if (c.sequence_p) {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1509 
//private int predict_point (int x, int x0, int x1, int y0, int y1)
// String-mixin generator: stores into `dest` the integer point at `x` on the
// line from (x0,y0) to (x1,y1); the offset from y0 is rounded toward zero.
enum predict_point(string dest, string x, string x0, string x1, string y0, string y1) = q{{
  //import std.math : abs;
  immutable int dy = ${y1}-${y0};
  immutable int adx = ${x1}-${x0};
  // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  immutable int err = /*abs(dy)*/(dy < 0 ? -dy : dy)*(${x}-${x0});
  immutable int off = err/adx;
  /*return*/
  if (dy < 0) ${dest} = ${y0}-off;
  else ${dest} = ${y0}+off;
}}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
1520 
// the following table is block-copied from the specification
// 256 single-precision entries, strictly increasing from 1.0649863e-07 at
// index 0 up to 1.0 at index 255. draw_line uses the integer y coordinate of
// each rendered point as the index into this table.
// Do not "clean up" or recompute the literals: they are meant to stay exactly
// as copied.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
];
1588 
1589 
1590 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1591 // note that you must produce bit-identical output to decode correctly;
1592 // this specific sequence of operations is specified in the spec (it's
1593 // drawing integer-quantized frequency-space lines that the encoder
1594 // expects to be exactly the same)
1595 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1596 // have to divide in the setup? sigh.
// LINE_OP!(a, b) yields the source text of one statement combining the
// l-value `a` with the expression `b`; draw_line mixes it in for every point
// it renders. With STB_VORBIS_NO_DEFER_FLOOR the statement is a plain store
// ("a = b;"), otherwise it is a multiply-accumulate ("a *= b;").
version(STB_VORBIS_NO_DEFER_FLOOR) {
  template LINE_OP(string a, string b) {
    enum LINE_OP = a~" = "~b~";";
  }
} else {
  template LINE_OP(string a, string b) {
    enum LINE_OP = a~" *= "~b~";";
  }
}
1602 
// Optional lookup table replacing the setup division in draw_line: when
// STB_VORBIS_DIVIDE_TABLE is set, draw_line reads
// integer_divide_table[ady][adx] instead of dividing, as long as
// adx < DIVTAB_DENOM and ady < DIVTAB_NUMER.
// NOTE(review): nothing in this part of the file fills the table; presumably
// it is initialised elsewhere before first use -- confirm.
version(STB_VORBIS_DIVIDE_TABLE) {
  enum DIVTAB_NUMER = 32; // number of rows (first index, ady)
  enum DIVTAB_DENOM = 64; // entries per row (second index, adx)
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table; // 2KB
}
1608 
// nobranch abs trick
// Expands to an expression for the absolute value of `v` without a branch:
// the shifted copy (v>>31) is added to v and the sum is then xor-ed with that
// same shifted copy.
// assumes `v` is a 32-bit signed int, so that v>>31 is 0 for non-negative
// values and -1 (all ones) for negative ones -- TODO confirm at call sites
// `v` is pasted in three times, so it should be a side-effect-free expression.
enum ABS(string v) = q{(((${v})+((${v})>>31))^((${v})>>31))}.cmacroFixVars!"v"(v);
1611 
// draw_line: string-mixin stand-in for the force-inlined C helper
//   void draw_line (float* output, int x0, int y0, int x1, int y1, int n)
// (done as a macro because relying on the dmd inliner did not work out).
//
// Mixing in the generated block walks x from x0 up to, but not including,
// min(x1, n) and applies LINE_OP to output[x] with inverse_db_table[y], where
// y follows the integer line from (x0,y0) towards (x1,y1): every step adds
// `base` (the truncated slope dy/adx) to y, or `sy` (base moved one unit
// further in the direction of dy) whenever the accumulated remainder `err`
// reaches adx.
//
// Things a caller has to know:
//   - `x1` must be an l-value: it is clamped to `n` in place, and the caller
//     sees the clamped value afterwards.
//   - the other arguments are pasted as text and may be evaluated more than
//     once, so they should be side-effect-free expressions.
//   - adx = x1-x0 is used as a divisor and must not be zero.
//     NOTE(review): presumably guaranteed by the floor setup code -- confirm.
//   - y is masked with 255 before indexing the 256-entry inverse_db_table, so
//     a damaged stream that pushes y out of range can no longer read outside
//     the table. For in-range y the mask changes nothing, so valid streams
//     decode bit-identically.
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${x1} > ${n}) ${x1} = ${n};
  if (${__temp_prefix__}x < ${x1}) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y&255]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y&255]"));
    }
  }
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1670 
// Decode the codebook vectors of one residue partition into `target`.
//   f      - decoder state handed through to the codebook readers
//   book   - codebook used for this partition
//   target - channel buffer; writing starts at target[offset]
//   n      - number of values in the partition
//   rtype  - residue type: 0 selects the strided layout decoded with
//            codebook_decode_step, anything else the consecutive layout
//            decoded with codebook_decode
// Returns non-zero on success and 0 as soon as one of the codebook readers
// reports failure.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    // consecutive layout: one vector after another, book.dimensions values each
    int consumed = 0;
    while (consumed < n) {
      if (!codebook_decode(f, book, target+offset, n-consumed)) return false;
      consumed += book.dimensions;
      offset += book.dimensions;
    }
    return true;
  }

  // strided layout: vector number `lane` fills target[offset+lane], then every
  // stride-th slot after it
  int stride = n/book.dimensions;
  for (int lane = 0; lane < stride; ++lane) {
    float* dest = target+offset+lane;
    if (!codebook_decode_step(f, book, dest, n-offset-lane, stride)) return false;
  }
  return true;
}
1684 
// Decode the residue vectors of one submap into the per-channel buffers.
//   f               - decoder state (codebooks, residue configs, temp allocator)
//   residue_buffers - one float buffer per channel; the first `n` floats of
//                     every channel that is going to be decoded are zeroed
//                     first, then decoded values are accumulated into them
//   ch              - number of channels handled by this call
//   n               - number of floats per channel buffer
//   rn              - index of the residue configuration to use
//   do_not_decode   - per-channel flags; a non-zero entry leaves that channel
//                     untouched
//
// Residue type 2 with more than one channel is decoded as a single
// interleaved vector through codebook_decode_deinterleave_repeat; everything
// else is decoded channel by channel through residue_decode. In both cases
// there are 8 passes over the partitions; pass 0 additionally reads the
// classification codewords. Hitting end-of-packet (EOP) or a failing codebook
// read stops decoding and keeps whatever has been accumulated so far.
//
// Changes against the previous revision of this function:
//   - removed the `ch == 1` arm from the type-2 path: that path is only
//     entered when ch != 1, so the arm could never execute;
//   - removed the STB_VORBIS_DIVIDES_IN_CODEBOOK switch around the
//     deinterleave call, whose two arms were identical;
//   - under STB_VORBIS_DIVIDES_IN_RESIDUE, `classifications[x]` is a raw int*
//     which has no `.ptr` property; it is now indexed directly so that this
//     configuration compiles.
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  // NOTE(review): alloca is not referenced directly below; presumably the
  // temp_block_array mixin expands to code that needs it -- confirm before removing.
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions; // partitions covered by one classification codeword
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size;         // number of partitions to decode
  uint temp_alloc_point = temp_alloc_save(f);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  if (rtype == 2 && ch != 1) {
    // nothing to do when every channel is flagged do_not_decode
    int j = void;
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      if (ch == 2) {
        // two channels: channel/position of interleaved index z are z&1 and z>>1
        //stb_prof(13);
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = (z&1), p_inter = z>>1;
          if (pass == 0) {
            Codebook *cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          //stb_prof(5);
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(20); // accounts for X time
              // the C source calls a dedicated two-channel variant here
              // (codebook_decode_deinterleave_repeat_2, "saves 1%"); this
              // port uses the generic routine
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(7);
            } else {
              // no book for this class in this pass: skip the whole partition
              zz += r.part_size;
              c_inter = zz&1;
              p_inter = zz>>1;
            }
          }
          //stb_prof(8);
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else {
        // any other channel count: same walk, with a real division/modulo
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = z%ch, p_inter = z/ch;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              // no book for this class in this pass: skip the whole partition
              zz += r.part_size;
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  // per-channel path: residue types 0 and 1, and type 2 with a single channel
  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        // read one classification codeword per decoded channel
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j][i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j][pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  //stb_prof(0);
  // release the classification scratch memory on every exit path
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1901 
1902 
// the following were split out into separate functions while optimizing;
// they could be pushed back up but eh. __forceinline showed no change;
// they're probably already being inlined.

// Butterfly pass over two runs of floats that are walked downwards in memory.
//   n     - number of butterflies; n>>2 rounds of four butterflies are executed
//   e     - base of the data; the upper run ends at e[i_off]
//   k_off - distance from the upper run to the lower run (lower = upper+k_off)
//   A     - twiddle table; butterfly number j uses A[8*j] and A[8*j+1]
// Each butterfly takes one pair of floats from each run, stores the sum of
// the pairs in the upper run and the difference, rotated by the twiddle pair,
// in the lower run.
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  debug(stb_vorbis) assert((n&3) == 0);
  float* hi = e+i_off;
  float* lo = hi+k_off;
  for (int rounds = n>>2; rounds > 0; --rounds) {
    float dr, di;

    dr = hi[ 0]-lo[ 0];
    di = hi[-1]-lo[-1];
    hi[ 0] = hi[ 0]+lo[ 0];
    hi[-1] = hi[-1]+lo[-1];
    lo[ 0] = dr*A[ 0]-di*A[ 1];
    lo[-1] = di*A[ 0]+dr*A[ 1];

    dr = hi[-2]-lo[-2];
    di = hi[-3]-lo[-3];
    hi[-2] = hi[-2]+lo[-2];
    hi[-3] = hi[-3]+lo[-3];
    lo[-2] = dr*A[ 8]-di*A[ 9];
    lo[-3] = di*A[ 8]+dr*A[ 9];

    dr = hi[-4]-lo[-4];
    di = hi[-5]-lo[-5];
    hi[-4] = hi[-4]+lo[-4];
    hi[-5] = hi[-5]+lo[-5];
    lo[-4] = dr*A[16]-di*A[17];
    lo[-5] = di*A[16]+dr*A[17];

    dr = hi[-6]-lo[-6];
    di = hi[-7]-lo[-7];
    hi[-6] = hi[-6]+lo[-6];
    hi[-7] = hi[-7]+lo[-7];
    lo[-6] = dr*A[24]-di*A[25];
    lo[-7] = di*A[24]+dr*A[25];

    // four butterflies done: 8 floats per run, 4*8 twiddle slots
    A += 32;
    hi -= 8;
    lo -= 8;
  }
}
1947 
// Butterfly pass like imdct_step3_iter0_loop, but with a caller-chosen
// twiddle stride.
//   lim   - number of butterflies; lim>>2 rounds of four are executed
//   e     - base of the data; the upper run ends at e[d0]
//   k_off - distance from the upper run to the lower run (lower = upper+k_off)
//   A     - twiddle table; every butterfly reads A[0] and A[1] and then
//           advances A by k1
// Both runs are walked downwards, 8 floats per round.
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* hi = e+d0;
  float* lo = hi+k_off;
  for (int rounds = lim>>2; rounds > 0; --rounds) {
    float dr, di;

    dr = hi[ 0]-lo[ 0];
    di = hi[-1]-lo[-1];
    hi[ 0] = hi[ 0]+lo[ 0];
    hi[-1] = hi[-1]+lo[-1];
    lo[ 0] = dr*A[0]-di*A[1];
    lo[-1] = di*A[0]+dr*A[1];
    A += k1;

    dr = hi[-2]-lo[-2];
    di = hi[-3]-lo[-3];
    hi[-2] = hi[-2]+lo[-2];
    hi[-3] = hi[-3]+lo[-3];
    lo[-2] = dr*A[0]-di*A[1];
    lo[-3] = di*A[0]+dr*A[1];
    A += k1;

    dr = hi[-4]-lo[-4];
    di = hi[-5]-lo[-5];
    hi[-4] = hi[-4]+lo[-4];
    hi[-5] = hi[-5]+lo[-5];
    lo[-4] = dr*A[0]-di*A[1];
    lo[-5] = di*A[0]+dr*A[1];
    A += k1;

    dr = hi[-6]-lo[-6];
    di = hi[-7]-lo[-7];
    hi[-6] = hi[-6]+lo[-6];
    hi[-7] = hi[-7]+lo[-7];
    lo[-6] = dr*A[0]-di*A[1];
    lo[-7] = di*A[0]+dr*A[1];
    A += k1;

    hi -= 8;
    lo -= 8;
  }
}
1993 
// Butterfly pass that reuses the same four twiddle pairs in every round.
//   n     - number of rounds (four butterflies each)
//   e     - base of the data; the upper run ends at e[i_off]
//   k_off - distance from the upper run to the lower run (lower = upper+k_off)
//   A     - twiddle table; the pairs are read once, before the loop, from
//           A[0], A[a_off], A[a_off*2] and A[a_off*3] (plus the float that
//           follows each of them)
//   k0    - how far both runs move down after every round
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  // loop-invariant twiddles (c = first float of a pair, s = second)
  immutable float c0 = A[0];
  immutable float s0 = A[1];
  immutable float c1 = A[a_off];
  immutable float s1 = A[a_off+1];
  immutable float c2 = A[a_off*2];
  immutable float s2 = A[a_off*2+1];
  immutable float c3 = A[a_off*3];
  immutable float s3 = A[a_off*3+1];

  float* hi = e+i_off;
  float* lo = hi+k_off;
  for (int round = 0; round < n; ++round) {
    float dr, di;

    dr = hi[ 0]-lo[ 0];
    di = hi[-1]-lo[-1];
    hi[ 0] += lo[ 0];
    hi[-1] += lo[-1];
    lo[ 0] = dr*c0-di*s0;
    lo[-1] = di*c0+dr*s0;

    dr = hi[-2]-lo[-2];
    di = hi[-3]-lo[-3];
    hi[-2] += lo[-2];
    hi[-3] += lo[-3];
    lo[-2] = dr*c1-di*s1;
    lo[-3] = di*c1+dr*s1;

    dr = hi[-4]-lo[-4];
    di = hi[-5]-lo[-5];
    hi[-4] += lo[-4];
    hi[-5] += lo[-5];
    lo[-4] = dr*c2-di*s2;
    lo[-5] = di*c2+dr*s2;

    dr = hi[-6]-lo[-6];
    di = hi[-7]-lo[-7];
    hi[-6] += lo[-6];
    hi[-7] += lo[-7];
    lo[-6] = dr*c3-di*s3;
    lo[-7] = di*c3+dr*s3;

    hi -= k0;
    lo -= k0;
  }
}
2039 
// this was forceinline
// iter_54: string-mixin stand-in for the C helper `void iter_54(float *z)`.
// The generated block evaluates `z` once into a temporary pointer and then
// rewrites the eight floats z[0], z[-1], ... z[-7] in place, using only sums
// and differences of the pairs (z[0],z[-4]), (z[-1],z[-5]), (z[-2],z[-6]) and
// (z[-3],z[-7]) -- there are no multiplications. In the trailing comments zK
// stands for the value z[-K] had on entry.
//void iter_54(float *z)
enum iter_54(string z) = q{{
  auto ${__temp_prefix__}z = (${z});
  float ${__temp_prefix__}k00, ${__temp_prefix__}k11, ${__temp_prefix__}k22, ${__temp_prefix__}k33;
  float ${__temp_prefix__}y0, ${__temp_prefix__}y1, ${__temp_prefix__}y2, ${__temp_prefix__}y3;

  ${__temp_prefix__}k00 = ${__temp_prefix__}z[ 0]-${__temp_prefix__}z[-4];
  ${__temp_prefix__}y0  = ${__temp_prefix__}z[ 0]+${__temp_prefix__}z[-4];
  ${__temp_prefix__}y2  = ${__temp_prefix__}z[-2]+${__temp_prefix__}z[-6];
  ${__temp_prefix__}k22 = ${__temp_prefix__}z[-2]-${__temp_prefix__}z[-6];

  ${__temp_prefix__}z[-0] = ${__temp_prefix__}y0+${__temp_prefix__}y2;   // z0+z4+z2+z6
  ${__temp_prefix__}z[-2] = ${__temp_prefix__}y0-${__temp_prefix__}y2;   // z0+z4-z2-z6

  // done with ${__temp_prefix__}y0, ${__temp_prefix__}y2

  ${__temp_prefix__}k33 = ${__temp_prefix__}z[-3]-${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-4] = ${__temp_prefix__}k00+${__temp_prefix__}k33; // z0-z4+z3-z7
  ${__temp_prefix__}z[-6] = ${__temp_prefix__}k00-${__temp_prefix__}k33; // z0-z4-z3+z7

  // done with ${__temp_prefix__}k33

  ${__temp_prefix__}k11 = ${__temp_prefix__}z[-1]-${__temp_prefix__}z[-5];
  ${__temp_prefix__}y1  = ${__temp_prefix__}z[-1]+${__temp_prefix__}z[-5];
  ${__temp_prefix__}y3  = ${__temp_prefix__}z[-3]+${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-1] = ${__temp_prefix__}y1+${__temp_prefix__}y3;   // z1+z5+z3+z7
  ${__temp_prefix__}z[-3] = ${__temp_prefix__}y1-${__temp_prefix__}y3;   // z1+z5-z3-z7
  ${__temp_prefix__}z[-5] = ${__temp_prefix__}k11-${__temp_prefix__}k22; // z1-z5-z2+z6
  ${__temp_prefix__}z[-7] = ${__temp_prefix__}k11+${__temp_prefix__}k22; // z1-z5+z2-z6
}}.cmacroFixVars!"z"(z);
2073 
// Combined pass over 16-float groups, walking downwards from e[i_off].
//   n      - number of 16-float groups to process
//   e      - base of the data; the first group ends at e[i_off]
//   A      - twiddle table; only A[base_n>>3] is read
//   base_n - value whose eighth selects that single twiddle
// Each group first gets two rounds of butterflies between its upper and lower
// 8 floats -- the only multiplications are by that one twiddle or its
// negation -- and is then finished by running iter_54 on each half.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
  immutable float tw = A[0+(base_n >> 3)];
  float* z = e+i_off;
  for (float* stop = z-16*n; z > stop; z -= 16) {
    float re0, im0; // differences that are stored as they are (or negated)
    float re1, im1; // differences that are combined and scaled by tw

    // upper quad of each half: z[0..-3] against z[-8..-11]
    re0 = z[ 0]-z[ -8];
    im0 = z[-1]-z[ -9];
    re1 = z[-2]-z[-10];
    im1 = z[-3]-z[-11];
    z[ 0] += z[ -8];
    z[-1] += z[ -9];
    z[-2] += z[-10];
    z[-3] += z[-11];
    z[ -8] = re0;
    z[ -9] = im0;
    z[-10] = (re1+im1)*tw;
    z[-11] = (im1-re1)*tw;

    // lower quad of each half: z[-4..-7] against z[-12..-15]
    re0 = z[-4]-z[-12];
    im0 = z[-5]-z[-13];
    re1 = z[-6]-z[-14];
    im1 = z[-7]-z[-15];
    z[-4] += z[-12];
    z[-5] += z[-13];
    z[-6] += z[-14];
    z[-7] += z[-15];
    z[-12] = im0;
    z[-13] = -re0;
    z[-14] = (im1-re1)*tw;
    z[-15] = (re1+im1)*-tw;

    mixin(iter_54!"z");
    mixin(iter_54!"z-8");
  }
}
2116 
2117 private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
2118   import core.stdc.stdlib : alloca;
2119 
2120   int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
2121   int ld;
2122   // @OPTIMIZE: reduce register pressure by using fewer variables?
2123   int save_point = temp_alloc_save(f);
2124   float *buf2;
2125   buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
2126   float *u = null, v = null;
2127   // twiddle factors
2128   float *A = f.A.ptr[blocktype];
2129 
2130   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2131   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2132 
2133   // kernel from paper
2134 
2135 
2136   // merged:
2137   //   copy and reflect spectral data
2138   //   step 0
2139 
2140   // note that it turns out that the items added together during
2141   // this step are, in fact, being added to themselves (as reflected
2142   // by step 0). inexplicable inefficiency! this became obvious
2143   // once I combined the passes.
2144 
2145   // so there's a missing 'times 2' here (for adding X to itself).
2146   // this propogates through linearly to the end, where the numbers
2147   // are 1/2 too small, and need to be compensated for.
2148 
2149   {
2150     float* d, e, AA, e_stop;
2151     d = &buf2[n2-2];
2152     AA = A;
2153     e = &buffer[0];
2154     e_stop = &buffer[n2];
2155     while (e != e_stop) {
2156       d[1] = (e[0]*AA[0]-e[2]*AA[1]);
2157       d[0] = (e[0]*AA[1]+e[2]*AA[0]);
2158       d -= 2;
2159       AA += 2;
2160       e += 4;
2161     }
2162     e = &buffer[n2-3];
2163     while (d >= buf2) {
2164       d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
2165       d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
2166       d -= 2;
2167       AA += 2;
2168       e -= 4;
2169     }
2170   }
2171 
2172   // now we use symbolic names for these, so that we can
2173   // possibly swap their meaning as we change which operations
2174   // are in place
2175 
2176   u = buffer;
2177   v = buf2;
2178 
2179   // step 2    (paper output is w, now u)
2180   // this could be in place, but the data ends up in the wrong
2181   // place... _somebody_'s got to swap it, so this is nominated
2182   {
2183     float* AA = &A[n2-8];
2184     float* d0, d1, e0, e1;
2185     e0 = &v[n4];
2186     e1 = &v[0];
2187     d0 = &u[n4];
2188     d1 = &u[0];
2189     while (AA >= A) {
2190       float v40_20, v41_21;
2191 
2192       v41_21 = e0[1]-e1[1];
2193       v40_20 = e0[0]-e1[0];
2194       d0[1]  = e0[1]+e1[1];
2195       d0[0]  = e0[0]+e1[0];
2196       d1[1]  = v41_21*AA[4]-v40_20*AA[5];
2197       d1[0]  = v40_20*AA[4]+v41_21*AA[5];
2198 
2199       v41_21 = e0[3]-e1[3];
2200       v40_20 = e0[2]-e1[2];
2201       d0[3]  = e0[3]+e1[3];
2202       d0[2]  = e0[2]+e1[2];
2203       d1[3]  = v41_21*AA[0]-v40_20*AA[1];
2204       d1[2]  = v40_20*AA[0]+v41_21*AA[1];
2205 
2206       AA -= 8;
2207 
2208       d0 += 4;
2209       d1 += 4;
2210       e0 += 4;
2211       e1 += 4;
2212     }
2213   }
2214 
2215   // step 3
2216   ld = ilog(n)-1; // ilog is off-by-one from normal definitions
2217 
2218   // optimized step 3:
2219 
2220   // the original step3 loop can be nested r inside s or s inside r;
2221   // it's written originally as s inside r, but this is dumb when r
2222   // iterates many times, and s few. So I have two copies of it and
2223   // switch between them halfway.
2224 
2225   // this is iteration 0 of step 3
2226   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
2227   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);
2228 
2229   // this is iteration 1 of step 3
2230   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
2231   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
2232   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
2233   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);
2234 
2235   l = 2;
2236   for (; l < (ld-3)>>1; ++l) {
2237     int k0 = n>>(l+2), k0_2 = k0>>1;
2238     int lim = 1<<(l+1);
2239     foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
2240   }
2241 
2242   for (; l < ld-6; ++l) {
2243     int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
2244     int rlim = n>>(l+6);
2245     int lim = 1<<(l+1);
2246     int i_off;
2247     float *A0 = A;
2248     i_off = n2-1;
2249     foreach (immutable _; 0..rlim) {
2250       imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2251       A0 += k1*4;
2252       i_off -= 8;
2253     }
2254   }
2255 
2256   // iterations with count:
2257   //   ld-6,-5,-4 all interleaved together
2258   //       the big win comes from getting rid of needless flops
2259   //         due to the constants on pass 5 & 4 being all 1 and 0;
2260   //       combining them to be simultaneous to improve cache made little difference
2261   imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);
2262 
2263   // output is u
2264 
2265   // step 4, 5, and 6
2266   // cannot be in-place because of step 5
2267   {
2268     ushort *bitrev = f.bit_reverse.ptr[blocktype];
2269     // weirdly, I'd have thought reading sequentially and writing
2270     // erratically would have been better than vice-versa, but in
2271     // fact that's not what my testing showed. (That is, with
2272     // j = bitreverse(i), do you read i and write j, or read j and write i.)
2273     float *d0 = &v[n4-4];
2274     float *d1 = &v[n2-4];
2275     int k4;
2276     while (d0 >= v) {
2277       k4 = bitrev[0];
2278       d1[3] = u[k4+0];
2279       d1[2] = u[k4+1];
2280       d0[3] = u[k4+2];
2281       d0[2] = u[k4+3];
2282 
2283       k4 = bitrev[1];
2284       d1[1] = u[k4+0];
2285       d1[0] = u[k4+1];
2286       d0[1] = u[k4+2];
2287       d0[0] = u[k4+3];
2288 
2289       d0 -= 4;
2290       d1 -= 4;
2291       bitrev += 2;
2292     }
2293   }
2294   // (paper output is u, now v)
2295 
2296 
2297   // data must be in buf2
2298   debug(stb_vorbis) assert(v == buf2);
2299 
2300   // step 7   (paper output is v, now v)
2301   // this is now in place
2302   {
2303     float a02, a11, b0, b1, b2, b3;
2304     float* C = f.C.ptr[blocktype];
2305     float* d, e;
2306     d = v;
2307     e = v+n2-4;
2308     while (d < e) {
2309       a02 = d[0]-e[2];
2310       a11 = d[1]+e[3];
2311 
2312       b0 = C[1]*a02+C[0]*a11;
2313       b1 = C[1]*a11-C[0]*a02;
2314 
2315       b2 = d[0]+e[ 2];
2316       b3 = d[1]-e[ 3];
2317 
2318       d[0] = b2+b0;
2319       d[1] = b3+b1;
2320       e[2] = b2-b0;
2321       e[3] = b1-b3;
2322 
2323       a02 = d[2]-e[0];
2324       a11 = d[3]+e[1];
2325 
2326       b0 = C[3]*a02+C[2]*a11;
2327       b1 = C[3]*a11-C[2]*a02;
2328 
2329       b2 = d[2]+e[ 0];
2330       b3 = d[3]-e[ 1];
2331 
2332       d[2] = b2+b0;
2333       d[3] = b3+b1;
2334       e[0] = b2-b0;
2335       e[1] = b1-b3;
2336 
2337       C += 4;
2338       d += 4;
2339       e -= 4;
2340     }
2341   }
2342 
2343   // data must be in buf2
2344 
2345 
2346   // step 8+decode   (paper output is X, now buffer)
2347   // this generates pairs of data a la 8 and pushes them directly through
2348   // the decode kernel (pushing rather than pulling) to avoid having
2349   // to make another pass later
2350 
2351   // this cannot POSSIBLY be in place, so we refer to the buffers directly
2352   {
2353     float p0, p1, p2, p3;
2354     float* d0, d1, d2, d3;
2355     float* B = f.B.ptr[blocktype]+n2-8;
2356     float* e = buf2+n2-8;
2357     d0 = &buffer[0];
2358     d1 = &buffer[n2-4];
2359     d2 = &buffer[n2];
2360     d3 = &buffer[n-4];
2361     while (e >= v) {
2362       p3 =  e[6]*B[7]-e[7]*B[6];
2363       p2 = -e[6]*B[6]-e[7]*B[7];
2364 
2365       d0[0] =   p3;
2366       d1[3] =  -p3;
2367       d2[0] =   p2;
2368       d3[3] =   p2;
2369 
2370       p1 =  e[4]*B[5]-e[5]*B[4];
2371       p0 = -e[4]*B[4]-e[5]*B[5];
2372 
2373       d0[1] =   p1;
2374       d1[2] = - p1;
2375       d2[1] =   p0;
2376       d3[2] =   p0;
2377 
2378       p3 =  e[2]*B[3]-e[3]*B[2];
2379       p2 = -e[2]*B[2]-e[3]*B[3];
2380 
2381       d0[2] =   p3;
2382       d1[1] = - p3;
2383       d2[2] =   p2;
2384       d3[1] =   p2;
2385 
2386       p1 =  e[0]*B[1]-e[1]*B[0];
2387       p0 = -e[0]*B[0]-e[1]*B[1];
2388 
2389       d0[3] =   p1;
2390       d1[0] = - p1;
2391       d2[3] =   p0;
2392       d3[0] =   p0;
2393 
2394       B -= 8;
2395       e -= 8;
2396       d0 += 4;
2397       d2 += 4;
2398       d1 -= 4;
2399       d3 -= 4;
2400     }
2401   }
2402 
2403   temp_free(f, buf2);
2404   temp_alloc_restore(f, save_point);
2405 }
2406 
// Return the precomputed window table for an overlap region of `len` samples.
// `len` is half a block: doubled, it must equal blocksize_0 (table 0) or
// blocksize_1 (table 1). Any other value halts via assert(0).
private float* get_window (VorbisDecoder f, int len) {
  immutable int blocklen = len<<1;
  // blocksize_0 is tested first, so it wins if both sizes are equal
  if (blocklen == f.blocksize_0) return f.window.ptr[0];
  if (blocklen == f.blocksize_1) return f.window.ptr[1];
  assert(0);
}
2413 
// Element type of the floor Y-value array handed to do_floor():
// `int` when STB_VORBIS_NO_DEFER_FLOOR is set, `short` otherwise.
version(STB_VORBIS_NO_DEFER_FLOOR) alias YTYPE = int;
else alias YTYPE = short;
2419 
// Apply the floor-1 curve of channel `i` to the first n/2 entries of `target`.
//
//   f          decoder state (floor configuration tables)
//   map        mapping of the current mode; selects the channel's submap/floor
//   i          channel index
//   n          block size; only n>>1 entries of `target` are touched
//   target     buffer the curve is applied to through the LINE_OP/draw_line mixins
//   finalY     decoded Y values, one per floor point
//   step2_flag per-point "used" flags; only read when STB_VORBIS_NO_DEFER_FLOOR
//              is set (otherwise a negative finalY entry marks an unused point)
//
// Returns true on success, or the result of error() when the floor is type 0.
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  int n2 = n>>1;
  immutable int submap_index = map.chan[i].mux;
  immutable int floor_index = map.submap_floor.ptr[submap_index];

  // floor type 0 is rejected by this decoder
  if (f.floor_types.ptr[floor_index] == 0) return error(f, STBVorbisError.invalid_stream);

  Floor1* floor_cfg = &f.floor_config[floor_index].floor1;

  // (lx, ly) is the last point drawn so far; start at x = 0
  int lx = 0;
  int ly = finalY[0]*floor_cfg.floor1_multiplier;

  // visit the remaining points in sorted_order, joining each used point to the previous one
  foreach (immutable rank; 1..floor_cfg.values) {
    int j = floor_cfg.sorted_order.ptr[rank];
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      immutable bool point_used = (step2_flag[j] != 0);
    } else {
      immutable bool point_used = (finalY[j] >= 0);
    }
    if (!point_used) continue;

    int hy = finalY[j]*floor_cfg.floor1_multiplier;
    int hx = floor_cfg.Xlist.ptr[j];
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }

  // extend the last value flat up to n2
  // (optimization of: draw_line(target, lx, ly, n, ly, n2))
  if (lx < n2) {
    foreach (immutable j; lx..n2) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  }
  return true;
}
2450 
2451 // The meaning of "left" and "right"
2452 //
2453 // For a given frame:
2454 //     we compute samples from 0..n
2455 //     window_center is n/2
2456 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2457 //     all of the samples from left_end to right_start can be output without mixing; however,
2458 //        this interval is 0-length except when transitioning between short and long frames
2459 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2460 //        which we don't have, so those get saved in a buffer
2461 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2462 //        has to be the same as frame N+1's left_end-left_start (which they are by
2463 //        construction)
2464 
// Start the next audio packet and read its header.
//
// Skips packets whose first bit is set, reads the mode number, and for
// long-block modes the previous/next window flags. From those it derives the
// window boundaries described in "The meaning of left and right" above.
//
//   p_left_start/p_left_end    receive the region mixed with the previous frame
//   p_right_start/p_right_end  receive the region saved for the next frame
//   mode                       receives the mode index read from the packet
//
// Returns true on success; false on end of stream, when no packet could be
// started, or when the mode number is EOP or out of range.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_start = f.channel_buffer_end = 0;

  // find the next packet whose type bit is 0, discarding the others
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    if (get_bits!1(f) == 0) break;
    // (push mode would report STBVorbisError.bad_packet_type here; that path is disabled)
    while (get8_packet(f) != EOP) {}
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  immutable int mode_index = get_bits_main(f, ilog(f.mode_count-1));
  if (mode_index == EOP || mode_index >= f.mode_count) return false;
  *mode = mode_index;

  Mode* m = f.mode_config.ptr+mode_index;
  int n;
  int prev_flag = 0, next_flag = 0;
  if (m.blockflag) {
    // long block: two extra bits give the previous and next window flags
    n = f.blocksize_1;
    prev_flag = get_bits!1(f);
    next_flag = get_bits!1(f);
  } else {
    n = f.blocksize_0;
  }

  // WINDOWING
  immutable int center = n>>1;

  // left side: a long block whose "previous" flag is clear gets the narrow
  // overlap built from blocksize_0, centered on n/4
  if (m.blockflag && !prev_flag) {
    *p_left_start = (n-f.blocksize_0)>>2;
    *p_left_end = (n+f.blocksize_0)>>2;
  } else {
    *p_left_start = 0;
    *p_left_end = center;
  }

  // right side: same rule with the "next" flag, centered on 3n/4
  if (m.blockflag && !next_flag) {
    *p_right_start = (n*3-f.blocksize_0)>>2;
    *p_right_end = (n*3+f.blocksize_0)>>2;
  } else {
    *p_right_start = center;
    *p_right_end = n;
  }
  return true;
}
2514 
// Decode the body of an audio packet whose header was already consumed by
// vorbis_decode_initial(): per-channel floor data, residue, inverse coupling,
// floor application and the inverse MDCT, followed by the bookkeeping of the
// current sample position.
//
//   len          receives the number of samples of the frame that are usable
//   m            mode selected for this packet
//   left_start   start of the left window region (may be advanced here when
//                deferred discard samples are consumed)
//   left_end     not referenced in this function
//   right_start, right_end   bounds of the right window region
//   p_left       receives the adjusted left_start when it is advanced
//
// Returns true on success, or the result of error() when a floor of type 0
// is encountered.
2515 private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
2516   import core.stdc.string : memcpy, memset;
2517 
2518   Mapping* map;
2519   int n, n2;
       // zero_channel[i]: channel i has no floor in this packet (it may be
       // cleared again below for coupled channels); really_zero_channel keeps
       // the values from before that coupling adjustment
2520   int[256] zero_channel;
2521   int[256] really_zero_channel;
2522 
2523   // WINDOWING
2524   n = f.blocksize.ptr[m.blockflag];
2525   map = &f.mapping[m.mapping];
2526 
2527   // FLOORS
2528   n2 = n>>1;
2529 
2530   //stb_prof(1);
2531   foreach (immutable i; 0..f.vrchannels) {
2532     int s = map.chan[i].mux, floor;
2533     zero_channel[i] = false;
2534     floor = map.submap_floor.ptr[s];
2535     if (f.floor_types.ptr[floor] == 0) {
2536       return error(f, STBVorbisError.invalid_stream);
2537     } else {
2538       Floor1* g = &f.floor_config[floor].floor1;
           // one flag bit: when set, floor data for this channel follows
2539       if (get_bits!1(f)) {
2540         short* finalY;
2541         ubyte[256] step2_flag = void;
             // Y value range, selected by floor1_multiplier-1
2542         immutable int[4] range_list = [ 256, 128, 86, 64 ];
2543         int range = range_list[g.floor1_multiplier-1];
2544         int offset = 2;
2545         finalY = f.finalY.ptr[i];
             // the first two Y values are stored directly, ilog(range)-1 bits each
2546         finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
2547         finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8
             // remaining Y values: for every partition decode an optional class
             // value (cval), then cdim values through the subclass books it selects;
             // a negative book number yields 0
2548         foreach (immutable j; 0..g.partitions) {
2549           int pclass = g.partition_class_list.ptr[j];
2550           int cdim = g.class_dimensions.ptr[pclass];
2551           int cbits = g.class_subclasses.ptr[pclass];
2552           int csub = (1<<cbits)-1;
2553           int cval = 0;
2554           if (cbits) {
2555             Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
2556             mixin(DECODE!("cval", "cc"));
2557           }
2558           foreach (immutable k; 0..cdim) {
2559             int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
2560             cval = cval>>cbits;
2561             if (book >= 0) {
2562               int temp;
2563               Codebook *cc = f.codebooks+book;
2564               mixin(DECODE!("temp", "cc"));
2565               finalY[offset++] = cast(short)temp; //k8
2566             } else {
2567               finalY[offset++] = 0;
2568             }
2569           }
2570         }
             // NOTE(review): valid_bits == INVALID_BITS presumably means the packet
             // ran out of data while reading; the channel is then treated as unused
2571         if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
             // convert the decoded values into absolute Y values: each point from
             // index 2 on is predicted from its two neighbors and the decoded value
             // is applied as an offset; step2_flag[j] records whether point j (and
             // the neighbors it depends on) carried a nonzero value
2572         step2_flag[0] = step2_flag[1] = 1;
2573         foreach (immutable j; 2..g.values) {
2574           int low = g.neighbors.ptr[j].ptr[0];
2575           int high = g.neighbors.ptr[j].ptr[1];
2576           //neighbors(g.Xlist, j, &low, &high);
2577           int pred = void;
2578           mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
2579           int val = finalY[j];
2580           int highroom = range-pred;
2581           int lowroom = pred;
2582           auto room = (highroom < lowroom ? highroom : lowroom)*2;
2583           if (val) {
2584             step2_flag[low] = step2_flag[high] = 1;
2585             step2_flag[j] = 1;
2586             if (val >= room) {
2587               finalY[j] = cast(short)(highroom > lowroom ? val-lowroom+pred : pred-val+highroom-1); //k8
2588             } else {
2589               finalY[j] = cast(short)(val&1 ? pred-((val+1)>>1) : pred+(val>>1)); //k8
2590             }
2591           } else {
2592             step2_flag[j] = 0;
2593             finalY[j] = cast(short)pred; //k8
2594           }
2595         }
2596 
2597         version(STB_VORBIS_NO_DEFER_FLOOR) {
2598           do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
2599         } else {
2600           // defer final floor computation until _after_ residue
               // points without a set flag are stored as -1, which do_floor() skips
2601           foreach (immutable j; 0..g.values) if (!step2_flag[j]) finalY[j] = -1;
2602         }
2603       } else {
             // reached when the flag bit is 0 and, through the goto above, when
             // decoding of the floor data was abandoned
2604   error:
2605         zero_channel[i] = true;
2606       }
2607       // So we just defer everything else to later
2608       // at this point we've decoded the floor into buffer
2609     }
2610   }
2611   //stb_prof(0);
2612   // at this point we've decoded all floors
2613 
2614   //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2615 
2616   // re-enable coupled channels if necessary
       // (if either channel of a coupled pair is in use, both get their residue decoded)
2617   memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof*f.vrchannels);
2618   foreach (immutable i; 0..map.coupling_steps) {
2619     if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
2620       zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = false;
2621     }
2622   }
2623 
2624   // RESIDUE DECODE
2625   foreach (immutable i; 0..map.submaps) {
2626     float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
2627     ubyte[256] do_not_decode = void;
2628     int ch = 0;
         // collect the channels assigned to submap i; unused channels get a null
         // buffer and a do_not_decode mark
2629     foreach (immutable j; 0..f.vrchannels) {
2630       if (map.chan[j].mux == i) {
2631         if (zero_channel[j]) {
2632           do_not_decode[ch] = true;
2633           residue_buffers.ptr[ch] = null;
2634         } else {
2635           do_not_decode[ch] = false;
2636           residue_buffers.ptr[ch] = f.channel_buffers.ptr[j];
2637         }
2638         ++ch;
2639       }
2640     }
2641     int r = map.submap_residue.ptr[i];
2642     decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
2643   }
2644 
2645   //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2646 
2647    // INVERSE COUPLING
2648   //stb_prof(14);
       // coupling steps are undone in reverse order; each step rewrites the
       // magnitude/angle pair in place over the first n/2 samples
2649   foreach_reverse (immutable i; 0..map.coupling_steps) {
2650     int n2n = n>>1;
2651     float* mm = f.channel_buffers.ptr[map.chan[i].magnitude];
2652     float* a = f.channel_buffers.ptr[map.chan[i].angle];
2653     foreach (immutable j; 0..n2n) {
2654       float a2, m2;
2655       if (mm[j] > 0) {
2656         if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]-a[j]; } else { a2 = mm[j]; m2 = mm[j]+a[j]; }
2657       } else {
2658         if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]+a[j]; } else { a2 = mm[j]; m2 = mm[j]-a[j]; }
2659       }
2660       mm[j] = m2;
2661       a[j] = a2;
2662     }
2663   }
2664 
2665   // finish decoding the floors
       // channels that had no floor before the coupling adjustment are cleared;
       // every other channel gets its floor applied
2666   version(STB_VORBIS_NO_DEFER_FLOOR) {
2667     foreach (immutable i; 0..f.vrchannels) {
2668       if (really_zero_channel[i]) {
2669         memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
2670       } else {
2671         foreach (immutable j; 0..n2) f.channel_buffers.ptr[i].ptr[j] *= f.floor_buffers.ptr[i].ptr[j];
2672       }
2673     }
2674   } else {
2675     //stb_prof(15);
2676     foreach (immutable i; 0..f.vrchannels) {
2677       if (really_zero_channel[i]) {
2678         memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
2679       } else {
2680         do_floor(f, map, i, n, f.channel_buffers.ptr[i], f.finalY.ptr[i], null);
2681       }
2682     }
2683   }
2684 
2685   // INVERSE MDCT
2686   //stb_prof(16);
2687   foreach (immutable i; 0..f.vrchannels) inverse_mdct(f.channel_buffers.ptr[i], n, f, m.blockflag);
2688   //stb_prof(0);
2689 
2690   // this shouldn't be necessary, unless we exited on an error
2691   // and want to flush to get to the next packet
2692   flush_packet(f);
2693 
2694   if (f.first_decode) {
2695     // assume we start so first non-discarded sample is sample 0
2696     // this isn't to spec, but spec would require us to read ahead
2697     // and decode the size of all current frames--could be done,
2698     // but presumably it's not a commonly used feature
2699     f.current_loc = -n2; // start of first frame is positioned for discard
2700     // we might have to discard samples "from" the next frame too,
2701     // if we're lapping a large block then a small at the start?
2702     f.discard_samples_deferred = n-right_end;
2703     f.current_loc_valid = true;
2704     f.first_decode = false;
2705   } else if (f.discard_samples_deferred) {
         // consume deferred discard samples by moving left_start forward, at most
         // up to right_start; the caller sees the new value through p_left
2706     if (f.discard_samples_deferred >= right_start-left_start) {
2707       f.discard_samples_deferred -= (right_start-left_start);
2708       left_start = right_start;
2709       *p_left = left_start;
2710     } else {
2711       left_start += f.discard_samples_deferred;
2712       *p_left = left_start;
2713       f.discard_samples_deferred = 0;
2714     }
2715   } else if (f.previous_length == 0 && f.current_loc_valid) {
2716     // we're recovering from a seek... that means we're going to discard
2717     // the samples from this packet even though we know our position from
2718     // the last page header, so we need to update the position based on
2719     // the discarded samples here
2720     // but wait, the code below is going to add this in itself even
2721     // on a discard, so we don't need to do it here...
2722   }
2723 
2724   // check if we have ogg information about the sample # for this packet
2725   if (f.last_seg_which == f.end_seg_with_known_loc) {
2726     // if we have a valid current loc, and this is final:
2727     if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
2728       uint current_end = f.known_loc_for_packet-(n-right_end);
2729       // then let's infer the size of the (probably) short final frame
2730       if (current_end < f.current_loc+right_end) {
2731         if (current_end < f.current_loc+(right_end-left_start)) {
2732           // negative truncation, that's impossible!
2733           *len = 0;
2734         } else {
2735           *len = current_end-f.current_loc;
2736         }
2737         *len += left_start;
2738         if (*len > right_end) *len = right_end; // this should never happen
2739         f.current_loc += *len;
2740         return true;
2741       }
2742     }
2743     // otherwise, just set our sample loc
2744     // guess that the ogg granule pos refers to the _middle_ of the
2745     // last frame?
2746     // set f.current_loc to the position of left_start
2747     f.current_loc = f.known_loc_for_packet-(n2-left_start);
2748     f.current_loc_valid = true;
2749   }
2750   if (f.current_loc_valid) f.current_loc += (right_start-left_start);
2751 
2752   //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2753 
2754   *len = right_end;  // ignore samples after the window goes to 0
2755   return true;
2756 }
2757 
// Decode one complete audio packet: header first, then the body.
// `p_left`/`p_right` receive the starts of the left and right window regions
// and `len` the usable frame length. Returns 0 when the header could not be
// read, otherwise whatever vorbis_decode_packet_rest() returns.
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int mode_index, left_end, right_end;
  immutable header_ok = vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode_index);
  if (!header_ok) return 0;
  Mode* selected = f.mode_config.ptr+mode_index;
  return vorbis_decode_packet_rest(f, len, selected, *p_left, left_end, *p_right, right_end, p_left);
}
2763 
// Overlap-add the freshly decoded frame with the tail saved from the previous
// frame, save this frame's tail for the next call, and report how many
// samples are ready for output.
//
//   len    usable length of the decoded frame
//   left   start of this frame's left window region (where mixing begins)
//   right  start of this frame's right window region (where the saved tail
//          begins and the returned data ends)
//
// Returns the number of output samples (right-left, with `right` clamped to
// `len`), or 0 when there was no previous frame to mix with or when the
// saved overlap length does not match either block size.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  // we use right&left (the start of the right- and left-window sin()-regions)
  // to determine how much to return, rather than inferring from the rules
  // (same result, clearer code); 'left' indicates where our sin() window
  // starts, therefore where the previous window's right edge starts, and
  // therefore where to start mixing from the previous buffer. 'right'
  // indicates where our sin() ending-window starts, therefore that's where
  // we start saving, and where our returned-data ends.

  // mixin from previous window
  if (f.previous_length) {
    int n = f.previous_length;
    // previous_length is derived from len-right of the preceding frame, which
    // a truncated or corrupt stream can leave at a value (even a negative one)
    // that is not half of either block size. get_window() halts on such a
    // length, so reject it here and produce no samples instead of aborting.
    immutable int window_len = n<<1;
    if (window_len != f.blocksize_0 && window_len != f.blocksize_1) return 0;
    float *w = get_window(f, n);
    foreach (immutable i; 0..f.vrchannels) {
      foreach (immutable j; 0..n) {
        (f.channel_buffers.ptr[i])[left+j] =
          (f.channel_buffers.ptr[i])[left+j]*w[    j]+
          (f.previous_window.ptr[i])[     j]*w[n-1-j];
      }
    }
  }

  auto prev = f.previous_length;

  // last half of this data becomes previous window
  f.previous_length = len-right;

  // @OPTIMIZE: could avoid this copy by double-buffering the
  // output (flipping previous_window with channel_buffers), but
  // then previous_window would have to be 2x as large, and
  // channel_buffers couldn't be temp mem (although they're NOT
  // currently temp mem, they could be (unless we want to level
  // performance by spreading out the computation))
  foreach (immutable i; 0..f.vrchannels) {
    for (uint j = 0; right+j < len; ++j) (f.previous_window.ptr[i])[j] = (f.channel_buffers.ptr[i])[right+j];
  }

  if (!prev) {
    // there was no previous packet, so this data isn't valid...
    // this isn't entirely true, only the would-have-overlapped data
    // isn't valid, but this seems to be what the spec requires
    return 0;
  }

  // truncate a short frame
  if (len < right) right = len;

  f.samples_output += right-left;

  return right-left;
}
2815 
// Decode one packet and run it through vorbis_finish_frame() so that the
// overlap data for the following frame is in place; the sample count that
// vorbis_finish_frame() returns is ignored.
// Returns false when no packet could be decoded.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int frame_len, left_start, right_start;
  if (!vorbis_decode_packet(f, &frame_len, &left_start, &right_start)) return false;
  vorbis_finish_frame(f, frame_len, left_start, right_start);
  return true;
}
2824 
2825 /+ k8: i don't need that, so it's dead
2826 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2827   import core.stdc.string : memcmp;
2828 
2829   // make sure that we have the packet available before continuing...
2830   // this requires a full ogg parse, but we know we can fetch from f.stream
2831 
2832   // instead of coding this out explicitly, we could save the current read state,
2833   // read the next packet with get8() until end-of-packet, check f.eof, then
2834   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2835   // of state to restore (primarily the page segment table)
2836 
2837   int s = f.next_seg, first = true;
2838   ubyte *p = f.stream;
2839 
2840   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2841     for (; s < f.segment_count; ++s) {
2842       p += f.segments[s];
2843       if (f.segments[s] < 255) break; // stop at first short segment
2844     }
2845     // either this continues, or it ends it...
2846     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2847     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2848     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2849     first = false;
2850   }
2851   while (s == -1) {
2852     ubyte* q = void;
2853     int n = void;
2854     // check that we have the page header ready
2855     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2856     // validate the page
2857     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2858     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2859     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2860       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2861       // if no previous length, we're resynching, so we can come in on a continued-packet,
2862       // which we'll just drop
2863     } else {
2864       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2865     }
2866     n = p[26]; // segment counts
2867     q = p+27; // q points to segment table
2868     p = q+n; // advance past header
2869     // make sure we've read the segment table
2870     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2871     for (s = 0; s < n; ++s) {
2872       p += q[s];
2873       if (q[s] < 255) break;
2874     }
2875     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2876     if (s == n) s = -1; // set 'crosses page' flag
2877     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2878     first = false;
2879   }
2880   return true;
2881 }
2882 +/
2883 
2884 private int start_decoder (VorbisDecoder f) {
2885   import core.stdc.string : memcpy, memset;
2886 
2887   ubyte[6] header;
2888   ubyte x, y;
2889   int len, max_submaps = 0;
2890   int longest_floorlist = 0;
2891 
2892   // first page, first packet
2893 
2894   if (!start_page(f)) return false;
2895   // validate page flag
2896   if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
2897   if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
2898   if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
2899   // check for expected packet length
2900   if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
2901   if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
2902   // read packet
2903   // check packet header
2904   if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
2905   if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
2906   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
2907   // vorbis_version
2908   if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
2909   f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
2910   if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
2911   f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
2912   get32(f); // bitrate_maximum
2913   get32(f); // bitrate_nominal
2914   get32(f); // bitrate_minimum
2915   x = get8(f);
2916   {
2917     int log0 = x&15;
2918     int log1 = x>>4;
2919     f.blocksize_0 = 1<<log0;
2920     f.blocksize_1 = 1<<log1;
2921     if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
2922     if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
2923     if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
2924   }
2925 
2926   // framing_flag
2927   x = get8(f);
2928   if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);
2929 
2930   // second packet! (comments)
2931   if (!start_page(f)) return false;
2932 
2933   // read comments
2934   if (!start_packet(f)) return false;
2935 
2936   if (f.read_comments) {
2937     /+if (f.push_mode) {
2938       if (!is_whole_packet_present(f, true)) {
2939         // convert error in ogg header to write type
2940         if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2941         return false;
2942       }
2943     }+/
2944     if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
2945     foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2946     if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2947 
2948     // skip vendor id
2949     uint vidsize = get32_packet(f);
2950     //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
2951     if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
2952     while (vidsize--) get8_packet(f);
2953 
2954     // read comments section
2955     uint cmtcount = get32_packet(f);
2956     if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
2957     if (cmtcount > 0) {
2958       uint cmtsize = 32768; // this should be enough for everyone
2959       f.comment_data = setup_malloc!ubyte(f, cmtsize);
2960       if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
2961       auto cmtpos = 0;
2962       auto d = f.comment_data;
2963       while (cmtcount--) {
2964         uint linelen = get32_packet(f);
2965         //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
2966         if (linelen == EOP || linelen > ushort.max-2) break;
2967         if (linelen == 0) { continue; }
2968         if (cmtpos+2+linelen > cmtsize) break;
2969         cmtpos += linelen+2;
2970         *d++ = (linelen+2)&0xff;
2971         *d++ = ((linelen+2)>>8)&0xff;
2972         while (linelen--) {
2973           auto b = get8_packet(f);
2974           if (b == EOP) return error(f, STBVorbisError.outofmem);
2975           *d++ = cast(ubyte)b;
2976         }
2977         //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
2978         f.comment_size = cmtpos;
2979       }
2980     }
2981     flush_packet(f);
2982     f.comment_rewind();
2983   } else {
2984     // skip comments
2985     do {
2986       len = next_segment(f);
2987       skip(f, len);
2988       f.bytes_in_seg = 0;
2989     } while (len);
2990   }
2991 
2992   // third packet!
2993   if (!start_packet(f)) return false;
2994 
2995   /+if (f.push_mode) {
2996     if (!is_whole_packet_present(f, true)) {
2997       // convert error in ogg header to write type
2998       if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2999       return false;
3000     }
3001   }+/
3002 
3003   if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
3004   foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
3005   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
3006 
3007   // codebooks
3008   f.codebook_count = get_bits!8(f)+1;
3009   f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
3010   static assert((*f.codebooks).sizeof == Codebook.sizeof);
3011   if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
3012   memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
3013   foreach (immutable i; 0..f.codebook_count) {
3014     uint* values;
3015     int ordered, sorted_count;
3016     int total = 0;
3017     ubyte* lengths;
3018     Codebook* c = f.codebooks+i;
3019     x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
3020     x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
3021     x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
3022     x = get_bits!8(f);
3023     c.dimensions = (get_bits!8(f)<<8)+x;
3024     x = get_bits!8(f);
3025     y = get_bits!8(f);
3026     c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
3027     ordered = get_bits!1(f);
3028     c.sparse = (ordered ? 0 : get_bits!1(f));
3029 
3030     if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);
3031 
3032     if (c.sparse) {
3033       lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
3034     } else {
3035       lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3036     }
3037 
3038     if (lengths is null) return error(f, STBVorbisError.outofmem);
3039 
3040     if (ordered) {
3041       int current_entry = 0;
3042       int current_length = get_bits_add_no!5(f, 1);
3043       while (current_entry < c.entries) {
3044         int limit = c.entries-current_entry;
3045         int n = get_bits_main(f, ilog(limit));
3046         if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
3047         memset(lengths+current_entry, current_length, n);
3048         current_entry += n;
3049         ++current_length;
3050       }
3051     } else {
3052       foreach (immutable j; 0..c.entries) {
3053         int present = (c.sparse ? get_bits!1(f) : 1);
3054         if (present) {
3055           lengths[j] = get_bits_add_no!5(f, 1);
3056           ++total;
3057           if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
3058         } else {
3059           lengths[j] = NO_CODE;
3060         }
3061       }
3062     }
3063 
3064     if (c.sparse && total >= c.entries>>2) {
3065       // convert sparse items to non-sparse!
3066       if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
3067       c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3068       if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
3069       memcpy(c.codeword_lengths, lengths, c.entries);
3070       setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
3071       lengths = c.codeword_lengths;
3072       c.sparse = 0;
3073     }
3074 
3075     // compute the size of the sorted tables
3076     if (c.sparse) {
3077       sorted_count = total;
3078     } else {
3079       sorted_count = 0;
3080       version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
3081         foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
3082       }
3083     }
3084 
3085     c.sorted_entries = sorted_count;
3086     values = null;
3087 
3088     if (!c.sparse) {
3089       c.codewords = setup_malloc!uint(f, c.entries);
3090       if (!c.codewords) return error(f, STBVorbisError.outofmem);
3091     } else {
3092       if (c.sorted_entries) {
3093         c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
3094         if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
3095         c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3096         if (!c.codewords) return error(f, STBVorbisError.outofmem);
3097         values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
3098         if (!values) return error(f, STBVorbisError.outofmem);
3099       }
3100       uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
3101       if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
3102     }
3103 
3104     if (!compute_codewords(c, lengths, c.entries, values)) {
3105       if (c.sparse) setup_temp_free(f, values, 0);
3106       return error(f, STBVorbisError.invalid_setup);
3107     }
3108 
3109     if (c.sorted_entries) {
3110       // allocate an extra slot for sentinels
3111       c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
3112       if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
3113       // allocate an extra slot at the front so that c.sorted_values[-1] is defined
3114       // so that we can catch that case without an extra if
3115       c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
3116       if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
3117       ++c.sorted_values;
3118       c.sorted_values[-1] = -1;
3119       compute_sorted_huffman(c, lengths, values);
3120     }
3121 
3122     if (c.sparse) {
3123       setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
3124       setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3125       setup_temp_free(f, lengths, c.entries);
3126       c.codewords = null;
3127     }
3128 
3129     compute_accelerated_huffman(c);
3130 
3131     c.lookup_type = get_bits!4(f);
3132     if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
3133     if (c.lookup_type > 0) {
3134       ushort* mults;
3135       c.minimum_value = float32_unpack(get_bits!32(f));
3136       c.delta_value = float32_unpack(get_bits!32(f));
3137       c.value_bits = get_bits_add_no!4(f, 1);
3138       c.sequence_p = get_bits!1(f);
3139       if (c.lookup_type == 1) {
3140         c.lookup_values = lookup1_values(c.entries, c.dimensions);
3141       } else {
3142         c.lookup_values = c.entries*c.dimensions;
3143       }
3144       if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
3145       mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
3146       if (mults is null) return error(f, STBVorbisError.outofmem);
3147       foreach (immutable j; 0..cast(int)c.lookup_values) {
3148         int q = get_bits_main(f, c.value_bits);
3149         if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
3150         mults[j] = cast(ushort)q; //k8
3151       }
3152 
3153       version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
3154         if (c.lookup_type == 1) {
3155           int sparse = c.sparse; //len
3156           float last = 0;
3157           // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3158           if (sparse) {
3159             if (c.sorted_entries == 0) goto skip;
3160             c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
3161           } else {
3162             c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
3163           }
3164           if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3165           foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
3166             uint z = (sparse ? c.sorted_values[j] : j);
3167             uint div = 1;
3168             foreach (immutable k; 0..c.dimensions) {
3169               int off = (z/div)%c.lookup_values;
3170               float val = mults[off];
3171               val = val*c.delta_value+c.minimum_value+last;
3172               c.multiplicands[j*c.dimensions+k] = val;
3173               if (c.sequence_p) last = val;
3174               if (k+1 < c.dimensions) {
3175                  if (div > uint.max/cast(uint)c.lookup_values) {
3176                     setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3177                     return error(f, STBVorbisError.invalid_setup);
3178                  }
3179                  div *= c.lookup_values;
3180               }
3181             }
3182           }
3183           c.lookup_type = 2;
3184           goto skip;
3185         }
3186         //else
3187       }
3188       {
3189         float last = 0;
3190         c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
3191         if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3192         foreach (immutable j; 0..cast(int)c.lookup_values) {
3193           float val = mults[j]*c.delta_value+c.minimum_value+last;
3194           c.multiplicands[j] = val;
3195           if (c.sequence_p) last = val;
3196         }
3197       }
3198      //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
3199      skip: // this is versioned out in C
3200       setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3201     }
3202   }
3203 
3204   // time domain transfers (notused)
3205   x = get_bits_add_no!6(f, 1);
3206   foreach (immutable i; 0..x) {
3207     auto z = get_bits!16(f);
3208     if (z != 0) return error(f, STBVorbisError.invalid_setup);
3209   }
3210 
3211   // Floors
3212   f.floor_count = get_bits_add_no!6(f, 1);
3213   f.floor_config = setup_malloc!Floor(f, f.floor_count);
3214   if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
3215   foreach (immutable i; 0..f.floor_count) {
3216     f.floor_types[i] = get_bits!16(f);
3217     if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
3218     if (f.floor_types[i] == 0) {
3219       Floor0* g = &f.floor_config[i].floor0;
3220       g.order = get_bits!8(f);
3221       g.rate = get_bits!16(f);
3222       g.bark_map_size = get_bits!16(f);
3223       g.amplitude_bits = get_bits!6(f);
3224       g.amplitude_offset = get_bits!8(f);
3225       g.number_of_books = get_bits_add_no!4(f, 1);
3226       foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
3227       return error(f, STBVorbisError.feature_not_supported);
3228     } else {
3229       Point[31*8+2] p;
3230       Floor1 *g = &f.floor_config[i].floor1;
3231       int max_class = -1;
3232       g.partitions = get_bits!5(f);
3233       foreach (immutable j; 0..g.partitions) {
3234         g.partition_class_list[j] = get_bits!4(f);
3235         if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
3236       }
3237       foreach (immutable j; 0..max_class+1) {
3238         g.class_dimensions[j] = get_bits_add_no!3(f, 1);
3239         g.class_subclasses[j] = get_bits!2(f);
3240         if (g.class_subclasses[j]) {
3241           g.class_masterbooks[j] = get_bits!8(f);
3242           if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3243         }
3244         foreach (immutable k; 0..1<<g.class_subclasses[j]) {
3245           g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
3246           if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3247         }
3248       }
3249       g.floor1_multiplier = get_bits_add_no!2(f, 1);
3250       g.rangebits = get_bits!4(f);
3251       g.Xlist[0] = 0;
3252       g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
3253       g.values = 2;
3254       foreach (immutable j; 0..g.partitions) {
3255         int c = g.partition_class_list[j];
3256         foreach (immutable k; 0..g.class_dimensions[c]) {
3257           g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
3258           ++g.values;
3259         }
3260       }
3261       assert(g.values <= ushort.max);
3262       // precompute the sorting
3263       foreach (ushort j; 0..cast(ushort)g.values) {
3264         p[j].x = g.Xlist[j];
3265         p[j].y = j;
3266       }
3267       qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3268       foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
3269       // precompute the neighbors
3270       foreach (uint j; 2..g.values) {
3271         ushort low = void, hi = void;
3272         neighbors(g.Xlist.ptr, j, &low, &hi);
3273         assert(low <= ubyte.max);
3274         assert(hi <= ubyte.max);
3275         g.neighbors[j].ptr[0] = cast(ubyte)low;
3276         g.neighbors[j].ptr[1] = cast(ubyte)hi;
3277       }
3278       if (g.values > longest_floorlist) longest_floorlist = g.values;
3279     }
3280   }
3281 
3282   // Residue
3283   f.residue_count = get_bits_add_no!6(f, 1);
3284   f.residue_config = setup_malloc!Residue(f, f.residue_count);
3285   if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
3286   memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
3287   foreach (immutable i; 0..f.residue_count) {
3288     ubyte[64] residue_cascade;
3289     Residue* r = f.residue_config+i;
3290     f.residue_types[i] = get_bits!16(f);
3291     if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
3292     r.begin = get_bits!24(f);
3293     r.end = get_bits!24(f);
3294     if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
3295     r.part_size = get_bits_add_no!24(f, 1);
3296     r.classifications = get_bits_add_no!6(f, 1);
3297     r.classbook = get_bits!8(f);
3298     if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3299     foreach (immutable j; 0..r.classifications) {
3300       ubyte high_bits = 0;
3301       ubyte low_bits = get_bits!3(f);
3302       if (get_bits!1(f)) high_bits = get_bits!5(f);
3303       assert(high_bits*8+low_bits <= ubyte.max);
3304       residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
3305     }
3306     static assert(r.residue_books[0].sizeof == 16);
3307     r.residue_books = setup_malloc!(short[8])(f, r.classifications);
3308     if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
3309     foreach (immutable j; 0..r.classifications) {
3310       foreach (immutable k; 0..8) {
3311         if (residue_cascade[j]&(1<<k)) {
3312           r.residue_books[j].ptr[k] = get_bits!8(f);
3313           if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3314         } else {
3315           r.residue_books[j].ptr[k] = -1;
3316         }
3317       }
3318     }
3319     // precompute the classifications[] array to avoid inner-loop mod/divide
3320     // call it 'classdata' since we already have r.classifications
3321     r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
3322     if (!r.classdata) return error(f, STBVorbisError.outofmem);
3323     memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
3324     foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
3325       int classwords = f.codebooks[r.classbook].dimensions;
3326       int temp = j;
3327       r.classdata[j] = setup_malloc!ubyte(f, classwords);
3328       if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
3329       foreach_reverse (immutable k; 0..classwords) {
3330         assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
3331         r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
3332         temp /= r.classifications;
3333       }
3334     }
3335   }
3336 
3337   f.mapping_count = get_bits_add_no!6(f, 1);
3338   f.mapping = setup_malloc!Mapping(f, f.mapping_count);
3339   if (f.mapping is null) return error(f, STBVorbisError.outofmem);
3340   memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
3341   foreach (immutable i; 0..f.mapping_count) {
3342     Mapping* m = f.mapping+i;
3343     int mapping_type = get_bits!16(f);
3344     if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
3345     m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
3346     if (m.chan is null) return error(f, STBVorbisError.outofmem);
3347     m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
3348     if (m.submaps > max_submaps) max_submaps = m.submaps;
3349     if (get_bits!1(f)) {
3350       m.coupling_steps = get_bits_add_no!8(f, 1);
3351       foreach (immutable k; 0..m.coupling_steps) {
3352         m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3353         m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3354         if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3355         if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3356         if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
3357       }
3358     } else {
3359       m.coupling_steps = 0;
3360     }
3361 
3362     // reserved field
3363     if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
3364     if (m.submaps > 1) {
3365       foreach (immutable j; 0..f.vrchannels) {
3366         m.chan[j].mux = get_bits!4(f);
3367         if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
3368       }
3369     } else {
3370       // @SPECIFICATION: this case is missing from the spec
3371       foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
3372     }
3373     foreach (immutable j; 0..m.submaps) {
3374       get_bits!8(f); // discard
3375       m.submap_floor[j] = get_bits!8(f);
3376       m.submap_residue[j] = get_bits!8(f);
3377       if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
3378       if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
3379     }
3380   }
3381 
3382   // Modes
3383   f.mode_count = get_bits_add_no!6(f, 1);
3384   foreach (immutable i; 0..f.mode_count) {
3385     Mode* m = f.mode_config.ptr+i;
3386     m.blockflag = get_bits!1(f);
3387     m.windowtype = get_bits!16(f);
3388     m.transformtype = get_bits!16(f);
3389     m.mapping = get_bits!8(f);
3390     if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
3391     if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
3392     if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
3393   }
3394 
3395   flush_packet(f);
3396 
3397   f.previous_length = 0;
3398 
3399   foreach (immutable i; 0..f.vrchannels) {
3400     f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
3401     f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3402     f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
3403     if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3404     version(STB_VORBIS_NO_DEFER_FLOOR) {
3405       f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3406       if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3407     }
3408   }
3409 
3410   if (!init_blocksize(f, 0, f.blocksize_0)) return false;
3411   if (!init_blocksize(f, 1, f.blocksize_1)) return false;
3412   f.blocksize.ptr[0] = f.blocksize_0;
3413   f.blocksize.ptr[1] = f.blocksize_1;
3414 
3415   version(STB_VORBIS_DIVIDE_TABLE) {
3416     if (integer_divide_table[1].ptr[1] == 0) {
3417       foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
3418     }
3419   }
3420 
3421   // compute how much temporary memory is needed
3422 
3423   // 1.
3424   {
3425     uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
3426     uint classify_mem;
3427     int max_part_read = 0;
3428     foreach (immutable i; 0..f.residue_count) {
3429       Residue* r = f.residue_config+i;
3430       int n_read = r.end-r.begin;
3431       int part_read = n_read/r.part_size;
3432       if (part_read > max_part_read) max_part_read = part_read;
3433     }
3434     version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
3435       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
3436     } else {
3437       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
3438     }
3439     f.temp_memory_required = classify_mem;
3440     if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
3441   }
3442 
3443   f.first_decode = true;
3444 
3445   /+
3446   if (f.alloc.alloc_buffer) {
3447     debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3448     // check if there's enough temp memory so we don't error later
3449     if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
3450   }
3451   +/
3452 
3453   f.first_audio_page_offset = f.fileOffset();
3454 
3455   return true;
3456 }
3457 
3458 /+
3459 private int vorbis_search_for_page_pushdata (VorbisDecoder f, ubyte* data, int data_len) {
3460   import core.stdc.string : memcmp;
3461 
3462   foreach (immutable i; 0..f.page_crc_tests) f.scan.ptr[i].bytes_done = 0;
3463 
3464   // if we have room for more scans, search for them first, because
3465   // they may cause us to stop early if their header is incomplete
3466   if (f.page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3467     if (data_len < 4) return 0;
3468     data_len -= 3; // need to look for 4-byte sequence, so don't miss one that straddles a boundary
3469     foreach (immutable i; 0..data_len) {
3470       if (data[i] == 0x4f) {
3471         if (memcmp(data+i, ogg_page_header.ptr, 4) == 0) {
3472           // make sure we have the whole page header
3473           if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3474             // only read up to this page start, so hopefully we'll
3475             // have the whole page header start next time
3476             data_len = i;
3477             break;
3478           }
3479           // ok, we have it all; compute the length of the page
3480           auto len = 27+data[i+26];
3481           foreach (immutable j; 0..data[i+26]) len += data[i+27+j];
3482           // scan everything up to the embedded crc (which we must 0)
3483           uint crc = 0;
3484           foreach (immutable j; 0..22) crc = crc32_update(crc, data[i+j]);
3485           // now process 4 0-bytes
3486           foreach (immutable j; 22..26) crc = crc32_update(crc, 0);
3487           // len is the total number of bytes we need to scan
3488           auto n = f.page_crc_tests++;
3489           f.scan.ptr[n].bytes_left = len-/*j*/26;
3490           f.scan.ptr[n].crc_so_far = crc;
3491           f.scan.ptr[n].goal_crc = data[i+22]+(data[i+23]<<8)+(data[i+24]<<16)+(data[i+25]<<24);
3492           // if the last frame on a page is continued to the next, then
3493           // we can't recover the sample_loc immediately
3494           if (data[i+27+data[i+26]-1] == 255) {
3495             f.scan.ptr[n].sample_loc = ~0;
3496           } else {
3497             f.scan.ptr[n].sample_loc = data[i+6]+(data[i+7]<<8)+(data[i+8]<<16)+(data[i+9]<<24);
3498           }
3499           f.scan.ptr[n].bytes_done = i+26/*j*/;
3500           if (f.page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) break;
3501           // keep going if we still have room for more
3502         }
3503       }
3504     }
3505   }
3506 
3507   for (uint i = 0; i < f.page_crc_tests; ) {
3508     int nn = f.scan.ptr[i].bytes_done;
3509     int m = f.scan.ptr[i].bytes_left;
3510     if (m > data_len-nn) m = data_len-nn;
3511     // m is the bytes to scan in the current chunk
3512     uint crc = f.scan.ptr[i].crc_so_far;
3513     foreach (immutable j; 0..m) crc = crc32_update(crc, data[nn+j]);
3514     f.scan.ptr[i].bytes_left -= m;
3515     f.scan.ptr[i].crc_so_far = crc;
3516     if (f.scan.ptr[i].bytes_left == 0) {
3517       // does it match?
3518       if (f.scan.ptr[i].crc_so_far == f.scan.ptr[i].goal_crc) {
3519         // Houston, we have page
3520         data_len = nn+m; // consumption amount is wherever that scan ended
3521         f.page_crc_tests = -1; // drop out of page scan mode
3522         f.previous_length = 0; // decode-but-don't-output one frame
3523         f.next_seg = -1;       // start a new page
3524         f.current_loc = f.scan.ptr[i].sample_loc; // set the current sample location to the amount we'd have decoded had we decoded this page
3525         f.current_loc_valid = f.current_loc != ~0U;
3526         return data_len;
3527       }
3528       // delete entry
3529       f.scan.ptr[i] = f.scan.ptr[--f.page_crc_tests];
3530     } else {
3531       ++i;
3532     }
3533   }
3534 
3535   return data_len;
3536 }
3537 +/
3538 
// Scans forward from the current stream position for the next Ogg page whose
// CRC-32 matches the checksum stored in its header.
//
// Params:
//   f    = decoder whose input stream is scanned
//   end  = if non-null, receives the stream offset just past the located page
//   last = if non-null, receives 1 when bit 0x04 of the header-type byte
//          (header[5], the Ogg end-of-stream flag) is set, otherwise 0
//
// Returns: 1 with the stream repositioned on the first byte of the page (its
//          'O'), or 0 if the end of the stream is hit before a valid page.
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  for (;;) {
    if (f.eof) return 0;
    auto n = get8(f);
    if (n == 0x4f) { // page header candidate
      // offset of the byte right after the 'O'; this is where scanning resumes
      // if the candidate turns out to be bogus
      uint retry_loc = f.fileOffset;
      // check if we're off the end of a file_section stream; the explicit
      // lower bound keeps the unsigned subtraction from wrapping around (and
      // aborting the whole search) for candidates found in the first 25 bytes
      if (retry_loc >= 25 && retry_loc-25 > f.stream_len) return 0;
      // check the rest of the capture pattern
      int i = void;
      for (i = 1; i < 4; ++i) if (get8(f) != ogg_page_header[i]) break;
      if (f.eof) return 0;
      if (i == 4) {
        ubyte[27] header;
        header[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
        for (i = 4; i < 27; ++i) header[i] = get8(f);
        if (f.eof) return 0;
        // byte 4 must be zero for a page we accept
        if (header[4] != 0) goto invalid;
        // the stored checksum (little-endian, bytes 22..25) is computed with
        // those four bytes zeroed, so remember it and blank the field
        uint goal = header[22]+(header[23]<<8)+(header[24]<<16)+(header[25]<<24);
        for (i = 22; i < 26; ++i) header[i] = 0;
        uint crc = 0;
        for (i = 0; i < 27; ++i) crc = crc32_update(crc, header[i]);
        // the lacing table (header[26] entries) is covered by the checksum and
        // its sum is the payload length
        uint len = 0;
        for (i = 0; i < header[26]; ++i) {
          auto s = get8(f);
          crc = crc32_update(crc, s);
          len += s;
        }
        if (len && f.eof) return 0;
        for (i = 0; i < len; ++i) crc = crc32_update(crc, get8(f));
        // finished parsing probable page
        if (crc == goal) {
          // we could now check that it's either got the last
          // page flag set, OR it's followed by the capture
          // pattern, but I guess TECHNICALLY you could have
          // a file with garbage between each ogg page and recover
          // from it automatically? So even though that paranoia
          // might decrease the chance of an invalid decode by
          // another 2^32, not worth it since it would hose those
          // invalid-but-useful files?
          if (end) *end = f.fileOffset;
          if (last) *last = (header[5]&0x04 ? 1 : 0);
          // retry_loc is one past the 'O', so step back onto the page start
          set_file_offset(f, retry_loc-1);
          return 1;
        }
      }
     invalid:
      // not a valid page, so rewind and look for next one
      set_file_offset(f, retry_loc);
    }
  }
  assert(0);
}
3593 
// all-ones 32-bit value.
// NOTE(review): presumably the "sample position unknown" marker, i.e. the same
// value the seek code below tests for with `~0U` -- confirm against its users.
enum SAMPLE_unknown = 0xffffffff;
3595 
3596 // seeking is implemented with a binary search, which narrows down the range to
3597 // 64K, before using a linear search (because finding the synchronization
3598 // pattern can be expensive, and the chance we'd find the end page again is
3599 // relatively high for small ranges)
3600 //
3601 // two initial interpolation-style probes are used at the start of the search
3602 // to try to bound either side of the binary search sensibly, while still
3603 // working in O(log n) time if they fail.
// Reads the Ogg page header found at the current stream position and fills
// `z` with the page's start offset, end offset and the 32-bit sample position
// stored in header bytes 6..9.
//
// Returns 1 after rewinding the stream to the page start, or 0 if the bytes
// read do not begin with "OggS" (the stream is not rewound in that case).
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  enum HeaderSize = 27; // fixed part of the page header
  ubyte[HeaderSize] header;
  ubyte[255] lacing;

  // remember where this page begins
  z.page_start = f.fileOffset;

  // fixed header first, then verify the capture pattern
  getn(f, header.ptr, HeaderSize);
  immutable bool captured = (header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S');
  if (!captured) return 0;

  // header[26] is the number of lacing values that follow the fixed header
  immutable segCount = header[26];
  getn(f, lacing.ptr, segCount);

  // the payload length is the sum of the lacing values
  uint payload = 0;
  foreach (immutable seg; lacing[0..segCount]) payload += seg;

  // fixed header + lacing table + payload gives the end of the page
  z.page_end = z.page_start+HeaderSize+segCount+payload;

  // little-endian 32-bit value taken from bytes 6..9 of the header
  z.last_decoded_sample = header[6]|(header[7]<<8)|(header[8]<<16)|(header[9]<<24);

  // put the stream back where it was on entry
  set_file_offset(f, z.page_start);
  return 1;
}
3630 
// rarely used function to seek back to the preceding page while finding the
// start of a packet.
//
// scanning starts 64K before `limit_offset` (but never before the first audio
// page) and walks page by page; returns 1 with the stream positioned on the
// page that begins before `limit_offset` and ends at or after it, or 0 if
// vorbis_find_page() runs out of pages first.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  uint page_end;

  // default to the first audio page; back up 64K from the limit only when
  // that still lands at or after it
  uint scan_from = f.first_audio_page_offset;
  if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset) scan_from = limit_offset-65536;

  set_file_offset(f, scan_from);

  for (;;) {
    if (!vorbis_find_page(f, &page_end, null)) return 0;
    // vorbis_find_page leaves the stream on the page start
    if (page_end >= limit_offset && f.fileOffset < limit_offset) return 1;
    // not the one we want: continue right after this page
    set_file_offset(f, page_end);
  }
}
3651 
// implements the search logic for finding a page and starting decoding. if
// the function succeeds, current_loc_valid will be true and current_loc will
// be less than or equal to the provided sample number (the closer the
// better).
//
// the byte range between the first and last probed pages is narrowed with up
// to two interpolation probes followed by bisection; once 64K or less remains
// the pages are walked linearly.
//
// Params:
//   f             = decoder to reposition
//   sample_number = target sample; it is reduced by (blocksize_1-blocksize_0)/4
//                   (clamped at 0) before searching
//
// Returns: 1 on success. on failure returns the value of error() (after
//          trying to rewind to the stream start where the `error` label is
//          used) or 0 if the first frame cannot be decoded.
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  ProbedPage left, right, mid;
  int i, start_seg_with_known_loc, end_pos, page_start;
  uint delta, stream_length, padding;
  double offset, bytes_per_sample;
  int probe = 0;

  // find the last page and validate the target sample
  stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  padding = ((f.blocksize_1-f.blocksize_0)>>2);
  if (sample_number < padding) sample_number = 0; else sample_number -= padding;

  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) goto error;
  }

  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // starting from the start is handled differently
  if (sample_number <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe < 2) {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(sample_number-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side)
          double error = (cast(double)sample_number-mid.last_decoded_sample)*bytes_per_sample;
          if (error >= 0 && error <  8000) error =  8000;
          if (error <  0 && error > -8000) error = -8000;
          offset += error*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      } else {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      }

      if (!vorbis_find_page(f, null, null)) goto error;
    }

    for (;;) {
      if (!get_seek_page_info(f, &mid)) goto error;
      if (mid.last_decoded_sample != ~0U) break;
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
    }

    if (mid.page_start == right.page_start) {
      // if we've just found the last page again then we're in a tricky file,
      // and we're close enough -- but only when bisecting or scanning
      // linearly. an interpolation probe can overshoot onto the right page
      // while `left` is still far before the target; stopping there would
      // start decoding arbitrarily far from the requested sample, so keep
      // the bounds and move on to the next probe instead.
      if (probe >= 2 || delta <= 65536) break;
    } else {
      if (sample_number < mid.last_decoded_sample) right = mid; else left = mid;
    }

    ++probe;
  }

  // seek back to start of the last packet
  page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  for (;;) {
    // walk back over lacing values of 255 (segments continued by the next one)
    for (i = end_pos; i > 0; --i) if (f.segments.ptr[i-1] != 255) break;
    start_seg_with_known_loc = i;
    if (start_seg_with_known_loc > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) goto error;
    page_start = f.fileOffset;
    if (!start_page(f)) goto error;
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = start_seg_with_known_loc;

  // skip the payload of every segment that precedes the chosen packet
  for (i = 0; i < start_seg_with_known_loc; ++i) skip(f, f.segments.ptr[i]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;

error:
  // try to restore the file to a valid state
  f.seekStart;
  return error(f, STBVorbisError.seek_failed);
}
3782 
// the same as vorbis_decode_initial, but without advancing: after a
// successful call the bytes it consumed are handed back to the current
// segment and the stream is stepped back over them.
//
// Returns 0 if vorbis_decode_initial() fails, 1 otherwise.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // either 1 or 2 bytes were read, figure out which so we can rewind:
  // one bit, the mode number, plus two more bits when the mode's blockflag is set
  int consumed_bits = 1+ilog(f.mode_count-1);
  if (f.mode_config.ptr[*mode].blockflag) consumed_bits += 2;
  int consumed_bytes = (consumed_bits+7)/8;

  // give the bytes back to the segment/packet counters and move the stream back
  f.bytes_in_seg += consumed_bytes;
  f.packet_bytes -= consumed_bytes;
  skip(f, -consumed_bytes);

  // step the segment cursor back by one; -1 is mapped to the last segment
  if (f.next_seg == -1) {
    f.next_seg = f.segment_count-1;
  } else {
    --f.next_seg;
  }

  // drop any buffered bits
  f.valid_bits = 0;

  return 1;
}
3800 
3801 // ////////////////////////////////////////////////////////////////////////// //
3802 // utility and supporting functions for getting s16 samples
// single-bit flags; combined below to describe where a channel is played
enum PLAYBACK_MONO  = (1<<0);
enum PLAYBACK_LEFT  = (1<<1);
enum PLAYBACK_RIGHT = (1<<2);

// shorthand flag combinations used to fill the table below:
// L = left+mono, C = left+right+mono, R = right+mono
enum L = (PLAYBACK_LEFT |PLAYBACK_MONO);
enum C = (PLAYBACK_LEFT |PLAYBACK_RIGHT|PLAYBACK_MONO);
enum R = (PLAYBACK_RIGHT|PLAYBACK_MONO);

// 7 rows of up to 6 flag bytes each; row k (k >= 1) lists k entries.
// NOTE(review): rows look like they are indexed by channel count and columns
// by channel index within that layout -- confirm against the code that reads
// this table.
immutable byte[6][7] channel_position = [
  [ 0 ],
  [ C ],
  [ L, R ],
  [ L, C, R ],
  [ L, R, L, R ],
  [ L, C, R, L, R ],
  [ L, C, R, L, R, C ],
];
3820 
3821 
// Float -> scaled int conversion "macros". Both variants produce code that,
// when mixed in, declares `int v` holding the converted sample. `declfcvar`
// yields the declaration of the scratch variable the fast variant needs.
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // Portable variant: no scratch variable is needed, so mix in an empty block.
  enum declfcvar(string name) = "{}";
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    // only the 15-bit shift (scale by 32768) is implemented here
    static assert(s == "15");
    // lrintf() returns c_long, which is wider than int on LP64 targets,
    // so the result has to be narrowed explicitly to initialize `int v`
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = cast(int)lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // overlay of a float and an int, used to read the float's bit pattern
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  // declares an uninitialized `float_conv` scratch variable with the given name
  enum declfcvar(string name) = "float_conv "~name~" = void;";
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);
  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  enum MAGIC(string SHIFT) = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  enum ADDEND(string SHIFT) = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  // expects a `float_conv temp` in scope (see declfcvar); defines `int v`
  enum FAST_SCALED_FLOAT_TO_INT(string x, string s) = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
    .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
}
3844 
// Converts `len` float samples from `src` to clamped signed 16-bit samples in `dest`.
private void copy_samples (short* dest, float* src, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  for (int idx = 0; idx < len; ++idx) {
    // the mixin defines `int v`: the sample scaled by 2^15
    mixin(FAST_SCALED_FLOAT_TO_INT!("src[idx]", "15"));
    // saturate to the 16-bit range
    if (v < -32768) v = -32768; else if (v > 32767) v = 32767;
    dest[idx] = cast(short)v; //k8
  }
}
3855 
// Mixes every source channel whose channel_position flags intersect `mask`
// into one 16-bit output stream, working in chunks of BUFFER_SIZE samples.
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer;
  int n = BUFFER_SIZE; // samples handled in the current chunk
  //check_endianness();
  mixin(declfcvar!"temp");
  for (uint base = 0; base < len; base += BUFFER_SIZE) {
    // start every chunk from silence
    buffer[] = 0.0f;
    // the last chunk may be shorter
    if (base+n > len) n = len-base;
    // accumulate the selected channels
    foreach (immutable ch; 0..num_c) {
      if ((channel_position[num_c].ptr[ch]&mask) == 0) continue;
      foreach (immutable k; 0..n) buffer.ptr[k] += data[ch][d_offset+base+k];
    }
    // convert the mixed chunk with saturation
    foreach (immutable i; 0..n) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      if (v < -32768) v = -32768; else if (v > 32767) v = 32767;
      output[base+i] = cast(short)v; //k8
    }
  }
}
3876 
// Mixes `num_c` source channels down to interleaved 16-bit stereo, using the
// LEFT/RIGHT flags from channel_position to route each channel.
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  enum FRAMES_PER_CHUNK = BUFFER_SIZE>>1; // the buffer holds left/right pairs
  float[BUFFER_SIZE] buffer;
  int n = FRAMES_PER_CHUNK;
  //check_endianness();
  mixin(declfcvar!"temp");
  // `o` is the offset in the source data
  for (uint o = 0; o < len; o += FRAMES_PER_CHUNK) {
    // `o2` is the offset in the (interleaved) output data
    int o2 = o<<1;
    buffer[] = 0.0f;
    if (o+n > len) n = len-o;
    foreach (immutable ch; 0..num_c) {
      immutable int flags = channel_position[num_c].ptr[ch];
      immutable bool toLeft = (flags&PLAYBACK_LEFT) != 0;
      immutable bool toRight = (flags&PLAYBACK_RIGHT) != 0;
      if (toLeft) {
        foreach (immutable k; 0..n) buffer.ptr[k*2+0] += data[ch][d_offset+o+k];
      }
      if (toRight) {
        foreach (immutable k; 0..n) buffer.ptr[k*2+1] += data[ch][d_offset+o+k];
      }
    }
    // convert the interleaved chunk with saturation
    foreach (immutable i; 0..n<<1) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      if (v < -32768) v = -32768; else if (v > 32767) v = 32767;
      output[o2+i] = cast(short)v; //k8
    }
  }
}
3911 
// Fills `buf_c` separate 16-bit channel buffers (starting at `b_offset`) from
// `data_c` float channels (starting at `d_offset`).
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  import core.stdc.string : memset;

  immutable bool downmix = (buf_c != data_c && buf_c <= 2 && data_c <= 6);
  if (!downmix) {
    // straight per-channel copy; output channels without a source are zeroed
    immutable int common = (data_c > buf_c ? buf_c : data_c);
    foreach (immutable ch; 0..common) copy_samples(buffer[ch]+b_offset, data[ch]+d_offset, samples);
    foreach (immutable ch; common..buf_c) memset(buffer[ch]+b_offset, 0, short.sizeof*samples);
    return;
  }

  // playback mask per output channel, indexed by the number of output channels
  immutable int[2][3] channel_selector = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
  foreach (immutable ch; 0..buf_c) {
    compute_samples(channel_selector[buf_c].ptr[ch], buffer[ch]+b_offset, data_c, data, d_offset, samples);
  }
}
3924 
// Writes `len` frames of `buf_c`-channel interleaved 16-bit audio to `buffer`
// from `data_c` float channels (read starting at `d_offset`).
//
// When the channel counts differ and the output has at most two channels
// (and the source at most six), the source is mixed down; otherwise channels
// are copied one-to-one and surplus output channels are zero-filled.
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    if (buf_c == 2) {
      // a single call fills both interleaved channels; the previous loop
      // repeated the identical call once per output channel
      compute_stereo_samples(buffer, data_c, data, d_offset, len);
    } else if (buf_c == 1) {
      // one output channel: the stereo mixer would write 2*len shorts into
      // a buffer that only has room for len, so use the mono mix instead
      compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
    }
    // buf_c <= 0: nothing to write
  } else {
    immutable int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable j; 0..len) {
      foreach (immutable i; 0..limit) {
        float f = data[i][d_offset+j];
        mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));//data[i][d_offset+j], 15);
        // saturate to the 16-bit range
        if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
        *buffer++ = cast(short)v; //k8
      }
      // output channels that have no source channel are silent
      foreach (immutable i; limit..buf_c) *buffer++ = 0;
    }
  }
}
3944 } // @nogc
3945 
3946 
3947 public class VorbisDecoder {
3948   // return # of bytes read, 0 on eof, -1 on error
3949   // if called with `buf is null`, do `close()`
3950   alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3951 
3952   //TODO
3953   static struct Allocator {
3954   static nothrow @nogc: // because
3955     void* alloc (uint sz, VorbisDecoder vb) {
3956       import core.stdc.stdlib : malloc;
3957       return malloc(sz);
3958     }
3959     void free (void* p, VorbisDecoder vb) {
3960       import core.stdc.stdlib : free;
3961       free(p);
3962     }
3963     void* allocTemp (uint sz, VorbisDecoder vb) {
3964       import core.stdc.stdlib : malloc;
3965       return malloc(sz);
3966     }
3967     void freeTemp (void* p, uint sz, VorbisDecoder vb) {
3968       import core.stdc.stdlib : free;
3969       free(p);
3970     }
3971     uint tempSave (VorbisDecoder vb) { return 0; }
3972     void tempRestore (uint pos, VorbisDecoder vb) {}
3973   }
3974 
3975 nothrow @nogc:
3976 private:
3977   bool isOpened;
3978   readCB stmread;
3979   uint stlastofs = uint.max;
3980   uint stst;
3981   uint stpos;
3982   uint stend;
3983   bool stclose;
3984   FILE* stfl;
3985 
3986 private:
3987   //ubyte* stream;
3988   //ubyte* stream_start;
3989   //ubyte* stream_end;
3990   //uint stream_len;
3991 
3992   /+bool push_mode;+/
3993 
3994   uint first_audio_page_offset;
3995 
3996   ProbedPage p_first, p_last;
3997 
3998   // memory management
3999   Allocator alloc;
4000   int setup_offset;
4001   int temp_offset;
4002 
4003   // run-time results
4004   bool eof = true;
4005   STBVorbisError error;
4006 
4007   // header info
4008   int[2] blocksize;
4009   int blocksize_0, blocksize_1;
4010   int codebook_count;
4011   Codebook* codebooks;
4012   int floor_count;
4013   ushort[64] floor_types; // varies
4014   Floor* floor_config;
4015   int residue_count;
4016   ushort[64] residue_types; // varies
4017   Residue* residue_config;
4018   int mapping_count;
4019   Mapping* mapping;
4020   int mode_count;
4021   Mode[64] mode_config;  // varies
4022 
4023   uint total_samples;
4024 
4025   // decode buffer
4026   float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
4027   float*[STB_VORBIS_MAX_CHANNELS] outputs;
4028 
4029   float*[STB_VORBIS_MAX_CHANNELS] previous_window;
4030   int previous_length;
4031 
4032   version(STB_VORBIS_NO_DEFER_FLOOR) {
4033     float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
4034   } else {
4035     short*[STB_VORBIS_MAX_CHANNELS] finalY;
4036   }
4037 
4038   uint current_loc; // sample location of next frame to decode
4039   int current_loc_valid;
4040 
4041   // per-blocksize precomputed data
4042 
4043   // twiddle factors
4044   float*[2] A, B, C;
4045   float*[2] window;
4046   ushort*[2] bit_reverse;
4047 
4048   // current page/packet/segment streaming info
4049   uint serial; // stream serial number for verification
4050   int last_page;
4051   int segment_count;
4052   ubyte[255] segments;
4053   ubyte page_flag;
4054   ubyte bytes_in_seg;
4055   ubyte first_decode;
4056   int next_seg;
4057   int last_seg;  // flag that we're on the last segment
4058   int last_seg_which; // what was the segment number of the last seg?
4059   uint acc;
4060   int valid_bits;
4061   int packet_bytes;
4062   int end_seg_with_known_loc;
4063   uint known_loc_for_packet;
4064   int discard_samples_deferred;
4065   uint samples_output;
4066 
4067   // push mode scanning
4068   /+
4069   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
4070   CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
4071   +/
4072 
4073   // sample-access
4074   int channel_buffer_start;
4075   int channel_buffer_end;
4076 
4077 private: // k8: 'cause i'm evil
4078   // user-accessible info
4079   uint sample_rate;
4080   int vrchannels;
4081 
4082   uint setup_memory_required;
4083   uint temp_memory_required;
4084   uint setup_temp_memory_required;
4085 
4086   bool read_comments;
4087   ubyte* comment_data;
4088   uint comment_size;
4089 
4090   // functions to get comment data
4091   uint comment_data_pos;
4092 
4093 private:
4094   int rawRead (void[] buf) {
4095     static if (__VERSION__ > 2067) pragma(inline, true);
4096     if (isOpened && buf.length > 0 && stpos < stend) {
4097       if (stend-stpos < buf.length) buf = buf[0..stend-stpos];
4098       auto rd = stmread(buf, stpos, this);
4099       if (rd > 0) stpos += rd;
4100       return rd;
4101     }
4102     return 0;
4103   }
4104   void rawSkip (int n) { static if (__VERSION__ > 2067) pragma(inline, true);
4105   	if (isOpened) {
4106 		stpos += n;
4107 		if(stpos < stst)
4108 			stpos = stst;
4109 		else if(stpos > stend)
4110 			stpos = stend;
4111 	}
4112   }
4113   void rawSeek (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { stpos = stst+(n < 0 ? 0 : n); if (stpos > stend) stpos = stend; } }
4114   void rawClose () { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { isOpened = false; stmread(null, 0, this); } }
4115 
4116 final:
4117 private:
4118   void doInit () {
4119     import core.stdc.string : memset;
4120     /*
4121     if (z) {
4122       alloc = *z;
4123       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
4124       temp_offset = alloc.alloc_buffer_length_in_bytes;
4125     }
4126     */
4127     eof = false;
4128     error = STBVorbisError.no_error;
4129     /+stream = null;+/
4130     codebooks = null;
4131     /+page_crc_tests = -1;+/
4132   }
4133 
4134   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
4135     if (buf !is null) {
4136       if (vb.stlastofs != ofs) {
4137       	// { import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
4138         import core.stdc.stdio : fseek, SEEK_SET;
4139         vb.stlastofs = ofs;
4140         fseek(vb.stfl, ofs, SEEK_SET);
4141       }
4142       import core.stdc.stdio : fread;
4143       auto rd = cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
4144       if(rd > 0)
4145       	vb.stlastofs += rd;
4146       return rd;
4147     } else {
4148       if (vb.stclose) {
4149         import core.stdc.stdio : fclose;
4150         if (vb.stfl !is null) fclose(vb.stfl);
4151       }
4152       vb.stfl = null;
4153       return 0;
4154     }
4155   }
4156 
4157 public:
4158   this () {}
4159   ~this () { close(); }
4160 
4161   this (int asize, readCB rcb) {
4162   	assert(rcb !is null);
4163 	stend = (asize > 0 ? asize : 0);
4164 	stmread = rcb;
4165 	isOpened = true;
4166 	eof = false;
4167 	read_comments = true;
4168 	if (start_decoder(this)) {
4169 		vorbis_pump_first_frame(this);
4170 		return;
4171 	}
4172   }
4173   this (FILE* fl, bool doclose=true) { open(fl, doclose); }
4174   this (const(char)[] filename) { open(filename); }
4175 
4176   @property bool closed () { return !isOpened; }
4177 
4178   void open (FILE *fl, bool doclose=true) {
4179     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
4180     close();
4181     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
4182     stclose = doclose;
4183     stst = stpos = cast(uint)ftell(fl);
4184     fseek(fl, 0, SEEK_END);
4185     stend = cast(uint)ftell(fl);
4186     stlastofs = stlastofs.max;
4187     stclose = false;
4188     stfl = fl;
4189     import std.functional : toDelegate;
4190     stmread = toDelegate(&stflRead);
4191     isOpened = true;
4192     eof = false;
4193     read_comments = true;
4194     if (start_decoder(this)) {
4195       vorbis_pump_first_frame(this);
4196       return;
4197     }
4198     auto err = error;
4199     close();
4200     error = err;
4201   }
4202 
4203   void open (const(char)[] filename) {
4204     import core.stdc.stdio : fopen;
4205     import std.internal.cstring; // sorry
4206     close();
4207     FILE* fl = fopen(filename.tempCString, "rb");
4208     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
4209     open(fl, true);
4210   }
4211 
4212   /+
4213   void openPushdata(void* data, int data_len, // the memory available for decoding
4214                     int* data_used)           // only defined on success
4215   {
4216     close();
4217     eof = false;
4218     stream = cast(ubyte*)data;
4219     stream_end = stream+data_len;
4220     push_mode = true;
4221     if (!start_decoder(this)) {
4222       auto err = error;
4223       if (eof) err = STBVorbisError.need_more_data; else close();
4224       error = err;
4225       return;
4226     }
4227     *data_used = stream-(cast(ubyte*)data);
4228     error = STBVorbisError.no_error;
4229   }
4230   +/
4231 
4232   void close () {
4233     import core.stdc.string : memset;
4234 
4235     setup_free(this, this.comment_data);
4236     if (this.residue_config) {
4237       foreach (immutable i; 0..this.residue_count) {
4238         Residue* r = this.residue_config+i;
4239         if (r.classdata) {
4240           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
4241           setup_free(this, r.classdata);
4242         }
4243         setup_free(this, r.residue_books);
4244       }
4245     }
4246 
4247     if (this.codebooks) {
4248       foreach (immutable i; 0..this.codebook_count) {
4249         Codebook* c = this.codebooks+i;
4250         setup_free(this, c.codeword_lengths);
4251         setup_free(this, c.multiplicands);
4252         setup_free(this, c.codewords);
4253         setup_free(this, c.sorted_codewords);
4254         // c.sorted_values[-1] is the first entry in the array
4255         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
4256       }
4257       setup_free(this, this.codebooks);
4258     }
4259     setup_free(this, this.floor_config);
4260     setup_free(this, this.residue_config);
4261     if (this.mapping) {
4262       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
4263       setup_free(this, this.mapping);
4264     }
4265     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
4266       setup_free(this, this.channel_buffers.ptr[i]);
4267       setup_free(this, this.previous_window.ptr[i]);
4268       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
4269       setup_free(this, this.finalY.ptr[i]);
4270     }
4271     foreach (immutable i; 0..2) {
4272       setup_free(this, this.A.ptr[i]);
4273       setup_free(this, this.B.ptr[i]);
4274       setup_free(this, this.C.ptr[i]);
4275       setup_free(this, this.window.ptr[i]);
4276       setup_free(this, this.bit_reverse.ptr[i]);
4277     }
4278 
4279     rawClose();
4280     isOpened = false;
4281     stmread = null;
4282     stlastofs = uint.max;
4283     stst = 0;
4284     stpos = 0;
4285     stend = 0;
4286     stclose = false;
4287     stfl = null;
4288 
4289     sample_rate = 0;
4290     vrchannels = 0;
4291 
4292     setup_memory_required = 0;
4293     temp_memory_required = 0;
4294     setup_temp_memory_required = 0;
4295 
4296     read_comments = 0;
4297     comment_data = null;
4298     comment_size = 0;
4299 
4300     comment_data_pos = 0;
4301 
4302     /+
4303     stream = null;
4304     stream_start = null;
4305     stream_end = null;
4306     +/
4307 
4308     //stream_len = 0;
4309 
4310     /+push_mode = false;+/
4311 
4312     first_audio_page_offset = 0;
4313 
4314     p_first = p_first.init;
4315     p_last = p_last.init;
4316 
4317     setup_offset = 0;
4318     temp_offset = 0;
4319 
4320     eof = true;
4321     error = STBVorbisError.no_error;
4322 
4323     blocksize[] = 0;
4324     blocksize_0 = 0;
4325     blocksize_1 = 0;
4326     codebook_count = 0;
4327     codebooks = null;
4328     floor_count = 0;
4329     floor_types[] = 0;
4330     floor_config = null;
4331     residue_count = 0;
4332     residue_types[] = 0;
4333     residue_config = null;
4334     mapping_count = 0;
4335     mapping = null;
4336     mode_count = 0;
4337     mode_config[] = Mode.init;
4338 
4339     total_samples = 0;
4340 
4341     channel_buffers[] = null;
4342     outputs[] = null;
4343 
4344     previous_window[] = null;
4345     previous_length = 0;
4346 
4347     version(STB_VORBIS_NO_DEFER_FLOOR) {
4348       floor_buffers[] = null;
4349     } else {
4350       finalY[] = null;
4351     }
4352 
4353     current_loc = 0;
4354     current_loc_valid = 0;
4355 
4356     A[] = null;
4357     B[] = null;
4358     C[] = null;
4359     window[] = null;
4360     bit_reverse = null;
4361 
4362     serial = 0;
4363     last_page = 0;
4364     segment_count = 0;
4365     segments[] = 0;
4366     page_flag = 0;
4367     bytes_in_seg = 0;
4368     first_decode = 0;
4369     next_seg = 0;
4370     last_seg = 0;
4371     last_seg_which = 0;
4372     acc = 0;
4373     valid_bits = 0;
4374     packet_bytes = 0;
4375     end_seg_with_known_loc = 0;
4376     known_loc_for_packet = 0;
4377     discard_samples_deferred = 0;
4378     samples_output = 0;
4379 
4380     /+
4381     page_crc_tests = -1;
4382     scan[] = CRCscan.init;
4383     +/
4384 
4385     channel_buffer_start = 0;
4386     channel_buffer_end = 0;
4387   }
4388 
4389   @property const pure {
4390     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4391 
4392     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4393     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4394     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4395 
4396     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4397     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4398     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4399   }
4400 
4401   // will clear last error
4402   @property int lastError () {
4403     int e = error;
4404     error = STBVorbisError.no_error;
4405     return e;
4406   }
4407 
4408   // PUSHDATA API
4409   /+
4410   void flushPushdata () {
4411     if (push_mode) {
4412       previous_length = 0;
4413       page_crc_tests = 0;
4414       discard_samples_deferred = 0;
4415       current_loc_valid = false;
4416       first_decode = false;
4417       samples_output = 0;
4418       channel_buffer_start = 0;
4419       channel_buffer_end = 0;
4420     }
4421   }
4422 
4423   // return value: number of bytes we used
4424   int decodeFramePushdata(
4425            void* data, int data_len, // the memory available for decoding
4426            int* channels,            // place to write number of float* buffers
4427            float*** output,          // place to write float** array of float* buffers
4428            int* samples              // place to write number of output samples
4429        )
4430   {
4431     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4432 
4433     if (this.page_crc_tests >= 0) {
4434       *samples = 0;
4435       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4436     }
4437 
4438     this.stream = cast(ubyte*)data;
4439     this.stream_end = this.stream+data_len;
4440     this.error = STBVorbisError.no_error;
4441 
4442     // check that we have the entire packet in memory
4443     if (!is_whole_packet_present(this, false)) {
4444       *samples = 0;
4445       return 0;
4446     }
4447 
4448     int len, left, right;
4449 
4450     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4451       // save the actual error we encountered
4452       STBVorbisError error = this.error;
4453       if (error == STBVorbisError.bad_packet_type) {
4454         // flush and resynch
4455         this.error = STBVorbisError.no_error;
4456         while (get8_packet(this) != EOP) if (this.eof) break;
4457         *samples = 0;
4458         return this.stream-data;
4459       }
4460       if (error == STBVorbisError.continued_packet_flag_invalid) {
4461         if (this.previous_length == 0) {
4462           // we may be resynching, in which case it's ok to hit one
4463           // of these; just discard the packet
4464           this.error = STBVorbisError.no_error;
4465           while (get8_packet(this) != EOP) if (this.eof) break;
4466           *samples = 0;
4467           return this.stream-data;
4468         }
4469       }
4470       // if we get an error while parsing, what to do?
4471       // well, it DEFINITELY won't work to continue from where we are!
4472       flushPushdata();
4473       // restore the error that actually made us bail
4474       this.error = error;
4475       *samples = 0;
4476       return 1;
4477     }
4478 
4479     // success!
4480     len = vorbis_finish_frame(this, len, left, right);
4481     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4482 
4483     if (channels) *channels = this.vrchannels;
4484     *samples = len;
4485     *output = this.outputs.ptr;
4486     return this.stream-data;
4487   }
4488   +/
4489 
4490   public uint fileOffset () {
4491     if (/+push_mode ||+/ !isOpened) return 0;
4492     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4493     return (stpos > stst ? stpos-stst : 0);
4494   }
4495 
4496   public uint stream_len () { return stend-stst; }
4497 
4498   // DATA-PULLING API
4499   public int seekFrame (uint sample_number) {
4500     uint max_frame_samples;
4501 
4502     /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/
4503 
4504     // fast page-level search
4505     if (!seek_to_sample_coarse(this, sample_number)) return 0;
4506 
4507     assert(this.current_loc_valid);
4508     assert(this.current_loc <= sample_number);
4509 
4510     import std.stdio;
4511 
4512     // linear search for the relevant packet
4513     max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
4514     while (this.current_loc < sample_number) {
4515       int left_start, left_end, right_start, right_end, mode, frame_samples;
4516       if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
4517       // calculate the number of samples returned by the next frame
4518       frame_samples = right_start-left_start;
4519       if (this.current_loc+frame_samples > sample_number) {
4520         return 1; // the next frame will contain the sample
4521       } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
4522         // there's a chance the frame after this could contain the sample
4523         vorbis_pump_first_frame(this);
4524       } else {
4525         // this frame is too early to be relevant
4526         this.current_loc += frame_samples;
4527         this.previous_length = 0;
4528         maybe_start_packet(this);
4529         flush_packet(this);
4530       }
4531     }
4532     // the next frame will start with the sample
4533     assert(this.current_loc == sample_number);
4534 
4535     return 1;
4536   }
4537 
4538   public int seek (uint sample_number) {
4539     if (!seekFrame(sample_number)) return 0;
4540     if (sample_number != this.current_loc) {
4541       int n;
4542       uint frame_start = this.current_loc;
4543       getFrameFloat(&n, null);
4544       assert(sample_number > frame_start);
4545       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4546       this.channel_buffer_start += (sample_number-frame_start);
4547     }
4548     return 1;
4549   }
4550 
4551   public bool seekStart () {
4552     /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
4553     set_file_offset(this, first_audio_page_offset);
4554     previous_length = 0;
4555     first_decode = true;
4556     next_seg = -1;
4557     return vorbis_pump_first_frame(this);
4558   }
4559 
4560   public uint streamLengthInSamples () {
4561     uint restore_offset, previous_safe;
4562     uint end, last_page_loc;
4563 
4564     /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4565     if (!this.total_samples) {
4566       uint last;
4567       uint lo, hi;
4568       char[6] header;
4569 
4570       // first, store the current decode position so we can restore it
4571       restore_offset = fileOffset;
4572 
4573       // now we want to seek back 64K from the end (the last page must
4574       // be at most a little less than 64K, but let's allow a little slop)
4575       if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
4576         previous_safe = this.stream_len-65536;
4577       } else {
4578         previous_safe = this.first_audio_page_offset;
4579       }
4580 
4581       set_file_offset(this, previous_safe);
4582       // previous_safe is now our candidate 'earliest known place that seeking
4583       // to will lead to the final page'
4584 
4585       if (!vorbis_find_page(this, &end, &last)) {
4586         // if we can't find a page, we're hosed!
4587         this.error = STBVorbisError.cant_find_last_page;
4588         this.total_samples = 0xffffffff;
4589         goto done;
4590       }
4591 
4592       // check if there are more pages
4593       last_page_loc = fileOffset;
4594 
4595       // stop when the last_page flag is set, not when we reach eof;
4596       // this allows us to stop short of a 'file_section' end without
4597       // explicitly checking the length of the section
4598       while (!last) {
4599         set_file_offset(this, end);
4600         if (!vorbis_find_page(this, &end, &last)) {
4601           // the last page we found didn't have the 'last page' flag set. whoops!
4602           break;
4603         }
4604         previous_safe = last_page_loc+1;
4605         last_page_loc = fileOffset;
4606       }
4607 
4608       set_file_offset(this, last_page_loc);
4609 
4610       // parse the header
4611       getn(this, cast(ubyte*)header, 6);
4612       // extract the absolute granule position
4613       lo = get32(this);
4614       hi = get32(this);
4615       if (lo == 0xffffffff && hi == 0xffffffff) {
4616         this.error = STBVorbisError.cant_find_last_page;
4617         this.total_samples = SAMPLE_unknown;
4618         goto done;
4619       }
4620       if (hi) lo = 0xfffffffe; // saturate
4621       this.total_samples = lo;
4622 
4623       this.p_last.page_start = last_page_loc;
4624       this.p_last.page_end = end;
4625       this.p_last.last_decoded_sample = lo;
4626 
4627      done:
4628       set_file_offset(this, restore_offset);
4629     }
4630     return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
4631   }
4632 
4633   public float streamLengthInSeconds () {
4634     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4635   }
4636 
4637   public int getFrameFloat (int* channels, float*** output) {
4638     int len, right, left;
4639     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4640 
4641     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4642       channel_buffer_start = channel_buffer_end = 0;
4643       return 0;
4644     }
4645 
4646     len = vorbis_finish_frame(this, len, left, right);
4647     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4648 
4649     channel_buffer_start = left;
4650     channel_buffer_end = left+len;
4651 
4652     if (channels) *channels = this.vrchannels;
4653     if (output) *output = this.outputs.ptr;
4654     return len;
4655   }
4656 
4657   /+
4658   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4659     VorbisDecoder this;
4660     stb_vorbis_ctx p = void;
4661     if (data is null) return null;
4662     vorbis_init(&p, alloc);
4663     p.stream = cast(ubyte*)data;
4664     p.stream_end = cast(ubyte*)data+len;
4665     p.stream_start = cast(ubyte*)p.stream;
4666     p.stream_len = len;
4667     p.push_mode = false;
4668     if (start_decoder(&p)) {
4669       this = vorbis_alloc(&p);
4670       if (this) {
4671         *this = p;
4672         vorbis_pump_first_frame(this);
4673         return this;
4674       }
4675     }
4676     if (error) *error = p.error;
4677     vorbis_deinit(&p);
4678     return null;
4679   }
4680   +/
4681 
4682   // s16 samples API
4683   int getFrameShort (int num_c, short** buffer, int num_samples) {
4684     float** output;
4685     int len = getFrameFloat(null, &output);
4686     if (len > num_samples) len = num_samples;
4687     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4688     return len;
4689   }
4690 
4691   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4692     float** output;
4693     int len;
4694     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4695     len = getFrameFloat(null, &output);
4696     if (len) {
4697       if (len*num_c > num_shorts) len = num_shorts/num_c;
4698       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4699     }
4700     return len;
4701   }
4702 
4703   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4704     float** outputs;
4705     int len = num_shorts/channels;
4706     int n = 0;
4707     int z = this.vrchannels;
4708     if (z > channels) z = channels;
4709     while (n < len) {
4710       int k = channel_buffer_end-channel_buffer_start;
4711       if (n+k >= len) k = len-n;
4712       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4713       buffer += k*channels;
4714       n += k;
4715       channel_buffer_start += k;
4716       if (n == len) break;
4717       if (!getFrameFloat(null, &outputs)) break;
4718     }
4719     return n;
4720   }
4721 
4722   int getSamplesShort (int channels, short** buffer, int len) {
4723     float** outputs;
4724     int n = 0;
4725     int z = this.vrchannels;
4726     if (z > channels) z = channels;
4727     while (n < len) {
4728       int k = channel_buffer_end-channel_buffer_start;
4729       if (n+k >= len) k = len-n;
4730       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4731       n += k;
4732       channel_buffer_start += k;
4733       if (n == len) break;
4734       if (!getFrameFloat(null, &outputs)) break;
4735     }
4736     return n;
4737   }
4738 
4739   /+
4740   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4741     import core.stdc.stdlib : malloc, realloc;
4742 
4743     int data_len, offset, total, limit, error;
4744     short* data;
4745     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4746     if (v is null) return -1;
4747     limit = v.vrchannels*4096;
4748     *channels = v.vrchannels;
4749     if (sample_rate) *sample_rate = v.sample_rate;
4750     offset = data_len = 0;
4751     total = limit;
4752     data = cast(short*)malloc(total*(*data).sizeof);
4753     if (data is null) {
4754       stb_vorbis_close(v);
4755       return -2;
4756     }
4757     for (;;) {
4758       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4759       if (n == 0) break;
4760       data_len += n;
4761       offset += n*v.vrchannels;
4762       if (offset+limit > total) {
4763         short *data2;
4764         total *= 2;
4765         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4766         if (data2 is null) {
4767           import core.stdc.stdlib : free;
4768           free(data);
4769           stb_vorbis_close(v);
4770           return -2;
4771         }
4772         data = data2;
4773       }
4774     }
4775     *output = data;
4776     stb_vorbis_close(v);
4777     return data_len;
4778   }
4779 
4780   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4781     import core.stdc.stdlib : malloc, realloc;
4782 
4783     int data_len, offset, total, limit, error;
4784     short* data;
4785     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4786     if (v is null) return -1;
4787     limit = v.vrchannels*4096;
4788     *channels = v.vrchannels;
4789     if (sample_rate) *sample_rate = v.sample_rate;
4790     offset = data_len = 0;
4791     total = limit;
4792     data = cast(short*)malloc(total*(*data).sizeof);
4793     if (data is null) {
4794       stb_vorbis_close(v);
4795       return -2;
4796     }
4797     for (;;) {
4798       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4799       if (n == 0) break;
4800       data_len += n;
4801       offset += n*v.vrchannels;
4802       if (offset+limit > total) {
4803         short *data2;
4804         total *= 2;
4805         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4806         if (data2 is null) {
4807           import core.stdc.stdlib : free;
4808           free(data);
4809           stb_vorbis_close(v);
4810           return -2;
4811         }
4812         data = data2;
4813       }
4814     }
4815     *output = data;
4816     stb_vorbis_close(v);
4817     return data_len;
4818   }
4819 
4820   public int stb_vorbis_get_samples_float_interleaved (VorbisDecoder this, int channels, float* buffer, int num_floats) {
4821     float** outputs;
4822     int len = num_floats/channels;
4823     int n = 0;
4824     int z = this.vrchannels;
4825     if (z > channels) z = channels;
4826     while (n < len) {
4827       int k = this.channel_buffer_end-this.channel_buffer_start;
4828       if (n+k >= len) k = len-n;
4829       foreach (immutable j; 0..k) {
4830         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4831         foreach (immutable i; z..channels) *buffer++ = 0;
4832       }
4833       n += k;
4834       this.channel_buffer_start += k;
4835       if (n == len) break;
4836       if (!stb_vorbis_get_frame_float(this, null, &outputs)) break;
4837     }
4838     return n;
4839   }
4840   +/
4841 
4842   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4843     import core.stdc.string : memcpy, memset;
4844     float** outputs;
4845     int n = 0;
4846     int z = vrchannels;
4847     if (z > achans) z = achans;
4848     while (n < num_samples) {
4849       int k = channel_buffer_end-channel_buffer_start;
4850       if (n+k >= num_samples) k = num_samples-n;
4851       if (k) {
4852         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4853         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4854       }
4855       n += k;
4856       channel_buffer_start += k;
4857       if (n == num_samples) break;
4858       if (!getFrameFloat(null, &outputs)) break;
4859     }
4860     return n;
4861   }
4862 
4863 private: // k8: 'cause i'm evil
4864   private enum cmt_len_size = 2;
4865   nothrow /*@trusted*/ @nogc {
4866     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4867 
4868     // 0: error
4869     // includes length itself
4870     private uint comment_get_line_len () const pure {
4871       if (comment_data_pos >= comment_size) return 0;
4872       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4873       uint len = comment_data[comment_data_pos];
4874       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4875       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4876     }
4877 
4878     public bool comment_rewind () {
4879       comment_data_pos = 0;
4880       for (;;) {
4881         auto len = comment_get_line_len();
4882         if (!len) { comment_data_pos = comment_size; return false; }
4883         if (len != cmt_len_size) return true;
4884         comment_data_pos += len;
4885       }
4886     }
4887 
4888     // true: has something to read after skip
4889     public bool comment_skip () {
4890       comment_data_pos += comment_get_line_len();
4891       for (;;) {
4892         auto len = comment_get_line_len();
4893         if (!len) { comment_data_pos = comment_size; return false; }
4894         if (len != cmt_len_size) break;
4895         comment_data_pos += len;
4896       }
4897       return true;
4898     }
4899 
4900     public const(char)[] comment_line () {
4901       auto len = comment_get_line_len();
4902       if (len < cmt_len_size) return null;
4903       if (len == cmt_len_size) return "";
4904       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4905     }
4906 
4907     public const(char)[] comment_name () {
4908       auto line = comment_line();
4909       if (line.length == 0) return line;
4910       uint epos = 0;
4911       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4912       return (epos < line.length ? line[0..epos] : "");
4913     }
4914 
4915     public const(char)[] comment_value () {
4916       auto line = comment_line();
4917       if (line.length == 0) return line;
4918       uint epos = 0;
4919       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4920       return (epos < line.length ? line[epos+1..$] : line);
4921     }
4922   }
4923 }
4924 
4925 
4926 // ////////////////////////////////////////////////////////////////////////// //
4927 private:
4928 // cool helper to translate C defines
template cmacroFixVars(T...) {
  /**
   * Fasthash-style 64-bit mixing hash over a byte buffer.
   *
   * Params:
   *   buf =  data buffer
   *   len =  number of bytes to read from `buf`
   *   seed = the seed
   *
   * Returns:
   *   32-bit or 64-bit hash (the width of `size_t`)
   */
  size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
    enum ulong blockMul = 0x880355f21e6d1965UL;

    // one mixing round: xor-shift, multiply, xor-shift
    static ulong scramble (ulong v) pure nothrow @safe @nogc {
      v ^= v>>23;
      v *= 0x2127599bf4325c37UL;
      v ^= v>>47;
      return v;
    }

    // expression that assembles the 8 bytes at `bytes` as a little-endian ulong
    enum LoadLE64 = q{
      cast(ulong)bytes[0]|
      (cast(ulong)bytes[1]<<8)|
      (cast(ulong)bytes[2]<<16)|
      (cast(ulong)bytes[3]<<24)|
      (cast(ulong)bytes[4]<<32)|
      (cast(ulong)bytes[5]<<40)|
      (cast(ulong)bytes[6]<<48)|
      (cast(ulong)bytes[7]<<56)
    };

    auto bytes = cast(const(ubyte)*)buf;
    ulong acc = seed;

    // full 8-byte blocks
    for (size_t left = len/8; left != 0; --left) {
      ulong word;
      version(HasUnalignedOps) {
        // the direct load is only taken at run time; CTFE assembles bytes
        if (__ctfe) {
          word = mixin(LoadLE64);
        } else {
          word = *cast(ulong*)bytes;
        }
      } else {
        word = mixin(LoadLE64);
      }
      bytes += 8;
      acc = (acc^scramble(word))*blockMul;
    }

    acc ^= len*blockMul;

    // remaining 0..7 bytes, folded in little-endian order; this round is
    // performed even when there are no leftover bytes
    ulong tail = 0;
    foreach (immutable uint bidx; 0..cast(uint)(len&7)) tail ^= cast(ulong)bytes[bidx]<<(8*bidx);
    acc = (acc^scramble(tail))*blockMul;

    acc = scramble(acc);
    static if (size_t.sizeof == 4) {
      // 32-bit hash
      // the following trick converts the 64-bit hashcode to Fermat
      // residue, which shall retain information from both the higher
      // and lower parts of hashcode.
      return cast(size_t)(acc-(acc>>32));
    } else {
      return acc;
    }
  }

  /**
   * Expands `${name}` placeholders in the macro text `s`.
   *
   * Each placeholder name listed in the template arguments `T` is replaced
   * with the string at the same index in `names`. The special placeholder
   * `${__temp_prefix__}` is replaced with an identifier prefix built from
   * a hash of the (trimmed) macro text, so every occurrence inside one
   * macro gets the same prefix.
   *
   * Trailing whitespace and leading blank lines are removed; the
   * indentation of the first non-blank line is kept. An unknown
   * placeholder trips an assertion.
   *
   * Params:
   *   s =     macro text
   *   names = replacements, one per name in `T`
   *
   * Returns:
   *   the expanded text
   */
  string cmacroFixVars (string s, string[] names...) {
    assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");
    enum tmpPfxName = "__temp_prefix__";
    string output;
    string uniquePfx; // built lazily, on the first `${__temp_prefix__}`
    // cut trailing whitespace
    while (s.length > 0 && s[$-1] <= ' ') s = s[0..$-1];
    // find the start of the first non-blank line (for pretty printing)
    uint pos = 0;
    for (uint scan = 0; scan < s.length && s[scan] <= ' '; ++scan) {
      if (s[scan] == '\n') pos = scan+1;
    }
    while (pos+2 < s.length) {
      // locate the next "${"
      int epos = pos;
      while (epos+2 < s.length && !(s[epos] == '$' && s[epos+1] == '{')) ++epos;
      if (epos > pos) {
        if (s.length-epos < 3) break; // too short to hold another placeholder
        output ~= s[pos..epos];
        pos = epos;
      }
      assert(s[pos] == '$' && s[pos+1] == '{');
      pos += 2;
      string rest = s[pos..$]; // text right after "${"
      bool found = false;
      if (rest.length > tmpPfxName.length && rest[tmpPfxName.length] == '}' && rest[0..tmpPfxName.length] == tmpPfxName) {
        if (uniquePfx.length == 0) {
          // generate temporary prefix
          auto hash = hashOf(s.ptr, s.length);
          immutable char[16] hexChars = "0123456789abcdef";
          uniquePfx = "_temp_macro_var_";
          foreach (immutable _; 0..size_t.sizeof*2) {
            uniquePfx ~= hexChars[hash&0x0f];
            hash >>= 4;
          }
          uniquePfx ~= "_";
        }
        pos += tmpPfxName.length+1;
        output ~= uniquePfx;
        found = true;
      } else {
        foreach (immutable nidx, string oname; T) {
          static assert(oname.length > 0);
          if (rest.length > oname.length && rest[oname.length] == '}' && rest[0..oname.length] == oname) {
            output ~= names[nidx];
            pos += oname.length+1;
            found = true;
            break;
          }
        }
      }
      assert(found, "unknown variable in macro");
    }
    if (pos < s.length) output ~= s[pos..$];
    return output;
  }
}
5065 
5066 // ////////////////////////////////////////////////////////////////////////// //
5067 /* Version history
5068     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5069     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5070                            avoid discarding last frame of audio data
5071     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5072                            some more crash fixes when out of memory or with corrupt files
5073     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5074                            some crash fixes when out of memory or with corrupt files
5075     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5076     1.04    - 2014/08/27 - fix missing const-correct case in API
5077     1.03    - 2014/08/07 - Warning fixes
5078     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5079     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5080     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5081                            (API change) report sample rate for decode-full-file funcs
5082     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5083     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5084     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5085     0.99993 - remove assert that fired on legal files with empty tables
5086     0.99992 - rewind-to-start
5087     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5088     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5089     0.9998 - add a full-decode function with a memory source
5090     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5091     0.9996 - query length of vorbis stream in samples/seconds
5092     0.9995 - bugfix to another optimization that only happened in certain files
5093     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5094     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5095     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5096     0.9991 - performance improvement of IMDCT
5097     0.999 - (should have been 0.9990) performance improvement of IMDCT
5098     0.998 - no-CRT support from Casey Muratori
5099     0.997 - bugfixes for bugs found by Terje Mathisen
5100     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5101     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5102     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5103     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5104     0.992 - fixes for MinGW warning
5105     0.991 - turn fast-float-conversion on by default
5106     0.990 - fix push-mode seek recovery if you seek into the headers
5107     0.98b - fix to bad release of 0.98
5108     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5109     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5110     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5111     0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
5113     0.93 - fixed all-zero-floor case (was decoding garbage)
5114     0.92 - fixed a memory leak
5115     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5116     0.90 - first public release
5117 */
5118 
5119 /*
5120 ------------------------------------------------------------------------------
5121 This software is available under 2 licenses -- choose whichever you prefer.
5122 ------------------------------------------------------------------------------
5123 ALTERNATIVE A - MIT License
5124 Copyright (c) 2017 Sean Barrett
5125 Permission is hereby granted, free of charge, to any person obtaining a copy of
5126 this software and associated documentation files (the "Software"), to deal in
5127 the Software without restriction, including without limitation the rights to
5128 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5129 of the Software, and to permit persons to whom the Software is furnished to do
5130 so, subject to the following conditions:
5131 The above copyright notice and this permission notice shall be included in all
5132 copies or substantial portions of the Software.
5133 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5134 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5135 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5136 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5137 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5138 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5139 SOFTWARE.
5140 ------------------------------------------------------------------------------
5141 ALTERNATIVE B - Public Domain (www.unlicense.org)
5142 This is free and unencumbered software released into the public domain.
5143 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5144 software, either in source code form or as a compiled binary, for any purpose,
5145 commercial or non-commercial, and by any means.
5146 In jurisdictions that recognize copyright laws, the author or authors of this
5147 software dedicate any and all copyright interest in the software to the public
5148 domain. We make this dedication for the benefit of the public at large and to
5149 the detriment of our heirs and successors. We intend this dedication to be an
5150 overt act of relinquishment in perpetuity of all present and future rights to
5151 this software under copyright law.
5152 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5153 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5154 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5155 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5156 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5157 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5158 ------------------------------------------------------------------------------
5159 */