1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 // stolen by adam and module renamed.
57 /++
58 	Port of stb_vorbis to D. Provides .ogg audio file reading capabilities. See [arsd.simpleaudio] for code that can use this to actually load and play the file.
59 +/
60 module arsd.vorbis;
61 
62 import core.stdc.stdio : FILE;
63 
version(Windows)
	/// Stand-in for C99 `lrintf` on Windows builds.
	///
	/// Rounds `f` to the nearest integer, with exact halves going to the even
	/// neighbour (the behaviour of `lrintf` under the default rounding mode).
	/// A plain `cast(int)` would truncate toward zero instead, which biases
	/// every converted sample and makes Windows output differ from platforms
	/// that use the real C function.
	///
	/// Values outside the range of `int` (and NaN) are not handled specially.
	extern(C) int lrintf(float f) {
		// work in double: adding 0.5 to a float can round up on its own
		immutable double shifted = cast(double)f+(f < 0 ? -0.5 : 0.5);
		int nearest = cast(int)shifted; // truncation of the shifted value == round-half-away
		// `shifted` is integral only when `f` was an exact tie (k+0.5);
		// in that case step back to the even neighbour
		if (shifted == nearest && (nearest&1)) nearest -= (nearest > 0 ? 1 : -1);
		return nearest;
	}
66 
67 nothrow /*@trusted*/:
68 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
69 // import it here, as druntime has no `@nogc` on it (for a reason)
70 private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(in void*, in void*) compar);
71 
72 
73 //////////////////////////////////////////////////////////////////////////////
74 //
75 //  HEADER BEGINS HERE
76 //
77 
78 ///////////   THREAD SAFETY
79 
80 // Individual VorbisDecoder handles are not thread-safe; you cannot decode from
81 // them from multiple threads at the same time. However, you can have multiple
82 // VorbisDecoder handles and decode from them independently in multiple threads.
83 
84 
85 ///////////   MEMORY ALLOCATION
86 
87 // normally stb_vorbis uses malloc() to allocate memory at startup,
88 // and alloca() to allocate temporary memory during a frame on the
89 // stack. (Memory consumption will depend on the amount of setup
90 // data in the file and how you set the compile flags for speed
91 // vs. size. In my test files the maximal-size usage is ~150KB.)
92 //
93 // You can modify the wrapper functions in the source (setup_malloc,
94 // setup_temp_malloc, temp_malloc) to change this behavior, or you
95 // can use a simpler allocation model: you pass in a buffer from
96 // which stb_vorbis will allocate _all_ its memory (including the
97 // temp memory). "open" may fail with a VORBIS_outofmem if you
98 // do not pass in enough data; there is no way to determine how
99 // much you do need except to succeed (at which point you can
100 // query get_info to find the exact amount required. yes I know
101 // this is lame).
102 //
103 // If you pass in a non-null buffer of the type below, allocation
104 // will occur from it as described above. Otherwise just pass null
105 // to use malloc()/alloca()
106 
/// Describes a caller-owned memory block: a start pointer plus its size.
/// The surrounding notes describe it as the buffer stb_vorbis may allocate
/// from instead of malloc()/alloca() -- NOTE(review): confirm how this port
/// actually consumes it.
public struct stb_vorbis_alloc {
  /// First byte of the block (null by default).
  ubyte* alloc_buffer;
  /// Size of the block, in bytes.
  int alloc_buffer_length_in_bytes;
}
111 
112 
113 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
114 
115 /*
116 public struct stb_vorbis_info {
117   uint sample_rate;
118   int channels;
119 
120   uint setup_memory_required;
121   uint setup_temp_memory_required;
122   uint temp_memory_required;
123 
124   int max_frame_size;
125 }
126 */
127 
128 
129 /* ************************************************************************** *
130 // get general information about the file
131 stb_vorbis_info stb_vorbis_get_info (VorbisDecoder f);
132 
133 // get the last error detected (clears it, too)
134 int stb_vorbis_get_error (VorbisDecoder f);
135 
136 // close an ogg vorbis file and free all memory in use
137 void stb_vorbis_close (VorbisDecoder f);
138 
139 // this function returns the offset (in samples) from the beginning of the
140 // file that will be returned by the next decode, if it is known, or -1
141 // otherwise. after a flush_pushdata() call, this may take a while before
142 // it becomes valid again.
143 // NOT WORKING YET after a seek with PULLDATA API
144 int stb_vorbis_get_sample_offset (VorbisDecoder f);
145 
146 // returns the current seek point within the file, or offset from the beginning
147 // of the memory buffer. In pushdata mode it returns 0.
148 uint stb_vorbis_get_file_offset (VorbisDecoder f);
149 
150 
151 ///////////   PUSHDATA API
152 
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159 
160 // create a vorbis decoder by passing in the initial data block containing
161 //    the ogg&vorbis headers (you don't need to parse them, just provide
162 //    the first N bytes of the file--you're told if it's not enough, see below)
163 // on success, returns an VorbisDecoder, does not set error, returns the amount of
164 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
165 // on failure, returns null on error and sets *error, does not change *datablock_memory_consumed
166 // if returns null and *error is VORBIS_need_more_data, then the input block was
167 //       incomplete and you need to pass in a larger block from the start of the file
168 VorbisDecoder stb_vorbis_open_pushdata (
169               ubyte* datablock, int datablock_length_in_bytes,
170               int* datablock_memory_consumed_in_bytes,
171               int* error,
172               stb_vorbis_alloc* alloc_buffer
173             );
174 
175 // decode a frame of audio sample data if possible from the passed-in data block
176 //
177 // return value: number of bytes we used from datablock
178 //
179 // possible cases:
180 //     0 bytes used, 0 samples output (need more data)
181 //     N bytes used, 0 samples output (resynching the stream, keep going)
182 //     N bytes used, M samples output (one frame of data)
183 // note that after opening a file, you will ALWAYS get one N-bytes, 0-sample
184 // frame, because Vorbis always "discards" the first frame.
185 //
186 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
187 // instead only datablock_length_in_bytes-3 or less. This is because it wants
188 // to avoid missing parts of a page header if they cross a datablock boundary,
189 // without writing state-machiney code to record a partial detection.
190 //
191 // The number of channels returned are stored in *channels (which can be
192 // null--it is always the same as the number of channels reported by
193 // get_info). *output will contain an array of float* buffers, one per
194 // channel. In other words, (*output)[0][0] contains the first sample from
195 // the first channel, and (*output)[1][0] contains the first sample from
196 // the second channel.
197 int stb_vorbis_decode_frame_pushdata (
198       VorbisDecoder f, ubyte* datablock, int datablock_length_in_bytes,
199       int* channels,   // place to write number of float * buffers
200       float*** output, // place to write float ** array of float * buffers
201       int* samples     // place to write number of output samples
202     );
203 
204 // inform stb_vorbis that your next datablock will not be contiguous with
205 // previous ones (e.g. you've seeked in the data); future attempts to decode
206 // frames will cause stb_vorbis to resynchronize (as noted above), and
207 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
208 // will begin decoding the _next_ frame.
209 //
210 // if you want to seek using pushdata, you need to seek in your file, then
211 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
212 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
213 // if you don't like the result, seek your file again and repeat.
214 void stb_vorbis_flush_pushdata (VorbisDecoder f);
215 
216 
217 //////////   PULLING INPUT API
218 
219 // This API assumes stb_vorbis is allowed to pull data from a source--
220 // either a block of memory containing the _entire_ vorbis stream, or a
221 // FILE* that you or it create, or possibly some other reading mechanism
222 // if you go modify the source to replace the FILE* case with some kind
223 // of callback to your code. (But if you don't support seeking, you may
224 // just want to go ahead and use pushdata.)
225 
226 // decode an entire file and output the data interleaved into a malloc()ed
227 // buffer stored in *output. The return value is the number of samples
228 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
229 // When you're done with it, just free() the pointer returned in *output.
230 int stb_vorbis_decode_filename (const(char)* filename, int* channels, int* sample_rate, short** output);
231 int stb_vorbis_decode_memory (const(ubyte)* mem, int len, int* channels, int* sample_rate, short** output);
232 
233 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
234 // this must be the entire stream!). on failure, returns null and sets *error
235 VorbisDecoder stb_vorbis_open_memory (const(ubyte)* data, int len, int* error, stb_vorbis_alloc* alloc_buffer);
236 
237 // create an ogg vorbis decoder from a filename via fopen(). on failure,
238 // returns null and sets *error (possibly to VORBIS_file_open_failure).
239 VorbisDecoder stb_vorbis_open_filename (const(char)* filename, int* error, stb_vorbis_alloc* alloc_buffer);
240 
241 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
242 // the _current_ seek point (ftell). on failure, returns null and sets *error.
243 // note that stb_vorbis must "own" this stream; if you seek it in between
244 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
245 // perform stb_vorbis_seek_*() operations on this file, it will assume it
246 // owns the _entire_ rest of the file after the start point. Use the next
247 // function, stb_vorbis_open_file_section(), to limit it.
248 VorbisDecoder stb_vorbis_open_file (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer);
249 
250 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
251 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
252 // on failure, returns null and sets *error. note that stb_vorbis must "own"
253 // this stream; if you seek it in between calls to stb_vorbis, it will become
254 // confused.
255 VorbisDecoder stb_vorbis_open_file_section (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer, uint len);
256 
257 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
258 // after calling seek_frame(), the next call to get_frame_*() will include
259 // the specified sample. after calling stb_vorbis_seek(), the next call to
260 // stb_vorbis_get_samples_* will start with the specified sample. If you
261 // do not need to seek to EXACTLY the target sample when using get_samples_*,
262 // you can also use seek_frame().
263 int stb_vorbis_seek_frame (VorbisDecoder f, uint sample_number);
264 int stb_vorbis_seek (VorbisDecoder f, uint sample_number);
265 
266 // this function is equivalent to stb_vorbis_seek(f, 0)
267 int stb_vorbis_seek_start (VorbisDecoder f);
268 
269 // these functions return the total length of the vorbis stream
270 uint stb_vorbis_stream_length_in_samples (VorbisDecoder f);
271 float stb_vorbis_stream_length_in_seconds (VorbisDecoder f);
272 
273 // decode the next frame and return the number of samples. the number of
274 // channels returned are stored in *channels (which can be null--it is always
275 // the same as the number of channels reported by get_info). *output will
276 // contain an array of float* buffers, one per channel. These outputs will
277 // be overwritten on the next call to stb_vorbis_get_frame_*.
278 //
279 // You generally should not intermix calls to stb_vorbis_get_frame_*()
280 // and stb_vorbis_get_samples_*(), since the latter calls the former.
281 int stb_vorbis_get_frame_float (VorbisDecoder f, int* channels, float*** output);
282 
283 // decode the next frame and return the number of *samples* per channel.
284 // Note that for interleaved data, you pass in the number of shorts (the
285 // size of your array), but the return value is the number of samples per
286 // channel, not the total number of samples.
287 //
288 // The data is coerced to the number of channels you request according to the
289 // channel coercion rules (see below). You must pass in the size of your
290 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
291 // The maximum buffer size needed can be gotten from get_info(); however,
292 // the Vorbis I specification implies an absolute maximum of 4096 samples
293 // per channel.
294 int stb_vorbis_get_frame_short_interleaved (VorbisDecoder f, int num_c, short* buffer, int num_shorts);
295 int stb_vorbis_get_frame_short (VorbisDecoder f, int num_c, short** buffer, int num_samples);
296 
297 // Channel coercion rules:
298 //    Let M be the number of channels requested, and N the number of channels present,
299 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
300 //    and stereo R be the sum of all R and center channels (channel assignment from the
301 //    vorbis spec).
302 //        M    N       output
303 //        1    k      sum(Ck) for all k
304 //        2    *      stereo L, stereo R
305 //        k    l      k > l, the first l channels, then 0s
306 //        k    l      k <= l, the first k channels
307 //    Note that this is not _good_ surround etc. mixing at all! It's just so
308 //    you get something useful.
309 
310 // gets num_samples samples, not necessarily on a frame boundary--this requires
311 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
312 // Returns the number of samples stored per channel; it may be less than requested
313 // at the end of the file. If there are no more samples in the file, returns 0.
314 int stb_vorbis_get_samples_float_interleaved (VorbisDecoder f, int channels, float* buffer, int num_floats);
315 int stb_vorbis_get_samples_float (VorbisDecoder f, int channels, float** buffer, int num_samples);
316 
317 // gets num_samples samples, not necessarily on a frame boundary--this requires
318 // buffering so you have to supply the buffers. Applies the coercion rules above
319 // to produce 'channels' channels. Returns the number of samples stored per channel;
320 // it may be less than requested at the end of the file. If there are no more
321 // samples in the file, returns 0.
322 int stb_vorbis_get_samples_short_interleaved (VorbisDecoder f, int channels, short* buffer, int num_shorts);
323 int stb_vorbis_get_samples_short (VorbisDecoder f, int channels, short** buffer, int num_samples);
324 */
325 
326 ////////   ERROR CODES
327 
/// Error codes reported by the decoder. The numeric values are spelled out
/// explicitly; they are the same values the implicit numbering produced.
public enum STBVorbisError {
  no_error = 0,

  need_more_data = 1,        // not a real error

  invalid_api_mixing = 2,    // can't mix API modes
  outofmem = 3,              // not enough memory
  feature_not_supported = 4, // uses floor 0
  too_many_channels = 5,     // STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure = 6,     // fopen() failed
  seek_without_length = 7,   // can't seek in unknown-length file

  unexpected_eof = 10,       // file is truncated?
  seek_invalid = 11,         // seek past EOF

  // everything below is a decoding error (corrupt/invalid stream);
  // callers rarely need to tell these apart

  // vorbis-level errors
  invalid_setup = 20,
  invalid_stream = 21,

  // ogg-level errors
  missing_capture_pattern = 30,
  invalid_stream_structure_version = 31,
  continued_packet_flag_invalid = 32,
  incorrect_stream_serial_number = 33,
  invalid_first_page = 34,
  bad_packet_type = 35,
  cant_find_last_page = 36,
  seek_failed = 37,
}
360 //
361 //  HEADER ENDS HERE
362 //
363 //////////////////////////////////////////////////////////////////////////////
364 
365 
366 // global configuration settings (e.g. set these in the project/makefile),
367 // or just set them in this file at the top (although ideally the first few
368 // should be visible when the header file is compiled too, although it's not
369 // crucial)
370 
371 // STB_VORBIS_NO_INTEGER_CONVERSION
372 //     does not compile the code for converting audio sample data from
373 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
374 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
375 
376 // STB_VORBIS_NO_FAST_SCALED_FLOAT
377 //      does not use a fast float-to-int trick to accelerate float-to-int on
378 //      most platforms which requires endianness be defined correctly.
379 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
380 
381 // STB_VORBIS_MAX_CHANNELS [number]
382 //     globally define this to the maximum number of channels you need.
383 //     The spec does not put a restriction on channels except that
384 //     the count is stored in a byte, so 255 is the hard limit.
385 //     Reducing this saves about 16 bytes per value, so using 16 saves
386 //     (255-16)*16 or around 4KB. Plus any other memory usage
387 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
388 //     6 (5.1 audio), or 2 (stereo only).
enum int STB_VORBIS_MAX_CHANNELS = 16; // compile-time channel cap; enough for anyone?
390 
391 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
392 //     after a flush_pushdata(), stb_vorbis begins scanning for the
393 //     next valid page, without backtracking. when it finds something
394 //     that looks like a page, it streams through it and verifies its
395 //     CRC32. Should that validation fail, it keeps scanning. But it's
396 //     possible that _while_ streaming through to check the CRC32 of
397 //     one candidate page, it sees another candidate page. This #define
398 //     determines how many "overlapping" candidate pages it can search
399 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
400 //     garbage pages could be as big as 64KB, but probably average ~16KB.
401 //     So don't hose ourselves by scanning an apparent 64KB page and
402 //     missing a ton of real ones in the interim; so minimum of 2
enum int STB_VORBIS_PUSHDATA_CRC_COUNT = 4; // overlapping candidate pages tracked at once
404 
405 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
406 //     sets the log size of the huffman-acceleration table.  Maximum
407 //     supported value is 24. with larger numbers, more decodings are O(1),
408 //     but the table size is larger so worse cache missing, so you'll have
409 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
enum int STB_VORBIS_FAST_HUFFMAN_LENGTH = 10; // log2 of the huffman acceleration table size
411 
412 // STB_VORBIS_FAST_BINARY_LENGTH [number]
413 //     sets the log size of the binary-search acceleration table. this
414 //     is used in similar fashion to the fast-huffman size to set initial
415 //     parameters for the binary search
416 
417 // STB_VORBIS_FAST_HUFFMAN_INT
418 //     The fast huffman tables are much more efficient if they can be
419 //     stored as 16-bit results instead of 32-bit results. This restricts
420 //     the codebooks to having only 65535 possible outcomes, though.
421 //     (At least, accelerated by the huffman table.)
422 //version = STB_VORBIS_FAST_HUFFMAN_INT;
// unless the INT variant was requested, select the 16-bit (SHORT) table variant
version(STB_VORBIS_FAST_HUFFMAN_INT) {
} else {
  version = STB_VORBIS_FAST_HUFFMAN_SHORT;
}
424 
425 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
426 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
427 //     back on binary searching for the correct one. This requires storing
428 //     extra tables with the huffman codes in sorted order. Defining this
429 //     symbol trades off space for speed by forcing a linear search in the
430 //     non-fast case, except for "sparse" codebooks.
431 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
432 
433 // STB_VORBIS_DIVIDES_IN_RESIDUE
434 //     stb_vorbis precomputes the result of the scalar residue decoding
435 //     that would otherwise require a divide per chunk. you can trade off
436 //     space for time by defining this symbol.
437 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
438 
439 // STB_VORBIS_DIVIDES_IN_CODEBOOK
440 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
441 //     stored, or with all elements being chosen from a small range of values,
442 //     and all values possible in all elements. By default, stb_vorbis expands
443 //     this latter kind out to look like the former kind for ease of decoding,
444 //     because otherwise an integer divide-per-vector-element is required to
445 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
446 //     trade off storage for speed.
447 //version = STB_VORBIS_DIVIDES_IN_CODEBOOK;
448 
// refuse to build when the retired option is requested
version(STB_VORBIS_CODEBOOK_SHORTS) {
  static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
}
450 
451 // STB_VORBIS_DIVIDE_TABLE
452 //     this replaces small integer divides in the floor decode loop with
453 //     table lookups. made less than 1% difference, so disabled by default.
454 //version = STB_VORBIS_DIVIDE_TABLE;
455 
456 // STB_VORBIS_NO_DEFER_FLOOR
457 //     Normally we only decode the floor without synthesizing the actual
458 //     full curve. We can instead synthesize the curve immediately. This
459 //     requires more memory and is very likely slower, so I don't think
460 //     you'd ever want to do it except for debugging.
461 //version = STB_VORBIS_NO_DEFER_FLOOR;
462 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
463 
464 
465 // ////////////////////////////////////////////////////////////////////////// //
466 private:
// Sanity checks for the compile-time configuration. Both ends of each range
// are checked: a zero/negative setting would otherwise pass here and only
// fail later (zero-sized per-channel storage, negative shift count).
// The upper bound of 256 is kept as-is for compatibility with existing builds.
static assert(STB_VORBIS_MAX_CHANNELS >= 1 && STB_VORBIS_MAX_CHANNELS <= 256, "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH >= 0 && STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24, "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");

enum MAX_BLOCKSIZE_LOG = 13; // from specification
enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG); // 8192


// element type of Codebook.multiplicands
alias codetype = float;
475 
476 // @NOTE
477 //
478 // Some arrays below are tagged "//varies", which means it's actually
479 // a variable-sized piece of data, but rather than malloc I assume it's
480 // small enough it's better to just allocate it all together with the
481 // main thing
482 //
483 // Most of the variables are specified with the smallest size I could pack
484 // them into. It might give better performance to make them all full-sized
485 // integers. It should be safe to freely rearrange the structures or change
486 // the sizes larger--nothing relies on silently truncating etc., nor the
487 // order of variables.
488 
// number of entries in Codebook.fast_huffman, and the matching index mask
enum int FAST_HUFFMAN_TABLE_SIZE = 1<<STB_VORBIS_FAST_HUFFMAN_LENGTH;
enum int FAST_HUFFMAN_TABLE_MASK = FAST_HUFFMAN_TABLE_SIZE-1;
491 
// One codebook. Field names presumably follow the codebook header of the
// Vorbis I specification -- verify against the setup code that fills them in.
struct Codebook {
  int dimensions;
  int entries;
  ubyte* codeword_lengths;
  float minimum_value;
  float delta_value;
  ubyte value_bits;
  ubyte lookup_type;
  ubyte sequence_p;
  ubyte sparse;
  uint lookup_values;
  codetype* multiplicands;
  uint* codewords;
  // acceleration table with FAST_HUFFMAN_TABLE_SIZE entries; the element
  // width is chosen by the STB_VORBIS_FAST_HUFFMAN_SHORT version switch
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  uint* sorted_codewords;
  int* sorted_values;
  int sorted_entries;
}
513 
// Floor type 0 parameters (the file header lists floor 0 as unsupported).
struct Floor0 {
  ubyte order;
  ushort rate, bark_map_size;
  ubyte amplitude_bits, amplitude_offset, number_of_books;
  ubyte[16] book_list; // varies
}
523 
// Floor type 1 parameters. Arrays tagged "varies" hold variable-sized data
// in fixed storage sized for the worst case (see the @NOTE above the structs).
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list;  // varies
  ubyte[16] class_dimensions;      // varies
  ubyte[16] class_subclasses;      // varies
  ubyte[16] class_masterbooks;     // varies
  short[8][16] subclass_books;     // varies
  ushort[31*8+2] Xlist;            // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors;
  ubyte floor1_multiplier, rangebits;
  int values;
}
538 
// Overlapping storage for either floor flavour; nothing in this union
// records which member is the active one.
union Floor {
  Floor0 floor0; // floor type 0 view
  Floor1 floor1; // floor type 1 view
}
543 
// Residue configuration.
struct Residue {
  uint begin;
  uint end;
  uint part_size;
  ubyte classifications;
  ubyte classbook;
  ubyte** classdata;
  // pointer to rows of 8 shorts (C original: int16 (*residue_books)[8])
  short[8]* residue_books;
}
553 
// Three per-entry byte values referenced through Mapping.chan.
struct MappingChannel {
  ubyte magnitude, angle, mux;
}
559 
// Mapping configuration: coupling step count, a pointer to MappingChannel
// data, and the floor/residue number selected for each submap.
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan;
  ubyte submaps;
  ubyte[15] submap_floor;   // varies
  ubyte[15] submap_residue; // varies
}
567 
// Mode configuration.
struct Mode {
  ubyte blockflag, mapping;
  ushort windowtype, transformtype;
}
574 
// Bookkeeping for one candidate page while its CRC is being verified.
struct CRCscan {
  // CRC value the candidate must end up with to count as a match
  uint goal_crc;
  // bytes left in packet
  int bytes_left;
  // CRC accumulated so far
  uint crc_so_far;
  // bytes processed in the _current_ chunk
  int bytes_done;
  // granule position encoded in the page
  uint sample_loc;
}
582 
// Start/end positions of a page together with its last decoded sample
// (presumably recorded while seeking -- verify against the seek code).
struct ProbedPage {
  uint page_start;
  uint page_end;
  uint last_decoded_sample;
}
587 
// Stores `e` in the decoder's error slot and returns 0, so call sites can
// write `return error(f, ...);`.
private int error (VorbisDecoder f, STBVorbisError e) {
  enum int Failure = 0;
  f.error = e;
  // Debugging hook: the inner statement is reached only for a genuine error
  // (not at eof, not a "need more data" request). It repeats the store
  // above, so it gives a place for a breakpoint without changing state.
  if (!f.eof) {
    if (e != STBVorbisError.need_more_data) f.error = e;
  }
  return Failure;
}
595 
596 // these functions are used for allocating temporary memory
597 // while decoding. if you can afford the stack space, use
598 // alloca(); otherwise, provide a temp buffer and it will
599 // allocate out of those.
// Forwards to `f.alloc.tempSave(f)` and returns its result; the value is
// meant to be handed back to temp_alloc_restore() later.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return f.alloc.tempSave(f);
}
// Forwards to `f.alloc.tempRestore(p, f)`; `p` is expected to be a value
// previously returned by temp_alloc_save().
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  f.alloc.tempRestore(p, f);
}
// Intentionally empty: nothing is released here.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc {
}
603 /*
604 T* temp_alloc(T) (VorbisDecoder f, uint count) nothrow @nogc {
605   auto res = f.alloc.alloc(count*T.sizeof, f);
606   return cast(T*)res;
607 }
608 */
609 
610 /+
611 enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
612 
613 // has to be a mixin, due to `alloca`
614 template temp_alloc(string size) {
615   enum temp_alloc = q{(f.alloc.alloc_buffer ? setup_temp_malloc(f, (${size})) : alloca(${size}))}.cmacroFixVars!("size")(size);
616 }
617 
618 // has to be a mixin, due to `alloca`
619 template temp_block_array(string count, string size) {
620   enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
621     .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
622 }
623 +/
// Builds, at compile time, the source text of an expression for the byte
// size of a block array: `count` pointers plus `count` sub-blocks of `size`
// bytes. The ${...} placeholders are presumably filled in by cmacroFixVars.
template array_size_required(string count, string size) {
  enum array_size_required = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
}
625 
// Builds the source text of an `alloca(...)` call for the given size
// expression; meant to be mixed in at the use site.
enum temp_alloc(string size) = q{alloca(${size})}.cmacroFixVars!("size")(size);
629 
// Builds the source text of a make_block_array() call whose memory argument
// is a temp_alloc expression sized by array_size_required; meant to be
// mixed in at the use site.
enum temp_block_array(string count, string size) =
  q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
634 
635 /*
636 T** temp_block_array(T) (VorbisDecoder f, uint count, uint size) {
637   size *= T.sizeof;
638   auto mem = f.alloc.alloc(count*(void*).sizeof+size, f);
639   if (mem !is null) make_block_array(mem, count, size);
640   return cast(T**)mem;
641 }
642 */
643 
644 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
// Lays out `mem` as a table of `count` pointers immediately followed by
// `count` consecutive sub-blocks of `size` bytes each, points every table
// slot at its sub-block, and returns the table (same address as `mem`).
// The caller must supply at least count*((void*).sizeof+size) bytes.
private void* make_block_array (void* mem, int count, int size) {
  auto table = cast(void**)mem;
  // payload area begins right behind the pointer table
  auto cursor = cast(char*)(table+count);
  for (int slot = 0; slot < count; ++slot) {
    table[slot] = cursor;
    cursor += size;
  }
  return table;
}
654 
// Allocates zero-filled storage for `sz` elements of type T through
// `f.alloc.alloc`.
//
// Returns null when the allocator fails, or when the request cannot be
// represented: the byte count (`sz*T.sizeof` plus padding) is computed in
// 32 bits, and a huge element count used to wrap around silently and yield
// a buffer far smaller than the caller expected.
//
// (The original C arena logic -- setup_memory_required/setup_offset -- is
// not used here; allocation goes through f.alloc.)
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  enum uint Slack = 8;
  // refuse anything whose padded byte size does not fit in a uint
  if (sz > (uint.max-Slack)/T.sizeof) return null;
  immutable uint total = cast(uint)(sz*T.sizeof)+Slack;
  auto res = f.alloc.alloc(total, f);
  if (res !is null) {
    import core.stdc.string : memset;
    memset(res, 0, total);
  }
  return cast(T*)res;
}
673 
// Releases `p` through `f.alloc.free`; a null pointer is ignored.
private void setup_free (VorbisDecoder f, void* p) {
  if (p is null) return;
  f.alloc.free(p, f);
}
678 
// Allocates `sz` zero-filled bytes of temporary setup memory through
// `f.alloc.allocTemp`.
//
// Returns null when the allocator fails, or when `sz` is so large that
// adding the padding would wrap around in 32 bits (which used to produce a
// tiny buffer instead of a failure).
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  enum uint Slack = 8;
  if (sz > uint.max-Slack) return null;
  immutable uint total = sz+Slack;
  auto res = f.alloc.allocTemp(total, f);
  if (res !is null) {
    import core.stdc.string : memset;
    memset(res, 0, total);
  }
  return res;
}
687 
// Hands temporary setup memory back through `f.alloc.freeTemp`; a null
// pointer is ignored.
// NOTE(review): a zero `sz` is reported as 1 (+8) here, while
// setup_temp_malloc requests sz+8 -- confirm freeTemp tolerates the mismatch.
private void setup_temp_free (VorbisDecoder f, void* p, uint sz) {
  if (p is null) return;
  immutable uint payload = (sz != 0 ? sz : 1);
  // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  f.alloc.freeTemp(p, payload+8, f);
}
691 
// Lookup table for the byte-at-a-time CRC32 (MSB first), filled in once by
// the module constructor below.
immutable uint[256] crc_table;
shared static this () {
  enum CRC32_POLY = 0x04c11db7; // from spec
  foreach (uint idx; 0..256) {
    // start with the byte in the top 8 bits, then run 8 shift/xor steps
    uint reg = idx<<24;
    foreach (immutable step; 0..8) {
      immutable bool carry = (reg >= (1U<<31)); // top bit about to be shifted out
      reg <<= 1;
      if (carry) reg ^= CRC32_POLY;
    }
    crc_table[idx] = reg;
  }
}
702 
// Feeds one byte into a running CRC32 and returns the updated value: the top
// byte of `crc` xor `b` selects the table entry, which is folded into the
// remaining 24 bits shifted up by one byte.
uint crc32_update (uint crc, ubyte b) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  immutable uint slot = b^(crc>>24);
  return crc_table[slot]^(crc<<8);
}
707 
// used in setup, and for huffman that doesn't go fast path
// Reverses the order of the 32 bits of `n` (bit 0 <-> bit 31, and so on) by
// swapping ever smaller groups: halves, bytes, nibbles, pairs, single bits.
private uint bit_reverse (uint n) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  n = (n>>16)|(n<<16);
  n = ((n>>8)&0x00FF00FF)|((n<<8)&0xFF00FF00);
  n = ((n>>4)&0x0F0F0F0F)|((n<<4)&0xF0F0F0F0);
  n = ((n>>2)&0x33333333)|((n<<2)&0xCCCCCCCC);
  return ((n>>1)&0x55555555)|((n<<1)&0xAAAAAAAA);
}
717 
// Returns x multiplied by itself.
private float square (float x) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return x*x;
}
722 
// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
// as required by the specification. fast(?) implementation from stb.h
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
// log2_4[v] is that same "log2" for the 4-bit values 0..15.
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4];
// Returns 0 for n <= 0, otherwise the number of bits needed to write n.
private int ilog (int n) {
  //static if (__VERSION__ > 2067) pragma(inline, true);
  if (n < 0) return 0; // signed n returns 0
  // pick the 5-bit-wide band n falls into; the table resolves the rest
  // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
  int shift;
  if (n < (1<<14)) {
    shift = (n < (1<<4) ? 0 : n < (1<<9) ? 5 : 10);
  } else if (n < (1<<24)) {
    shift = (n < (1<<19) ? 15 : 20);
  } else {
    shift = (n < (1<<29) ? 25 : 30);
  }
  return shift+log2_4[n>>shift];
}
743 
744 
// code length assigned to a value with no huffman encoding
// (compute_codewords skips every entry whose length equals this marker)
enum NO_CODE = 255;
747 
748 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
749 //
750 // these functions are only called at setup, and only a few times per file
// Unpacks a 32-bit packed float as laid out in the specification:
// bit 31 is the sign, bits 21..30 the exponent (biased by 788) and
// bits 0..20 the integer mantissa. Result is (+/-)mantissa * 2^(exponent-788).
private float float32_unpack (uint x) {
  import core.math : ldexp;
  //static if (__VERSION__ > 2067) pragma(inline, true);
  // from the specification
  immutable uint mantissa = x&0x1fffff;
  immutable bool negative = (x&0x80000000) != 0;
  immutable int exponent = cast(int)((x&0x7fe00000)>>21)-788;
  double value = cast(double)mantissa;
  if (negative) value = -value;
  return cast(float)ldexp(cast(float)value, exponent);
}
761 
// zlib & jpeg huffman tables assume that the output symbols
// can either be arbitrarily arranged, or have monotonically
// increasing frequencies--they rely on the lengths being sorted;
// this makes for a very simple generation algorithm.
// vorbis allows a huffman table with non-sorted lengths. This
// requires a more sophisticated construction, since symbols in
// order do not map to huffman codes "in order".
//
// Records one codeword. A sparse codebook stores the code, its length and
// its symbol at position `count`; a non-sparse one stores only the code, at
// position `symbol` (`count`, `len` and `values` are then unused).
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
778 
// Assigns a huffman codeword to every entry whose length is not NO_CODE,
// visiting the entries in order and always taking the lowest free leaf.
//   c      - codebook that receives the codewords (through add_entry)
//   len    - `n` codeword lengths; NO_CODE marks an unused entry
//   n      - number of entries in `len`
//   values - symbol list filled in for sparse codebooks (see add_entry)
// Returns true on success (also when no entry is used at all). Returns false
// when no free leaf is left for an entry, or when a length is too large for
// the 32-slot availability table (such a length used to index past its end).
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  int i, k, m = 0;
  // available[d] holds the left-aligned code of the free leaf at depth d;
  // 0 means 'empty'
  uint[32] available = 0;

  // find the first entry
  for (k = 0; k < n; ++k) if (len[k] < NO_CODE) break;
  if (k == n) { assert(c.sorted_entries == 0); return true; }
  // a length of 32 or more would write past the end of available[] below
  if (len[k] >= available.length) return false;
  // add to the list
  add_entry(c, 0, k, m++, len[k], values);
  // add all available leaves
  for (i = 1; i <= len[k]; ++i) available[i] = 1U<<(32-i);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (i = k+1; i < n; ++i) {
    int z = len[i];
    if (z == NO_CODE) continue;
    // same bound as for the first entry: reject instead of indexing out of range
    if (z >= cast(int)available.length) return false;
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    immutable uint res = available[z];
    available[z] = 0;
    ubyte xxx = len[i];
    add_entry(c,
      bit_reverse(res),
      i,
      m++,
      xxx, // dmd bug: it reads 4 bytes without temp
      values);
    // propagate availability up the tree
    if (z != len[i]) {
      for (int y = len[i]; y > z; --y) {
        assert(available[y] == 0);
        available[y] = res+(1<<(32-y));
      }
    }
  }
  return true;
}
830 
// accelerated huffman table allows fast O(1) match of all symbols
// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
//
// Resets every slot of c.fast_huffman to -1, then for each short enough code
// stores the entry index in every slot whose low bits equal that code.
private void compute_accelerated_huffman (Codebook* c) {
  //for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i) c.fast_huffman.ptr[i] = -1;
  c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE] = -1;
  auto len = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (len > 32767) len = 32767; // largest possible value we can encode!
  }
  foreach (uint i; 0..len) {
    immutable bits = c.codeword_lengths[i];
    if (bits > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;
    // slots that share the code's low `bits` bits are 1<<bits apart
    immutable int stride = 1<<bits;
    // set table entries for all bit combinations in the higher bits
    for (uint z = (c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]); z < FAST_HUFFMAN_TABLE_SIZE; z += stride) {
      c.fast_huffman.ptr[z] = cast(typeof(c.fast_huffman[0]))i; //k8
    }
  }
}
851 
// qsort-style comparator for two uint values: returns -1, 0 or 1 when the
// value behind `p` is smaller than, equal to or larger than the one behind `q`.
extern(C) int uint32_compare (const void* p, const void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
857 
// Tells whether an entry with codeword length `len` belongs in the sorted
// table: always for a sparse codebook; otherwise only when the entry is used
// and its code is too long for the fast huffman table.
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
864 
// if the fast table above doesn't work, we want to binary
// search them... need to reverse the bits
//
// Fills c.sorted_codewords with the bit-reversed codes in ascending order
// (terminated by 0xffffffff) and records, per sorted slot, which entry it
// belongs to (c.sorted_values; for sparse codebooks also c.codeword_lengths).
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  // build a list of all the entries
  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
  // this is kind of a frivolous optimization--I don't see any performance improvement,
  // but it's like 4 extra lines of code, so.
  if (c.sparse) {
    foreach (uint i; 0..c.sorted_entries) c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
  } else {
    int filled = 0;
    foreach (uint i; 0..c.entries) {
      if (!include_in_sort(c, lengths[i])) continue;
      c.sorted_codewords[filled++] = bit_reverse(c.codewords[i]);
    }
    assert(filled == c.sorted_entries);
  }

  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  auto total = (c.sparse ? c.sorted_entries : c.entries);
  // now we need to indicate how they correspond; we could either
  //   #1: sort a different data structure that says who they correspond to
  //   #2: for each sorted entry, search the original list to find who corresponds
  //   #3: for each original entry, find the sorted entry
  // #1 requires extra storage, #2 is slow, #3 can use binary search!
  foreach (uint i; 0..total) {
    auto huff_len = (c.sparse ? lengths[values[i]] : lengths[i]);
    if (!include_in_sort(c, huff_len)) continue;
    immutable uint code = bit_reverse(c.codewords[i]);
    // binary search; invariant: sc[lo] <= code < sc[lo+span]
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    assert(c.sorted_codewords[lo] == code);
    if (c.sparse) {
      c.sorted_values[lo] = values[i];
      c.codeword_lengths[lo] = huff_len;
    } else {
      c.sorted_values[lo] = i;
    }
  }
}
914 
// only run while parsing the header (3 times)
// Returns nonzero when the six bytes at `data` spell "vorbis".
private int vorbis_validate (const(void)* data) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  auto bytes = cast(const(char)*)data;
  return (bytes[0..6] == "vorbis");
}
921 
// called from setup only, once per code book
// (formula implied by specification)
// Returns the largest integer r with r^dim <= entries; the asserts at the end
// check exactly that.
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  // first guess: the dim-th root of `entries`, rounded down
  auto root = exp(cast(float)log(cast(float)entries)/dim);
  int r = cast(int)lrintf(floor(root));
  // the guess can land one too low; bump it while (r+1)^dim still fits
  if (lrintf(floor(pow(cast(float)r+1, dim))) <= entries) r += 1; // (int) cast for MinGW warning; floor() to avoid _ftol() when non-CRT
  assert(pow(cast(float)r+1, dim) > entries);
  assert(lrintf(floor(pow(cast(float)r, dim))) <= entries); // (int), floor() as above
  return r;
}
933 
// called twice per file
// Fills the three twiddle tables for block size `n` with interleaved
// cosine/sine pairs: A and B receive n/4 pairs each (B scaled by 0.5),
// C receives n/8 pairs.
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int n4 = n>>2, n8 = n>>3;
  foreach (int k; 0..n4) {
    immutable int k2 = k*2; // index of the pair's first element
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (int k; 0..n8) {
    immutable int k2 = k*2;
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
950 
// Writes the first n/2 samples of the window for block size `n`:
// sample i is sin(pi/2 * sin^2((i+0.5)/(n/2) * pi/2)).
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int half = n>>1;
  for (int i = 0; i < half; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/half*0.5*PI)));
  }
}
956 
// Fills `rev` with n/8 entries: entry i is the bit-reversed index i, shifted
// down by 32-ld+3 bits and then multiplied by 4, where ld = ilog(n)-1.
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int shift = 32-ld+3;
  immutable int count = n>>3;
  for (int i = 0; i < count; ++i) rev[i] = cast(ushort)((bit_reverse(i)>>shift)<<2); //k8
}
962 
// Allocates and fills the per-blocksize tables in slot `b` for a block of `n`
// samples: twiddle factors A/B/C, the window and the bit-reverse table.
// Returns true on success; when an allocation fails it reports
// STBVorbisError.outofmem through error() and returns that call's result.
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1, quarter = n>>2, eighth = n>>3;

  // all three twiddle tables are requested before any of them is checked
  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  immutable bool haveTwiddles = (f.A[b] !is null && f.B[b] !is null && f.C[b] !is null);
  if (!haveTwiddles) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  f.window[b] = setup_malloc!float(f, half);
  if (f.window[b] is null) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (f.bit_reverse[b] is null) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
978 
// Among x[0..n], finds the position of the largest value below x[n] (stored
// to *plow) and of the smallest value above x[n] (stored to *phigh).
// An output is left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  assert(n >= 0 && n <= ushort.max);
  int bestLow = -1;
  int bestHigh = 65536;
  immutable int limit = cast(ushort)n;
  for (int i = 0; i < limit; ++i) {
    if (x[i] > bestLow && x[i] < x[n]) {
      *plow = cast(ushort)i;
      bestLow = x[i];
    }
    if (x[i] < bestHigh && x[i] > x[n]) {
      *phigh = cast(ushort)i;
      bestHigh = x[i];
    }
  }
}
988 
// this has been repurposed so y is now the original index instead of y
struct Point {
  ushort x; // the key point_compare orders by
  ushort y; // original index (see note above)
}
993 
// qsort-style comparator ordering Point values by their x field:
// returns -1, 0 or 1 for smaller, equal, larger.
extern(C) int point_compare (const void *p, const void *q) {
  immutable int lhs = (cast(const(Point)*)p).x;
  immutable int rhs = (cast(const(Point)*)q).x;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
999 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1000 
1001 // ///////////////////////////////////////////////////////////////////// //
// Reads one byte from the decoder's input.
// Returns 0 once the end of input has been reached; a short read sets f.eof.
private ubyte get8 (VorbisDecoder f) {
  // must start out as 0: with `= void` a call made after f.eof was already
  // set returned whatever happened to be on the stack
  ubyte b = 0;
  if (!f.eof) {
    if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  }
  return b;
}
1009 
// Reads a 32-bit little-endian value from the decoder's input.
// Returns 0 when f.eof is already set or (on little-endian hosts) when the
// read comes up short, which also sets f.eof.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  uint value = 0;
  version(LittleEndian) {
    // host byte order matches the stream: read all four bytes in one go
    if (f.rawRead((&value)[0..1]) != value.sizeof) { f.eof = true; value = 0; }
  } else {
    // assemble byte by byte, least significant first
    for (int shift = 0; shift < 32; shift += 8) value |= cast(uint)get8(f)<<shift;
  }
  return value;
}
1024 
// Reads exactly `n` bytes into `data`.
// Returns false when f.eof is already set, when n is negative, or when the
// read comes up short (which also sets f.eof); n == 0 succeeds without reading.
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n > 0 && f.rawRead(data[0..n]) != n) {
    f.eof = true;
    return false;
  }
  return true;
}
1031 
// Skips `n` input bytes through f.rawSkip; does nothing when f.eof is set or
// n is not positive.
private void skip (VorbisDecoder f, int n) {
  immutable bool nothingToDo = (f.eof || n <= 0);
  if (!nothingToDo) f.rawSkip(n);
}
1036 
// Seeks the input to absolute offset `loc` and clears f.eof. Offsets of
// 0x80000000 and above are not seeked to; they only set f.eof.
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  immutable bool outOfRange = (loc >= 0x80000000);
  f.eof = outOfRange;
  if (outOfRange) return;
  f.rawSeek(loc);
}
1043 
1044 
// the four bytes "OggS" that capture_pattern and maybe_start_packet check for
// at the start of a page
immutable char[4] ogg_page_header = "OggS"; //[ 0x4f, 0x67, 0x67, 0x53 ];
1046 
// Reads four bytes and reports whether they are the page signature "OggS".
// Returns false as well when the four bytes cannot be read.
private bool capture_pattern (VorbisDecoder f) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  char[4] magic = void;
  immutable bool gotAll = getn(f, magic.ptr, 4);
  return (gotAll && magic == "OggS");
}
1053 
// bit masks for f.page_flag, the header flag byte read by
// start_page_no_capturepattern
enum PAGEFLAG_continued_packet = 1;
enum PAGEFLAG_first_page = 2;
enum PAGEFLAG_last_page = 4;
1057 
// Parses the rest of a page header once the "OggS" signature has been
// consumed: version, flags, granule position, serial, sequence number, CRC
// and the segment table. On success f.next_seg is 0 and true is returned;
// otherwise the failure is reported through error().
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position (low half, then high half)
  immutable uint granuleLo = get32(f);
  immutable uint granuleHi = get32(f);
  // @TODO: validate granuleLo, granuleHi as valid positions?
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  f.last_page = get32(f);
  // CRC32
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);
  // assume we _don't_ know the sample position of any segments
  f.end_seg_with_known_loc = -2;
  immutable bool granuleKnown = !(granuleLo == ~0U && granuleHi == ~0U);
  if (granuleKnown) {
    // determine which packet is the last one that will complete:
    // walk back to the _last_ segment shorter than 255 bytes
    int lastEnd = f.segment_count-1;
    while (lastEnd >= 0 && f.segments.ptr[lastEnd] >= 255) --lastEnd;
    if (lastEnd >= 0) {
      f.end_seg_with_known_loc = lastEnd;
      f.known_loc_for_packet = granuleLo;
    }
  }
  if (f.first_decode) {
    // remember the extent of this page: 27 fixed header bytes, the segment
    // table, and the payload described by the table
    int len = 27+f.segment_count;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    ProbedPage p;
    p.page_start = f.first_audio_page_offset;
    p.page_end = p.page_start+len;
    p.last_decoded_sample = granuleLo;
    f.p_first = p;
  }
  f.next_seg = 0;
  return true;
}
1105 
// Reads a complete page header: first the "OggS" signature, then the rest.
// A missing signature is reported as STBVorbisError.missing_capture_pattern.
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
1110 
// Prepares for reading a new packet. While no segment is pending
// (f.next_seg == -1) a new page is started, and that page must not carry the
// continued-packet flag. Afterwards the per-packet read state is reset.
// Returns true on success, otherwise the failing call's result.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  return true;
}
1123 
// Like start_packet, but running out of input exactly at a page boundary is
// not treated as an error: false is returned without error() being called.
private int maybe_start_packet (VorbisDecoder f) {
  if (f.next_seg == -1) {
    immutable first = get8(f);
    if (f.eof) return false; // EOF at page boundary is not an error!
    // the signature "OggS" is checked byte by byte; reading stops at the
    // first byte that does not match
    if (first != 0x4f || get8(f) != 0x67 || get8(f) != 0x67 || get8(f) != 0x53) {
      return error(f, STBVorbisError.missing_capture_pattern);
    }
    if (!start_page_no_capturepattern(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) {
      // set up enough state that we can read this packet if we want,
      // e.g. during recovery
      f.last_seg = false;
      f.bytes_in_seg = 0;
      return error(f, STBVorbisError.continued_packet_flag_invalid);
    }
  }
  return start_packet(f);
}
1143 
// Advances to the next segment of the current packet and returns its length
// in bytes. Returns 0 when the packet's last segment was already reached or
// when the next page cannot be started. A segment shorter than 255 bytes
// marks the end of the packet.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    // the packet continues on the following page
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) { f.last_seg = 1; return 0; }
    immutable bool continued = (f.page_flag&PAGEFLAG_continued_packet) != 0;
    if (!continued) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  auto seglen = f.segments.ptr[f.next_seg++];
  immutable bool packetEnds = (seglen < 255);
  if (packetEnds) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  // page exhausted: the next call has to start a new page
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = seglen;
  return seglen;
}
1161 
// EOP: returned by the packet byte readers when the packet has no more bytes
// INVALID_BITS: stored in f.valid_bits by get_bits_main once EOP was hit
enum EOP = (-1);
enum INVALID_BITS = (-1);
1164 
// Returns the next byte of the current packet, or EOP when the packet is
// exhausted. Moves on to the next segment when the current one is used up.
private int get8_packet_raw (VorbisDecoder f) {
  if (!f.bytes_in_seg) {  // CLANG!
    if (f.last_seg || !next_segment(f)) return EOP;
  }
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  --f.bytes_in_seg;
  ++f.packet_bytes;
  return get8(f);
}
1175 
// Returns the next packet byte (or EOP) and resets f.valid_bits to 0.
private int get8_packet (VorbisDecoder f) {
  immutable int octet = get8_packet_raw(f);
  f.valid_bits = 0;
  return octet;
}
1181 
// Reads four packet bytes as a little-endian 32-bit value.
// Returns EOP (converted to uint) as soon as one of the bytes is EOP.
private uint get32_packet (VorbisDecoder f) {
  uint value = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    immutable uint octet = get8_packet(f);
    if (octet == EOP) return EOP;
    value += octet<<shift;
  }
  return value;
}
1193 
// Discards the remaining bytes of the current packet.
private void flush_packet (VorbisDecoder f) {
  for (;;) {
    if (get8_packet_raw(f) == EOP) break;
  }
}
1197 
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
//
// Returns the next `n` bits of the packet, least significant bit first.
// Returns 0 once the end of the packet was hit (f.valid_bits is then
// INVALID_BITS and stays that way).
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case
      immutable uint low = get_bits_main(f, 24);
      return low+(get_bits_main(f, n-24)<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // refill byte by byte, stacking each byte above the bits already held
    do {
      immutable uint octet = get8_packet_raw(f);
      if (octet == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += octet<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint bits = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return bits;
}
1227 
// chooses minimal possible integer type
// Reads `n` bits (1..64) from the packet and returns them as ubyte, ushort,
// uint or ulong, whichever is the smallest type that holds `n` bits.
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  static if (n <= 8) return cast(ubyte)get_bits_main(f, n);
  else static if (n <= 16) return cast(ushort)get_bits_main(f, n);
  else static if (n <= 32) return cast(uint)get_bits_main(f, n);
  else {
    // get_bits_main() returns a uint, so asking it for more than 32 bits at
    // once silently dropped the high bits; wider values are put together
    // from pieces of at most 24 bits, least significant piece first
    ulong res = get_bits_main(f, 24);
    res |= cast(ulong)get_bits_main(f, (n-24 < 24 ? n-24 : 24))<<24;
    static if (n > 48) res |= cast(ulong)get_bits_main(f, n-48)<<48;
    return res;
  }
}
1236 
// chooses minimal possible integer type, assume no overflow
// Reads `n` bits (1..64) from the packet, adds `add`, and returns the sum as
// ubyte, ushort, uint or ulong, whichever is the smallest type that holds
// `n` bits. The caller guarantees that the sum fits.
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  static if (n <= 8) return cast(ubyte)(get_bits_main(f, n)+add);
  else static if (n <= 16) return cast(ushort)(get_bits_main(f, n)+add);
  else static if (n <= 32) return cast(uint)(get_bits_main(f, n)+add);
  else {
    // get_bits_main() returns a uint, so asking it for more than 32 bits at
    // once silently dropped the high bits (and the addition was done in 32
    // bits); wider values are put together from pieces of at most 24 bits,
    // least significant piece first, and `add` is applied to the full value
    ulong res = get_bits_main(f, 24);
    res |= cast(ulong)get_bits_main(f, (n-24 < 24 ? n-24 : 24))<<24;
    static if (n > 48) res |= cast(ulong)get_bits_main(f, n-48)<<48;
    return res+add;
  }
}
1245 
// @OPTIMIZE: primary accumulator for huffman
// expand the buffer to as many bits as possible without reading off end of packet
// it might be nice to allow f.valid_bits and f.acc to be stored in registers,
// e.g. cache them locally and decode locally
//
// This is source text, pasted in with mixin(PrepHuffmanMixin) at places where
// a decoder named `f` is in scope. While f.valid_bits <= 24 it fetches bytes
// with get8_packet_raw() and adds each one to f.acc above the bits already
// held; it stops early when the packet's last segment is used up or when EOP
// comes back. f.acc is cleared first when no valid bits are left.
//private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    int phmz = void;
    do {
      if (f.last_seg && !f.bytes_in_seg) break;
      phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits <= 24);
  }
};
1264 
// NOTE(review): presumably the packet-type values of the three Vorbis header
// packets (identification, comment, setup) -- no use is visible in this part
// of the file, confirm at the use sites
enum VorbisPacket {
  id = 1,
  comment = 3,
  setup = 5,
}
1270 
// Slow-path huffman decode of one codeword from codebook `c`.
// Returns the decoded index (the sorted index for sparse codebooks, the
// symbol otherwise), or -1 when the codebook has no code tables, when fewer
// bits are left than the matched code needs, or when no code matches (the
// latter is also reported as STBVorbisError.invalid_stream).
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;
  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  immutable bool useBinarySearch = (c.entries > 8 ? c.sorted_codewords !is null : !c.codewords);
  if (useBinarySearch) {
    immutable uint code = bit_reverse(f.acc);
    // invariant: sc[lo] <= code < sc[lo+span]
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    // lo is now the sorted index
    if (!c.sparse) lo = c.sorted_values[lo];
    // lo is now sorted index if sparse, or symbol otherwise
    immutable int len = c.codeword_lengths[lo];
    if (f.valid_bits < len) {
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= len;
    f.valid_bits -= len;
    return lo;
  }
  // if small, linear search
  debug(stb_vorbis) assert(!c.sparse);
  foreach (uint i; 0..c.entries) {
    immutable clen = c.codeword_lengths[i];
    if (clen == NO_CODE) continue;
    if (c.codewords[i] != (f.acc&((1<<clen)-1))) continue;
    // the code matches; it still has to be fully covered by valid bits
    if (f.valid_bits >= clen) {
      f.acc >>= clen;
      f.valid_bits -= clen;
      return i;
    }
    f.valid_bits = 0;
    return -1;
  }
  error(f, STBVorbisError.invalid_stream);
  f.valid_bits = 0;
  return -1;
}
1322 
1323 
// Builds the source text of the inlined huffman decode: a lookup in the fast
// table, with codebook_decode_scalar_raw() as fallback when the table holds a
// negative value. `var` names the variable that receives the result and `c`
// names the codebook pointer; they are substituted for the ${i} and ${c}
// placeholders by cmacroFixVars (defined elsewhere in this file).
// The generated code expects a decoder named `f` in scope and leaves -1 in
// the result variable when the matched code is longer than the valid bits.
template DECODE_RAW(string var, string c) {
  enum DECODE_RAW = q{
    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
    // fast huffman table lookup
    ${i} = f.acc&FAST_HUFFMAN_TABLE_MASK;
    ${i} = ${c}.fast_huffman.ptr[${i}];
    if (${i} >= 0) {
      auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
      f.acc >>= ${__temp_prefix__}n;
      f.valid_bits -= ${__temp_prefix__}n;
      if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
    } else {
      ${i} = codebook_decode_scalar_raw(f, ${c});
    }
  }.cmacroFixVars!("i", "c")(var, c);
}
1340 
// Source text of DECODE_RAW followed by one extra step: for a sparse codebook
// the decoded value is replaced by ${c}.sorted_values[value].
enum DECODE(string var, string c) = q{
  ${DECODE_RAW}
  if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
}.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
1345 
1346 
// decode macro used by the vector (VQ) decoders below: DECODE when
// STB_VORBIS_DIVIDES_IN_CODEBOOK is set, plain DECODE_RAW otherwise
version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
  alias DECODE_VQ = DECODE;
} else {
  alias DECODE_VQ = DECODE_RAW;
}
1352 
1353 
1354 
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
//
// Each of these yields source text for use with mixin(): the first two expand
// to `(<c>.multiplicands[<off>])`, the base element expands to `(0)`.
enum CODEBOOK_ELEMENT(string c, string off) = "("~c~".multiplicands["~off~"])";
enum CODEBOOK_ELEMENT_FAST(string c, string off) = "("~c~".multiplicands["~off~"])";
enum CODEBOOK_ELEMENT_BASE(string c) = "(0)";
1360 
1361 
// Decodes one codeword for a vector lookup and returns its index, or a
// negative value on failure. A codebook with lookup_type 0, and a failed
// decode that is not simply the end of the packet, are reported as
// STBVorbisError.invalid_stream.
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return -1;
  }
  int z = -1;
  mixin(DECODE_VQ!("z", "c"));
  debug(stb_vorbis) if (c.sparse) assert(z < c.sorted_entries);
  if (z >= 0) return z;
  // check for EOP: running out of packet data is not reported as an error
  if (f.bytes_in_seg || !f.last_seg) error(f, STBVorbisError.invalid_stream);
  return z;
}
1377 
// Decodes one vector from codebook `c` and adds its elements to
// output[0..len], where `len` is first capped at c.dimensions.
// Returns false when no codeword could be decoded, true otherwise.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (len > c.dimensions) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      // derive each element's table offset from the entry number by
      // repeated division
      float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int div = 1;
      foreach (immutable i; 0..len) {
        int off = (z/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i] += val;
        if (c.sequence_p) last = val+c.minimum_value;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // the entry's elements are stored contiguously, c.dimensions per entry
  z *= c.dimensions;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  foreach (immutable i; 0..len) {
    immutable float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i] += val;
    // in sequence mode every element builds on the previous one
    if (c.sequence_p) last = val+c.minimum_value;
  }

  return true;
}
1413 
// Decodes one vector from codebook `c` and adds its elements to `output` at
// positions 0, step, 2*step, ...; at most c.dimensions elements are written.
// Returns false when no codeword could be decoded, true otherwise.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  if (len > c.dimensions) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      int div = 1;
      for (int i = 0; i < len; ++i) {
        int off = (z/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i*step] += val;
        if (c.sequence_p) last = val;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // the entry's elements are stored contiguously, c.dimensions per entry
  z *= c.dimensions;
  for (int i = 0; i < len; ++i) {
    float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i*step] += val;
    if (c.sequence_p) last = val;
  }

  return true;
}
1443 
// Decodes vectors from codebook `c` until `total_decode` values have been
// produced, distributing them round-robin over the `ch` channel buffers in
// `outputs` (null buffers are skipped but still counted).
//   c_inter_p / p_inter_p - in/out: current channel and current position
//                           within the channel buffers
//   len                   - length of each channel buffer
// Returns true on success and writes the advanced position back. Returns
// false when the packet ends; other decode failures and a codebook with
// lookup_type 0 are reported as STBVorbisError.invalid_stream.
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective)
    // (the remaining room is the buffer length minus that offset; this used
    // to subtract c_inter from the offset, overshooting by 2*c_inter values)
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          // outputs.ptr[c_inter] is already a plain float*, so it is indexed
          // directly (the former `.ptr` on it did not compile)
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    z *= c.dimensions;
    if (c.sequence_p) {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1505 
// Source text for use with mixin(): stores in `dest` the y value at `x` on
// the straight line through (x0,y0) and (x1,y1). The slope is applied to the
// absolute value of dy, so the integer division truncates toward y0 for both
// rising and falling lines. All arguments are expression strings substituted
// into the text by cmacroFixVars (defined elsewhere in this file).
//private int predict_point (int x, int x0, int x1, int y0, int y1)
enum predict_point(string dest, string x, string x0, string x1, string y0, string y1) = q{{
  //import std.math : abs;
  int dy = ${y1}-${y0};
  int adx = ${x1}-${x0};
  // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  int err = /*abs(dy)*/(dy < 0 ? -dy : dy)*(${x}-${x0});
  int off = err/adx;
  /*return*/${dest} = (dy < 0 ? ${y0}-off : ${y0}+off);
}}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
1516 
// Lookup table used by draw_line to turn an integer floor value (0..255)
// into a linear multiplier. The 256 entries are block-copied from the
// specification and must not be altered; they increase strictly from
// about 1.06e-07 up to exactly 1.0. The trailing comment on each row gives
// the index of its first entry.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, //   0
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, //   4
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, //   8
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, //  12
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, //  16
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, //  20
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, //  24
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, //  28
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, //  32
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, //  36
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, //  40
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, //  44
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, //  48
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, //  52
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, //  56
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, //  60
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, //  64
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, //  68
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, //  72
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, //  76
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, //  80
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, //  84
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, //  88
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, //  92
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, //  96
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, // 100
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, // 104
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, // 108
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, // 112
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, // 116
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, // 120
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, // 124
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, // 128
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, // 132
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, // 136
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, // 140
  0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f, // 144
  0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f, // 148
  0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f, // 152
  0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f, // 156
  0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f, // 160
  0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f, // 164
  0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f, // 168
  0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f, // 172
  0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f, // 176
  0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f, // 180
  0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f, // 184
  0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f, // 188
  0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f, // 192
  0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f, // 196
  0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f, // 200
  0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f, // 204
  0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f, // 208
  0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f, // 212
  0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f, // 216
  0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f, // 220
  0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f, // 224
  0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f, // 228
  0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f, // 232
  0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f, // 236
  0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f, // 240
  0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f, // 244
  0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f, // 248
  0.82788260f, 0.88168307f, 0.9389798f, 1.0f // 252
];
1584 
1585 
1586 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1587 // note that you must produce bit-identical output to decode correctly;
1588 // this specific sequence of operations is specified in the spec (it's
1589 // drawing integer-quantized frequency-space lines that the encoder
1590 // expects to be exactly the same)
1591 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1592 // have to divide in the setup? sigh.
// LINE_OP!(a, b) yields the source text of one statement that applies floor
// value `b` to destination `a`: a plain store when STB_VORBIS_NO_DEFER_FLOOR
// is set, an in-place multiply otherwise.
template LINE_OP(string a, string b) {
  version(STB_VORBIS_NO_DEFER_FLOOR) {
    enum LINE_OP = a~" = "~b~";";
  } else {
    enum LINE_OP = a~" *= "~b~";";
  }
}
1598 
version(STB_VORBIS_DIVIDE_TABLE) {
  // Bounds of the quotient table consulted by draw_line: it is only used
  // when ady < DIVTAB_NUMER and adx < DIVTAB_DENOM.
  enum : int {
    DIVTAB_NUMER = 32,
    DIVTAB_DENOM = 64,
  }
  // DIVTAB_NUMER rows of DIVTAB_DENOM bytes (2KB), indexed as [ady][adx]
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table;
}
1604 
// Branch-free absolute value, as an expression macro: mixin(ABS!"expr").
// With s = expr>>31 (0 for a non-negative 32-bit int, -1 for a negative one)
// the result is (expr+s)^s. The argument text is pasted three times, so it
// should be a side-effect-free int expression.
enum ABS(string val) = q{(((${val})+((${val})>>31))^((${val})>>31))}.cmacroFixVars!"val"(val);
1607 
// Statement macro form of:
//   void draw_line (float* output, int x0, int y0, int x1, int y1, int n)
// (kept as a string mixin instead of a function so it is always expanded inline).
//
// Walks x from x0 up to min(x1, n), exclusive, and for every x applies
// LINE_OP to output[x] using inverse_db_table at the current y. y starts at
// y0 and advances per step by `base` (the integer quotient dy/adx) or by `sy`
// (one unit further in the direction of dy) whenever the accumulated error
// reaches adx.
//
// Differences from a plain textual expansion:
//   - the table index is masked with 255, so a y that runs outside 0..255
//     (possible with corrupt streams) cannot index past inverse_db_table;
//   - the clamp of x1 to n is done on a private copy, so the variable named
//     by the x1 argument at the expansion site is left untouched, matching
//     the by-value signature above.
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;
  int ${__temp_prefix__}xend = ${x1};

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      // small operands: take the quotient from the table instead of dividing
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  // from here on ady is the remainder part of the slope
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${__temp_prefix__}xend > ${n}) ${__temp_prefix__}xend = ${n};
  if (${__temp_prefix__}x < ${__temp_prefix__}xend) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y&255]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${__temp_prefix__}xend; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y&255]"));
    }
  }
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1666 
// Decodes `n` residue values of one partition from `book` into `target`,
// starting at index `offset`.
//   rtype == 0: the partition is read as n/dimensions interleaved columns,
//               one codebook_decode_step call per column;
//   otherwise:  consecutive vectors are read with codebook_decode, advancing
//               by the codebook dimension each time.
// Returns false as soon as a codebook read fails, true otherwise.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    int consumed = 0;
    while (consumed < n) {
      if (!codebook_decode(f, book, target+offset, n-consumed)) return false;
      consumed += book.dimensions;
      offset += book.dimensions;
    }
    return true;
  }

  immutable int step = n/book.dimensions;
  for (int col = 0; col < step; ++col) {
    if (!codebook_decode_step(f, book, target+offset+col, n-offset-col, step)) return false;
  }
  return true;
}
1680 
// Decodes the residue vectors of one submap into `residue_buffers`.
//
//   f                 decoder state (codebooks, residue configs, temp allocator)
//   residue_buffers   one output buffer per channel
//   ch                number of channels handled by this call
//   n                 number of floats per buffer; buffers of channels that
//                     are decoded get zeroed over this length first
//   rn                index of the residue configuration to use
//   do_not_decode     per-channel flags; a non-zero entry skips that channel
//
// Residue type 2 with more than one channel is decoded as a single
// interleaved stream through codebook_decode_deinterleave_repeat; every other
// case goes through residue_decode channel by channel. In both cases there
// are 8 passes over the partitions, the classification codeword of each group
// of `classwords` partitions is read during pass 0 only, and decoding simply
// stops (leaving what has been written so far) on end of packet or on a
// failed codebook read.
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions;
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size;
  uint temp_alloc_point = temp_alloc_save(f);
  // per-channel scratch: either the classification number of every partition,
  // or a pointer into r.classdata for every group of `classwords` partitions
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  if (rtype == 2 && ch != 1) {
    // nothing to do when every channel is flagged as not decoded
    int j = void;
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      while (pcount < part_read) {
        // position in the interleaved stream -> (channel, index in channel);
        // the two-channel case keeps its mask/shift form
        int z = r.begin+pcount*r.part_size;
        int c_inter = void, p_inter = void;
        if (ch == 2) {
          c_inter = z&1;
          p_inter = z>>1;
        } else {
          c_inter = z%ch;
          p_inter = z/ch;
        }
        if (pass == 0) {
          Codebook* cc = f.codebooks+r.classbook;
          int q;
          mixin(DECODE!("q", "cc"));
          if (q == EOP) goto done;
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
            for (int i = classwords-1; i >= 0; --i) {
              classifications[0][i+pcount] = q%r.classifications;
              q /= r.classifications;
            }
          } else {
            part_classdata[0][class_set] = r.classdata[q];
          }
        }
        //stb_prof(5);
        for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
          int zz = r.begin+pcount*r.part_size;
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
            int cc = classifications[0][pcount];
          } else {
            int cc = part_classdata[0][class_set][i];
          }
          int b = r.residue_books[cc].ptr[pass];
          if (b >= 0) {
            Codebook* book = f.codebooks+b;
            //stb_prof(20); // accounts for X time
            if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
            //stb_prof(7);
          } else {
            // no codebook for this class in this pass: skip over the partition
            zz += r.part_size;
            if (ch == 2) {
              c_inter = zz&1;
              p_inter = zz>>1;
            } else {
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
        }
        //stb_prof(8);
        version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
          ++class_set;
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        // one classification codeword per decoded channel
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j][i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j][pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  //stb_prof(0);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1897 
1898 
1899 // the following were split out into separate functions while optimizing;
1900 // they could be pushed back up but eh. __forceinline showed no change;
1901 // they're probably already being inlined.
// Butterfly pass over two runs of floats, both walked downwards:
// the "upper" run ends at e[i_off], the "lower" run ends at e[i_off+k_off].
// For every pair of floats the upper run receives the sum, the lower run
// receives the difference rotated by the twiddle pair (A[0], A[1]); A then
// advances by 8. n>>2 groups of four such butterflies are processed.
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  debug(stb_vorbis) assert((n&3) == 0);
  float* upper = e+i_off;
  float* lower = upper+k_off;
  foreach (immutable _; 0..(n>>2)*4) {
    immutable float d0 = upper[ 0]-lower[ 0];
    immutable float d1 = upper[-1]-lower[-1];
    upper[ 0] += lower[ 0];
    upper[-1] += lower[-1];
    lower[ 0] = d0*A[0]-d1*A[1];
    lower[-1] = d1*A[0]+d0*A[1];
    upper -= 2;
    lower -= 2;
    A += 8;
  }
}
1943 
// Same butterfly as imdct_step3_iter0_loop, but with a caller-chosen twiddle
// stride: the upper run ends at e[d0], the lower run at e[d0+k_off], and A
// advances by k1 after every butterfly. lim>>2 groups of four butterflies
// are processed.
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* upper = e+d0;
  float* lower = upper+k_off;
  foreach (immutable _; 0..(lim>>2)*4) {
    immutable float diff0 = upper[ 0]-lower[ 0];
    immutable float diff1 = upper[-1]-lower[-1];
    upper[ 0] += lower[ 0];
    upper[-1] += lower[-1];
    lower[ 0] = diff0*A[0]-diff1*A[1];
    lower[-1] = diff1*A[0]+diff0*A[1];
    upper -= 2;
    lower -= 2;
    A += k1;
  }
}
1989 
// Butterfly pass that reuses the same four twiddle pairs for every block.
// The pairs are read once from A at offsets 0, a_off, 2*a_off and 3*a_off.
// Each of the n iterations runs four butterflies on the eight floats ending
// at the current upper/lower positions (upper starts at e[i_off], lower at
// e[i_off+k_off]), then both positions move down by k0.
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  float[8] tw = void;
  tw.ptr[0] = A[0];
  tw.ptr[1] = A[0+1];
  tw.ptr[2] = A[0+a_off];
  tw.ptr[3] = A[0+a_off+1];
  tw.ptr[4] = A[0+a_off*2+0];
  tw.ptr[5] = A[0+a_off*2+1];
  tw.ptr[6] = A[0+a_off*3+0];
  tw.ptr[7] = A[0+a_off*3+1];

  float* upper = e+i_off;
  float* lower = upper+k_off;
  foreach (immutable _; 0..n) {
    foreach (immutable int b; 0..4) {
      immutable int at = -2*b;
      immutable float wr = tw.ptr[2*b];
      immutable float wi = tw.ptr[2*b+1];
      immutable float d0 = upper[at]-lower[at];
      immutable float d1 = upper[at-1]-lower[at-1];
      upper[at] = upper[at]+lower[at];
      upper[at-1] = upper[at-1]+lower[at-1];
      lower[at] = d0*wr-d1*wi;
      lower[at-1] = d1*wr+d0*wi;
    }
    upper -= k0;
    lower -= k0;
  }
}
2035 
// Statement macro form of: void iter_54 (float* z)
// Combines the eight floats z[0], z[-1] .. z[-7] in place using only
// additions and subtractions. All inputs are loaded before anything is
// stored, so every output is built from the original eight values.
// Usage: mixin(iter_54!"pointer expression");
enum iter_54(string z) = q{{
  auto ${__temp_prefix__}p = (${z});
  immutable float ${__temp_prefix__}a0 = ${__temp_prefix__}p[ 0];
  immutable float ${__temp_prefix__}a1 = ${__temp_prefix__}p[-1];
  immutable float ${__temp_prefix__}a2 = ${__temp_prefix__}p[-2];
  immutable float ${__temp_prefix__}a3 = ${__temp_prefix__}p[-3];
  immutable float ${__temp_prefix__}a4 = ${__temp_prefix__}p[-4];
  immutable float ${__temp_prefix__}a5 = ${__temp_prefix__}p[-5];
  immutable float ${__temp_prefix__}a6 = ${__temp_prefix__}p[-6];
  immutable float ${__temp_prefix__}a7 = ${__temp_prefix__}p[-7];

  immutable float ${__temp_prefix__}s04 = ${__temp_prefix__}a0+${__temp_prefix__}a4;
  immutable float ${__temp_prefix__}d04 = ${__temp_prefix__}a0-${__temp_prefix__}a4;
  immutable float ${__temp_prefix__}s15 = ${__temp_prefix__}a1+${__temp_prefix__}a5;
  immutable float ${__temp_prefix__}d15 = ${__temp_prefix__}a1-${__temp_prefix__}a5;
  immutable float ${__temp_prefix__}s26 = ${__temp_prefix__}a2+${__temp_prefix__}a6;
  immutable float ${__temp_prefix__}d26 = ${__temp_prefix__}a2-${__temp_prefix__}a6;
  immutable float ${__temp_prefix__}s37 = ${__temp_prefix__}a3+${__temp_prefix__}a7;
  immutable float ${__temp_prefix__}d37 = ${__temp_prefix__}a3-${__temp_prefix__}a7;

  ${__temp_prefix__}p[ 0] = ${__temp_prefix__}s04+${__temp_prefix__}s26; // z0+z4+z2+z6
  ${__temp_prefix__}p[-1] = ${__temp_prefix__}s15+${__temp_prefix__}s37; // z1+z5+z3+z7
  ${__temp_prefix__}p[-2] = ${__temp_prefix__}s04-${__temp_prefix__}s26; // z0+z4-z2-z6
  ${__temp_prefix__}p[-3] = ${__temp_prefix__}s15-${__temp_prefix__}s37; // z1+z5-z3-z7
  ${__temp_prefix__}p[-4] = ${__temp_prefix__}d04+${__temp_prefix__}d37; // z0-z4+z3-z7
  ${__temp_prefix__}p[-5] = ${__temp_prefix__}d15-${__temp_prefix__}d26; // z1-z5-z2+z6
  ${__temp_prefix__}p[-6] = ${__temp_prefix__}d04-${__temp_prefix__}d37; // z0-z4-z3+z7
  ${__temp_prefix__}p[-7] = ${__temp_prefix__}d15+${__temp_prefix__}d26; // z1-z5+z2-z6
}}.cmacroFixVars!"z"(z);
2069 
// Processes n blocks of 16 floats, walking downwards from e[i_off].
// In each block the upper eight floats receive sums with the lower eight;
// the lower eight receive the differences, either copied, swapped with a
// sign change, or scaled by the single twiddle value A[base_n>>3]. Both
// halves of the block are then finished with iter_54.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
    immutable int a_off = base_n >> 3;
    immutable float A2 = A[0+a_off];
    immutable float negA2 = -A2;
    float *z = e + i_off;
    float *stop = z - 16 * n;

    for (; z > stop; z -= 16) {
        {
            // floats 0..3 against floats 8..11
            immutable float k00 = z[ -0] - z[ -8];
            immutable float k11 = z[ -1] - z[ -9];
            immutable float l00 = z[ -2] - z[-10];
            immutable float l11 = z[ -3] - z[-11];
            z[ -0] += z[ -8];
            z[ -1] += z[ -9];
            z[ -2] += z[-10];
            z[ -3] += z[-11];
            z[ -8] = k00;
            z[ -9] = k11;
            z[-10] = (l00+l11) * A2;
            z[-11] = (l11-l00) * A2;
        }
        {
            // floats 4..7 against floats 12..15
            immutable float k00 = z[ -4] - z[-12];
            immutable float k11 = z[ -5] - z[-13];
            immutable float l00 = z[ -6] - z[-14];
            immutable float l11 = z[ -7] - z[-15];
            z[ -4] += z[-12];
            z[ -5] += z[-13];
            z[ -6] += z[-14];
            z[ -7] += z[-15];
            z[-12] = k11;
            z[-13] = -k00;
            z[-14] = (l11-l00) * A2;
            z[-15] = (l00+l11) * negA2;
        }

        mixin(iter_54!"z");
        mixin(iter_54!"z-8");
    }
}
2112 
2113 private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
2114   import core.stdc.stdlib : alloca;
2115 
2116   int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
2117   int ld;
2118   // @OPTIMIZE: reduce register pressure by using fewer variables?
2119   int save_point = temp_alloc_save(f);
2120   float *buf2;
2121   buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
2122   float *u = null, v = null;
2123   // twiddle factors
2124   float *A = f.A.ptr[blocktype];
2125 
2126   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2127   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2128 
2129   // kernel from paper
2130 
2131 
2132   // merged:
2133   //   copy and reflect spectral data
2134   //   step 0
2135 
2136   // note that it turns out that the items added together during
2137   // this step are, in fact, being added to themselves (as reflected
2138   // by step 0). inexplicable inefficiency! this became obvious
2139   // once I combined the passes.
2140 
2141   // so there's a missing 'times 2' here (for adding X to itself).
  // this propagates through linearly to the end, where the numbers
2143   // are 1/2 too small, and need to be compensated for.
2144 
2145   {
2146     float* d, e, AA, e_stop;
2147     d = &buf2[n2-2];
2148     AA = A;
2149     e = &buffer[0];
2150     e_stop = &buffer[n2];
2151     while (e != e_stop) {
2152       d[1] = (e[0]*AA[0]-e[2]*AA[1]);
2153       d[0] = (e[0]*AA[1]+e[2]*AA[0]);
2154       d -= 2;
2155       AA += 2;
2156       e += 4;
2157     }
2158     e = &buffer[n2-3];
2159     while (d >= buf2) {
2160       d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
2161       d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
2162       d -= 2;
2163       AA += 2;
2164       e -= 4;
2165     }
2166   }
2167 
2168   // now we use symbolic names for these, so that we can
2169   // possibly swap their meaning as we change which operations
2170   // are in place
2171 
2172   u = buffer;
2173   v = buf2;
2174 
2175   // step 2    (paper output is w, now u)
2176   // this could be in place, but the data ends up in the wrong
2177   // place... _somebody_'s got to swap it, so this is nominated
2178   {
2179     float* AA = &A[n2-8];
2180     float* d0, d1, e0, e1;
2181     e0 = &v[n4];
2182     e1 = &v[0];
2183     d0 = &u[n4];
2184     d1 = &u[0];
2185     while (AA >= A) {
2186       float v40_20, v41_21;
2187 
2188       v41_21 = e0[1]-e1[1];
2189       v40_20 = e0[0]-e1[0];
2190       d0[1]  = e0[1]+e1[1];
2191       d0[0]  = e0[0]+e1[0];
2192       d1[1]  = v41_21*AA[4]-v40_20*AA[5];
2193       d1[0]  = v40_20*AA[4]+v41_21*AA[5];
2194 
2195       v41_21 = e0[3]-e1[3];
2196       v40_20 = e0[2]-e1[2];
2197       d0[3]  = e0[3]+e1[3];
2198       d0[2]  = e0[2]+e1[2];
2199       d1[3]  = v41_21*AA[0]-v40_20*AA[1];
2200       d1[2]  = v40_20*AA[0]+v41_21*AA[1];
2201 
2202       AA -= 8;
2203 
2204       d0 += 4;
2205       d1 += 4;
2206       e0 += 4;
2207       e1 += 4;
2208     }
2209   }
2210 
2211   // step 3
2212   ld = ilog(n)-1; // ilog is off-by-one from normal definitions
2213 
2214   // optimized step 3:
2215 
2216   // the original step3 loop can be nested r inside s or s inside r;
2217   // it's written originally as s inside r, but this is dumb when r
2218   // iterates many times, and s few. So I have two copies of it and
2219   // switch between them halfway.
2220 
2221   // this is iteration 0 of step 3
2222   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
2223   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);
2224 
2225   // this is iteration 1 of step 3
2226   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
2227   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
2228   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
2229   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);
2230 
2231   l = 2;
2232   for (; l < (ld-3)>>1; ++l) {
2233     int k0 = n>>(l+2), k0_2 = k0>>1;
2234     int lim = 1<<(l+1);
2235     foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
2236   }
2237 
2238   for (; l < ld-6; ++l) {
2239     int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
2240     int rlim = n>>(l+6);
2241     int lim = 1<<(l+1);
2242     int i_off;
2243     float *A0 = A;
2244     i_off = n2-1;
2245     foreach (immutable _; 0..rlim) {
2246       imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2247       A0 += k1*4;
2248       i_off -= 8;
2249     }
2250   }
2251 
2252   // iterations with count:
2253   //   ld-6,-5,-4 all interleaved together
2254   //       the big win comes from getting rid of needless flops
2255   //         due to the constants on pass 5 & 4 being all 1 and 0;
2256   //       combining them to be simultaneous to improve cache made little difference
2257   imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);
2258 
2259   // output is u
2260 
2261   // step 4, 5, and 6
2262   // cannot be in-place because of step 5
2263   {
2264     ushort *bitrev = f.bit_reverse.ptr[blocktype];
2265     // weirdly, I'd have thought reading sequentially and writing
2266     // erratically would have been better than vice-versa, but in
2267     // fact that's not what my testing showed. (That is, with
2268     // j = bitreverse(i), do you read i and write j, or read j and write i.)
2269     float *d0 = &v[n4-4];
2270     float *d1 = &v[n2-4];
2271     int k4;
2272     while (d0 >= v) {
2273       k4 = bitrev[0];
2274       d1[3] = u[k4+0];
2275       d1[2] = u[k4+1];
2276       d0[3] = u[k4+2];
2277       d0[2] = u[k4+3];
2278 
2279       k4 = bitrev[1];
2280       d1[1] = u[k4+0];
2281       d1[0] = u[k4+1];
2282       d0[1] = u[k4+2];
2283       d0[0] = u[k4+3];
2284 
2285       d0 -= 4;
2286       d1 -= 4;
2287       bitrev += 2;
2288     }
2289   }
2290   // (paper output is u, now v)
2291 
2292 
2293   // data must be in buf2
2294   debug(stb_vorbis) assert(v == buf2);
2295 
2296   // step 7   (paper output is v, now v)
2297   // this is now in place
2298   {
2299     float a02, a11, b0, b1, b2, b3;
2300     float* C = f.C.ptr[blocktype];
2301     float* d, e;
2302     d = v;
2303     e = v+n2-4;
2304     while (d < e) {
2305       a02 = d[0]-e[2];
2306       a11 = d[1]+e[3];
2307 
2308       b0 = C[1]*a02+C[0]*a11;
2309       b1 = C[1]*a11-C[0]*a02;
2310 
2311       b2 = d[0]+e[ 2];
2312       b3 = d[1]-e[ 3];
2313 
2314       d[0] = b2+b0;
2315       d[1] = b3+b1;
2316       e[2] = b2-b0;
2317       e[3] = b1-b3;
2318 
2319       a02 = d[2]-e[0];
2320       a11 = d[3]+e[1];
2321 
2322       b0 = C[3]*a02+C[2]*a11;
2323       b1 = C[3]*a11-C[2]*a02;
2324 
2325       b2 = d[2]+e[ 0];
2326       b3 = d[3]-e[ 1];
2327 
2328       d[2] = b2+b0;
2329       d[3] = b3+b1;
2330       e[0] = b2-b0;
2331       e[1] = b1-b3;
2332 
2333       C += 4;
2334       d += 4;
2335       e -= 4;
2336     }
2337   }
2338 
2339   // data must be in buf2
2340 
2341 
2342   // step 8+decode   (paper output is X, now buffer)
2343   // this generates pairs of data a la 8 and pushes them directly through
2344   // the decode kernel (pushing rather than pulling) to avoid having
2345   // to make another pass later
2346 
2347   // this cannot POSSIBLY be in place, so we refer to the buffers directly
2348   {
2349     float p0, p1, p2, p3;
2350     float* d0, d1, d2, d3;
2351     float* B = f.B.ptr[blocktype]+n2-8;
2352     float* e = buf2+n2-8;
2353     d0 = &buffer[0];
2354     d1 = &buffer[n2-4];
2355     d2 = &buffer[n2];
2356     d3 = &buffer[n-4];
2357     while (e >= v) {
2358       p3 =  e[6]*B[7]-e[7]*B[6];
2359       p2 = -e[6]*B[6]-e[7]*B[7];
2360 
2361       d0[0] =   p3;
2362       d1[3] =  -p3;
2363       d2[0] =   p2;
2364       d3[3] =   p2;
2365 
2366       p1 =  e[4]*B[5]-e[5]*B[4];
2367       p0 = -e[4]*B[4]-e[5]*B[5];
2368 
2369       d0[1] =   p1;
2370       d1[2] = - p1;
2371       d2[1] =   p0;
2372       d3[2] =   p0;
2373 
2374       p3 =  e[2]*B[3]-e[3]*B[2];
2375       p2 = -e[2]*B[2]-e[3]*B[3];
2376 
2377       d0[2] =   p3;
2378       d1[1] = - p3;
2379       d2[2] =   p2;
2380       d3[1] =   p2;
2381 
2382       p1 =  e[0]*B[1]-e[1]*B[0];
2383       p0 = -e[0]*B[0]-e[1]*B[1];
2384 
2385       d0[3] =   p1;
2386       d1[0] = - p1;
2387       d2[3] =   p0;
2388       d3[0] =   p0;
2389 
2390       B -= 8;
2391       e -= 8;
2392       d0 += 4;
2393       d2 += 4;
2394       d1 -= 4;
2395       d3 -= 4;
2396     }
2397   }
2398 
2399   temp_free(f, buf2);
2400   temp_alloc_restore(f, save_point);
2401 }
2402 
// Returns the window table for an overlap region of `len` samples.
// `len` is half of a block size: the doubled value is matched against the
// two configured block sizes and the corresponding entry of f.window is
// returned. Any other length is a programming error and trips assert(0).
private float* get_window (VorbisDecoder f, int len) {
  immutable int fullBlock = len<<1;
  if (fullBlock == f.blocksize_0) return f.window.ptr[0];
  if (fullBlock == f.blocksize_1) return f.window.ptr[1];
  assert(0);
}
2409 
// Element type of the floor Y arrays handed to do_floor():
// int when STB_VORBIS_NO_DEFER_FLOOR is set, short otherwise (the default).
version(STB_VORBIS_NO_DEFER_FLOOR) alias YTYPE = int;
else alias YTYPE = short;
2415 
// Applies the floor curve of channel `i` to `target[0..n/2)`.
//
// The floor is looked up through the channel's submap. Floor type 0 is
// rejected with STBVorbisError.invalid_stream. For floor 1, the points are
// visited in sorted X order and consecutive used points are connected with
// the draw_line mixin; whatever remains up to n/2 is filled with the last
// Y value through the LINE_OP mixin.
//
// A point counts as "used" when step2_flag[j] is set
// (STB_VORBIS_NO_DEFER_FLOOR) or when finalY[j] is non-negative (default).
//
// Returns true on success, otherwise the result of error().
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  int floor = map.submap_floor.ptr[map.chan[i].mux];
  if (f.floor_types.ptr[floor] == 0) return error(f, STBVorbisError.invalid_stream);

  int n2 = n>>1;
  Floor1* g = &f.floor_config[floor].floor1;
  int lx = 0;
  int ly = finalY[0]*g.floor1_multiplier;

  foreach (immutable q; 1..g.values) {
    int j = g.sorted_order.ptr[q];
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      auto cond = step2_flag[j];
    } else {
      auto cond = (finalY[j] >= 0);
    }
    // unused points do not contribute a line segment
    if (!cond) continue;
    int hy = finalY[j]*g.floor1_multiplier;
    int hx = g.Xlist.ptr[j];
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }

  // flat tail; optimization of: draw_line(target, lx, ly, n, ly, n2);
  if (lx < n2) {
    foreach (immutable j; lx..n2) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  }
  return true;
}
2446 
2447 // The meaning of "left" and "right"
2448 //
2449 // For a given frame:
2450 //     we compute samples from 0..n
2451 //     window_center is n/2
2452 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2453 //     all of the samples from left_end to right_start can be output without mixing; however,
2454 //        this interval is 0-length except when transitioning between short and long frames
2455 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2456 //        which we don't have, so those get saved in a buffer
2457 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2458 //        has to be the same as frame N+1's left_end-left_start (which they are by
2459 //        construction)
2460 
// Starts decoding the next audio packet: reads the mode number and computes
// the window boundaries for the frame.
//
// Packets whose first bit is set are not audio packets; they are drained and
// skipped. On success `*mode` receives the mode index and the four window
// outputs receive the start/end of the left and right overlap regions (see
// the description of "left" and "right" above this function).
//
// Returns false on end of stream, when no packet can be started, or when the
// mode number cannot be read or is out of range; true otherwise.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_start = f.channel_buffer_end = 0;

  // locate the next audio packet
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    // check packet type
    if (get_bits!1(f) == 0) break;
    // not audio: swallow the rest of the packet and try the next one
    while (get8_packet(f) != EOP) {}
  }

  int modeIndex = get_bits_main(f, ilog(f.mode_count-1));
  if (modeIndex == EOP) return false;
  if (modeIndex >= f.mode_count) return false;
  *mode = modeIndex;
  Mode* cfg = f.mode_config.ptr+modeIndex;

  // long blocks carry two extra flag bits describing the neighbouring windows
  int blockLen, prevFlag, nextFlag;
  if (cfg.blockflag) {
    blockLen = f.blocksize_1;
    prevFlag = get_bits!1(f);
    nextFlag = get_bits!1(f);
  } else {
    prevFlag = 0;
    nextFlag = 0;
    blockLen = f.blocksize_0;
  }

  // WINDOWING
  immutable int center = blockLen>>1;

  if (cfg.blockflag && !prevFlag) {
    // long block whose previous-window flag is clear: short-sized left overlap
    *p_left_start = (blockLen-f.blocksize_0)>>2;
    *p_left_end   = (blockLen+f.blocksize_0)>>2;
  } else {
    *p_left_start = 0;
    *p_left_end   = center;
  }

  if (cfg.blockflag && !nextFlag) {
    // long block whose next-window flag is clear: short-sized right overlap
    *p_right_start = (blockLen*3-f.blocksize_0)>>2;
    *p_right_end   = (blockLen*3+f.blocksize_0)>>2;
  } else {
    *p_right_start = center;
    *p_right_end   = blockLen;
  }

  return true;
}
2510 
// Decodes the body of an audio packet whose mode and window boundaries were
// already determined (see vorbis_decode_initial).
//
// Stages, in order:
//   1. per-channel floor decode (floor 1 only; floor type 0 is rejected with
//      STBVorbisError.invalid_stream)
//   2. residue decode, one decode_residue() call per submap
//   3. inverse channel coupling
//   4. floor application (deferred to this point unless
//      STB_VORBIS_NO_DEFER_FLOOR is set)
//   5. inverse MDCT of every channel buffer
//   6. sample-position bookkeeping (first-frame discard, ogg granule position,
//      truncated final frame)
//
// Params:
//   f           = decoder state
//   len         = out: right_end, or a shortened length for the final frame
//                 of the last page
//   m           = mode selected for this packet
//   left_start  = start of the left overlap region (may be advanced here)
//   left_end    = end of the left overlap region (not read in this function)
//   right_start = start of the right overlap region
//   right_end   = end of the right overlap region
//   p_left      = out: receives the adjusted left_start when deferred discard
//                 samples are consumed
//
// Returns true on success, or the result of error() for an unsupported floor
// type (presumably false -- defined elsewhere in the file).
2511 private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
2512   import core.stdc.string : memcpy, memset;
2513 
2514   Mapping* map;
2515   int n, n2;
2516   int[256] zero_channel;
2517   int[256] really_zero_channel;
2518 
2519   // WINDOWING
2520   n = f.blocksize.ptr[m.blockflag];
2521   map = &f.mapping[m.mapping];
2522 
2523   // FLOORS
2524   n2 = n>>1;
2525 
2526   //stb_prof(1);
2527   foreach (immutable i; 0..f.vrchannels) {
2528     int s = map.chan[i].mux, floor;
2529     zero_channel[i] = false;
2530     floor = map.submap_floor.ptr[s];
2531     if (f.floor_types.ptr[floor] == 0) {
2532       return error(f, STBVorbisError.invalid_stream);
2533     } else {
2534       Floor1* g = &f.floor_config[floor].floor1;
       // one flag bit per channel: set means floor data follows, clear means
       // the channel is marked as zero for this packet
2535       if (get_bits!1(f)) {
2536         short* finalY;
2537         ubyte[256] step2_flag = void;
2538         immutable int[4] range_list = [ 256, 128, 86, 64 ];
2539         int range = range_list[g.floor1_multiplier-1];
2540         int offset = 2;
2541         finalY = f.finalY.ptr[i];
         // the first two Y values are stored directly
2542         finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
2543         finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8
         // the remaining Y values are codebook-coded, partition by partition
2544         foreach (immutable j; 0..g.partitions) {
2545           int pclass = g.partition_class_list.ptr[j];
2546           int cdim = g.class_dimensions.ptr[pclass];
2547           int cbits = g.class_subclasses.ptr[pclass];
2548           int csub = (1<<cbits)-1;
2549           int cval = 0;
2550           if (cbits) {
2551             Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
2552             mixin(DECODE!("cval", "cc"));
2553           }
2554           foreach (immutable k; 0..cdim) {
             // the low cbits bits of cval pick the subclass book for this value
2555             int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
2556             cval = cval>>cbits;
2557             if (book >= 0) {
2558               int temp;
2559               Codebook *cc = f.codebooks+book;
2560               mixin(DECODE!("temp", "cc"));
2561               finalY[offset++] = cast(short)temp; //k8
2562             } else {
2563               finalY[offset++] = 0;
2564             }
2565           }
2566         }
         // ran out of bits while reading the floor: jump to the `error` label
         // below, which marks the channel as zero
2567         if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
         // turn the decoded values into absolute Y positions; step2_flag[j]
         // records which points carry an explicitly coded (non-zero) value
2568         step2_flag[0] = step2_flag[1] = 1;
2569         foreach (immutable j; 2..g.values) {
2570           int low = g.neighbors.ptr[j].ptr[0];
2571           int high = g.neighbors.ptr[j].ptr[1];
2572           //neighbors(g.Xlist, j, &low, &high);
2573           int pred = void;
2574           mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
2575           int val = finalY[j];
2576           int highroom = range-pred;
2577           int lowroom = pred;
2578           auto room = (highroom < lowroom ? highroom : lowroom)*2;
2579           if (val) {
2580             step2_flag[low] = step2_flag[high] = 1;
2581             step2_flag[j] = 1;
2582             if (val >= room) {
2583               finalY[j] = cast(short)(highroom > lowroom ? val-lowroom+pred : pred-val+highroom-1); //k8
2584             } else {
2585               finalY[j] = cast(short)(val&1 ? pred-((val+1)>>1) : pred+(val>>1)); //k8
2586             }
2587           } else {
2588             step2_flag[j] = 0;
2589             finalY[j] = cast(short)pred; //k8
2590           }
2591         }
2592 
2593         version(STB_VORBIS_NO_DEFER_FLOOR) {
2594           do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
2595         } else {
2596           // defer final floor computation until _after_ residue
           // unused points are tagged with -1; do_floor() skips negative entries
2597           foreach (immutable j; 0..g.values) if (!step2_flag[j]) finalY[j] = -1;
2598         }
2599       } else {
2600   error:
2601         zero_channel[i] = true;
2602       }
2603       // So we just defer everything else to later
2604       // at this point we've decoded the floor into buffer
2605     }
2606   }
2607   //stb_prof(0);
2608   // at this point we've decoded all floors
2609 
2610   //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2611 
2612   // re-enable coupled channels if necessary
     // really_zero_channel keeps the flags as decoded; zero_channel is relaxed
     // below so that both members of a coupled pair are decoded if either is
     // non-zero
2613   memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof*f.vrchannels);
2614   foreach (immutable i; 0..map.coupling_steps) {
2615     if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
2616       zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = false;
2617     }
2618   }
2619 
2620   // RESIDUE DECODE
2621   foreach (immutable i; 0..map.submaps) {
2622     float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
2623     ubyte[256] do_not_decode = void;
2624     int ch = 0;
       // gather the channels assigned to submap i
2625     foreach (immutable j; 0..f.vrchannels) {
2626       if (map.chan[j].mux == i) {
2627         if (zero_channel[j]) {
2628           do_not_decode[ch] = true;
2629           residue_buffers.ptr[ch] = null;
2630         } else {
2631           do_not_decode[ch] = false;
2632           residue_buffers.ptr[ch] = f.channel_buffers.ptr[j];
2633         }
2634         ++ch;
2635       }
2636     }
2637     int r = map.submap_residue.ptr[i];
2638     decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
2639   }
2640 
2641   //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2642 
2643    // INVERSE COUPLING
2644   //stb_prof(14);
     // coupling steps are undone in reverse order
2645   foreach_reverse (immutable i; 0..map.coupling_steps) {
2646     int n2n = n>>1;
2647     float* mm = f.channel_buffers.ptr[map.chan[i].magnitude];
2648     float* a = f.channel_buffers.ptr[map.chan[i].angle];
2649     foreach (immutable j; 0..n2n) {
2650       float a2, m2;
2651       if (mm[j] > 0) {
2652         if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]-a[j]; } else { a2 = mm[j]; m2 = mm[j]+a[j]; }
2653       } else {
2654         if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]+a[j]; } else { a2 = mm[j]; m2 = mm[j]-a[j]; }
2655       }
2656       mm[j] = m2;
2657       a[j] = a2;
2658     }
2659   }
2660 
2661   // finish decoding the floors
2662   version(STB_VORBIS_NO_DEFER_FLOOR) {
2663     foreach (immutable i; 0..f.vrchannels) {
2664       if (really_zero_channel[i]) {
2665         memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
2666       } else {
2667         foreach (immutable j; 0..n2) f.channel_buffers.ptr[i].ptr[j] *= f.floor_buffers.ptr[i].ptr[j];
2668       }
2669     }
2670   } else {
2671     //stb_prof(15);
2672     foreach (immutable i; 0..f.vrchannels) {
2673       if (really_zero_channel[i]) {
2674         memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
2675       } else {
2676         do_floor(f, map, i, n, f.channel_buffers.ptr[i], f.finalY.ptr[i], null);
2677       }
2678     }
2679   }
2680 
2681   // INVERSE MDCT
2682   //stb_prof(16);
2683   foreach (immutable i; 0..f.vrchannels) inverse_mdct(f.channel_buffers.ptr[i], n, f, m.blockflag);
2684   //stb_prof(0);
2685 
2686   // this shouldn't be necessary, unless we exited on an error
2687   // and want to flush to get to the next packet
2688   flush_packet(f);
2689 
2690   if (f.first_decode) {
2691     // assume we start so first non-discarded sample is sample 0
2692     // this isn't to spec, but spec would require us to read ahead
2693     // and decode the size of all current frames--could be done,
2694     // but presumably it's not a commonly used feature
2695     f.current_loc = -n2; // start of first frame is positioned for discard
2696     // we might have to discard samples "from" the next frame too,
2697     // if we're lapping a large block then a small at the start?
2698     f.discard_samples_deferred = n-right_end;
2699     f.current_loc_valid = true;
2700     f.first_decode = false;
2701   } else if (f.discard_samples_deferred) {
       // consume pending discard samples by moving left_start forward
2702     if (f.discard_samples_deferred >= right_start-left_start) {
2703       f.discard_samples_deferred -= (right_start-left_start);
2704       left_start = right_start;
2705       *p_left = left_start;
2706     } else {
2707       left_start += f.discard_samples_deferred;
2708       *p_left = left_start;
2709       f.discard_samples_deferred = 0;
2710     }
2711   } else if (f.previous_length == 0 && f.current_loc_valid) {
2712     // we're recovering from a seek... that means we're going to discard
2713     // the samples from this packet even though we know our position from
2714     // the last page header, so we need to update the position based on
2715     // the discarded samples here
2716     // but wait, the code below is going to add this in itself even
2717     // on a discard, so we don't need to do it here...
2718   }
2719 
2720   // check if we have ogg information about the sample # for this packet
2721   if (f.last_seg_which == f.end_seg_with_known_loc) {
2722     // if we have a valid current loc, and this is final:
2723     if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
2724       uint current_end = f.known_loc_for_packet-(n-right_end);
2725       // then let's infer the size of the (probably) short final frame
2726       if (current_end < f.current_loc+right_end) {
2727         if (current_end < f.current_loc+(right_end-left_start)) {
2728           // negative truncation, that's impossible!
2729           *len = 0;
2730         } else {
2731           *len = current_end-f.current_loc;
2732         }
2733         *len += left_start;
2734         if (*len > right_end) *len = right_end; // this should never happen
2735         f.current_loc += *len;
2736         return true;
2737       }
2738     }
2739     // otherwise, just set our sample loc
2740     // guess that the ogg granule pos refers to the _middle_ of the
2741     // last frame?
2742     // set f.current_loc to the position of left_start
2743     f.current_loc = f.known_loc_for_packet-(n2-left_start);
2744     f.current_loc_valid = true;
2745   }
2746   if (f.current_loc_valid) f.current_loc += (right_start-left_start);
2747 
2748   //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2749 
2750   *len = right_end;  // ignore samples after the window goes to 0
2751   return true;
2752 }
2753 
// Decodes one complete audio packet.
//
// Runs vorbis_decode_initial() to obtain the mode and window boundaries and,
// when that succeeds, hands them to vorbis_decode_packet_rest().
// `*p_left` and `*p_right` receive the starts of the left and right overlap
// regions; `*len` is filled in by vorbis_decode_packet_rest().
//
// Returns 0 when no packet could be started, otherwise the result of
// vorbis_decode_packet_rest().
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int modeIndex, leftEnd, rightEnd;
  immutable started = vorbis_decode_initial(f, p_left, &leftEnd, p_right, &rightEnd, &modeIndex);
  if (!started) return 0;
  Mode* selected = f.mode_config.ptr+modeIndex;
  return vorbis_decode_packet_rest(f, len, selected, *p_left, leftEnd, *p_right, rightEnd, p_left);
}
2759 
// Overlap-adds the freshly decoded frame with the tail saved from the
// previous frame, then saves this frame's tail for the next call.
//
// Params:
//   f     = decoder state
//   len   = number of usable samples in the current frame
//   left  = where this frame's rising window starts, i.e. where mixing with
//           the previous frame's saved tail begins
//   right = where this frame's falling window starts, i.e. where the returned
//           data ends and the saved tail begins
//
// Returns the number of finished samples (right-left, with `right` clamped to
// `len` for a short frame). Returns 0 when there was no previous frame to
// overlap with, or when the saved overlap length is not a valid window size.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  // we use right&left (the start of the right- and left-window sin()-regions)
  // to determine how much to return, rather than inferring from the rules
  // (same result, clearer code)

  // mixin from previous window
  if (f.previous_length) {
    immutable int n = f.previous_length;
    // previous_length is stored as len-right, and len may have been shortened
    // for a truncated final frame, so on malformed input it can be negative
    // or otherwise not half of a block size. get_window() hits assert(0) on
    // such a length; report "no samples" instead of aborting.
    if ((n<<1) != f.blocksize_0 && (n<<1) != f.blocksize_1) return 0;
    float* w = get_window(f, n);
    foreach (immutable i; 0..f.vrchannels) {
      foreach (immutable j; 0..n) {
        (f.channel_buffers.ptr[i])[left+j] =
          (f.channel_buffers.ptr[i])[left+j]*w[    j]+
          (f.previous_window.ptr[i])[     j]*w[n-1-j];
      }
    }
  }

  auto prev = f.previous_length;

  // last half of this data becomes previous window
  f.previous_length = len-right;

  // @OPTIMIZE: could avoid this copy by double-buffering the
  // output (flipping previous_window with channel_buffers), but
  // then previous_window would have to be 2x as large, and
  // channel_buffers couldn't be temp mem (although they're NOT
  // currently temp mem, they could be (unless we want to level
  // performance by spreading out the computation))
  foreach (immutable i; 0..f.vrchannels) {
    for (uint j = 0; right+j < len; ++j) (f.previous_window.ptr[i])[j] = (f.channel_buffers.ptr[i])[right+j];
  }

  if (!prev) {
    // there was no previous packet, so this data isn't valid...
    // this isn't entirely true, only the would-have-overlapped data
    // isn't valid, but this seems to be what the spec requires
    return 0;
  }

  // truncate a short frame
  if (len < right) right = len;

  f.samples_output += right-left;

  return right-left;
}
2811 
// Decodes one packet and runs it through vorbis_finish_frame().
// The sample count returned by vorbis_finish_frame() is ignored.
// Returns true when a packet was decoded, false otherwise.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int frameLen, rightPos, leftPos;
  if (!vorbis_decode_packet(f, &frameLen, &leftPos, &rightPos)) return false;
  vorbis_finish_frame(f, frameLen, leftPos, rightPos);
  return true;
}
2820 
2821 /+ k8: i don't need that, so it's dead
2822 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2823   import core.stdc.string : memcmp;
2824 
2825   // make sure that we have the packet available before continuing...
2826   // this requires a full ogg parse, but we know we can fetch from f.stream
2827 
2828   // instead of coding this out explicitly, we could save the current read state,
2829   // read the next packet with get8() until end-of-packet, check f.eof, then
2830   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2831   // of state to restore (primarily the page segment table)
2832 
2833   int s = f.next_seg, first = true;
2834   ubyte *p = f.stream;
2835 
2836   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2837     for (; s < f.segment_count; ++s) {
2838       p += f.segments[s];
2839       if (f.segments[s] < 255) break; // stop at first short segment
2840     }
2841     // either this continues, or it ends it...
2842     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2843     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2844     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2845     first = false;
2846   }
2847   while (s == -1) {
2848     ubyte* q = void;
2849     int n = void;
2850     // check that we have the page header ready
2851     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2852     // validate the page
2853     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2854     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2855     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2856       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2857       // if no previous length, we're resynching, so we can come in on a continued-packet,
2858       // which we'll just drop
2859     } else {
2860       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2861     }
2862     n = p[26]; // segment counts
2863     q = p+27; // q points to segment table
2864     p = q+n; // advance past header
2865     // make sure we've read the segment table
2866     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2867     for (s = 0; s < n; ++s) {
2868       p += q[s];
2869       if (q[s] < 255) break;
2870     }
2871     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2872     if (s == n) s = -1; // set 'crosses page' flag
2873     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2874     first = false;
2875   }
2876   return true;
2877 }
2878 +/
2879 
2880 private int start_decoder (VorbisDecoder f) {
2881   import core.stdc.string : memcpy, memset;
2882 
2883   ubyte[6] header;
2884   ubyte x, y;
2885   int len, max_submaps = 0;
2886   int longest_floorlist = 0;
2887 
2888   // first page, first packet
2889 
2890   if (!start_page(f)) return false;
2891   // validate page flag
2892   if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
2893   if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
2894   if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
2895   // check for expected packet length
2896   if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
2897   if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
2898   // read packet
2899   // check packet header
2900   if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
2901   if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
2902   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
2903   // vorbis_version
2904   if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
2905   f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
2906   if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
2907   f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
2908   get32(f); // bitrate_maximum
2909   get32(f); // bitrate_nominal
2910   get32(f); // bitrate_minimum
2911   x = get8(f);
2912   {
2913     int log0 = x&15;
2914     int log1 = x>>4;
2915     f.blocksize_0 = 1<<log0;
2916     f.blocksize_1 = 1<<log1;
2917     if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
2918     if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
2919     if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
2920   }
2921 
2922   // framing_flag
2923   x = get8(f);
2924   if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);
2925 
2926   // second packet! (comments)
2927   if (!start_page(f)) return false;
2928 
2929   // read comments
2930   if (!start_packet(f)) return false;
2931 
2932   if (f.read_comments) {
2933     /+if (f.push_mode) {
2934       if (!is_whole_packet_present(f, true)) {
2935         // convert error in ogg header to write type
2936         if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2937         return false;
2938       }
2939     }+/
2940     if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
2941     foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2942     if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2943 
2944     // skip vendor id
2945     uint vidsize = get32_packet(f);
2946     //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
2947     if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
2948     while (vidsize--) get8_packet(f);
2949 
2950     // read comments section
2951     uint cmtcount = get32_packet(f);
2952     if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
2953     if (cmtcount > 0) {
2954       uint cmtsize = 32768; // this should be enough for everyone
2955       f.comment_data = setup_malloc!ubyte(f, cmtsize);
2956       if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
2957       auto cmtpos = 0;
2958       auto d = f.comment_data;
2959       while (cmtcount--) {
2960         uint linelen = get32_packet(f);
2961         //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
2962         if (linelen == EOP || linelen > ushort.max-2) break;
2963         if (linelen == 0) { continue; }
2964         if (cmtpos+2+linelen > cmtsize) break;
2965         cmtpos += linelen+2;
2966         *d++ = (linelen+2)&0xff;
2967         *d++ = ((linelen+2)>>8)&0xff;
2968         while (linelen--) {
2969           auto b = get8_packet(f);
2970           if (b == EOP) return error(f, STBVorbisError.outofmem);
2971           *d++ = cast(ubyte)b;
2972         }
2973         //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
2974         f.comment_size = cmtpos;
2975       }
2976     }
2977     flush_packet(f);
2978     f.comment_rewind();
2979   } else {
2980     // skip comments
2981     do {
2982       len = next_segment(f);
2983       skip(f, len);
2984       f.bytes_in_seg = 0;
2985     } while (len);
2986   }
2987 
2988   // third packet!
2989   if (!start_packet(f)) return false;
2990 
2991   /+if (f.push_mode) {
2992     if (!is_whole_packet_present(f, true)) {
2993       // convert error in ogg header to write type
2994       if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2995       return false;
2996     }
2997   }+/
2998 
2999   if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
3000   foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
3001   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
3002 
3003   // codebooks
3004   f.codebook_count = get_bits!8(f)+1;
3005   f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
3006   static assert((*f.codebooks).sizeof == Codebook.sizeof);
3007   if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
3008   memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
3009   foreach (immutable i; 0..f.codebook_count) {
3010     uint* values;
3011     int ordered, sorted_count;
3012     int total = 0;
3013     ubyte* lengths;
3014     Codebook* c = f.codebooks+i;
3015     x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
3016     x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
3017     x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
3018     x = get_bits!8(f);
3019     c.dimensions = (get_bits!8(f)<<8)+x;
3020     x = get_bits!8(f);
3021     y = get_bits!8(f);
3022     c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
3023     ordered = get_bits!1(f);
3024     c.sparse = (ordered ? 0 : get_bits!1(f));
3025 
3026     if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);
3027 
3028     if (c.sparse) {
3029       lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
3030     } else {
3031       lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3032     }
3033 
3034     if (lengths is null) return error(f, STBVorbisError.outofmem);
3035 
3036     if (ordered) {
3037       int current_entry = 0;
3038       int current_length = get_bits_add_no!5(f, 1);
3039       while (current_entry < c.entries) {
3040         int limit = c.entries-current_entry;
3041         int n = get_bits_main(f, ilog(limit));
3042         if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
3043         memset(lengths+current_entry, current_length, n);
3044         current_entry += n;
3045         ++current_length;
3046       }
3047     } else {
3048       foreach (immutable j; 0..c.entries) {
3049         int present = (c.sparse ? get_bits!1(f) : 1);
3050         if (present) {
3051           lengths[j] = get_bits_add_no!5(f, 1);
3052           ++total;
3053           if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
3054         } else {
3055           lengths[j] = NO_CODE;
3056         }
3057       }
3058     }
3059 
3060     if (c.sparse && total >= c.entries>>2) {
3061       // convert sparse items to non-sparse!
3062       if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
3063       c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3064       if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
3065       memcpy(c.codeword_lengths, lengths, c.entries);
3066       setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
3067       lengths = c.codeword_lengths;
3068       c.sparse = 0;
3069     }
3070 
3071     // compute the size of the sorted tables
3072     if (c.sparse) {
3073       sorted_count = total;
3074     } else {
3075       sorted_count = 0;
3076       version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
3077         foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
3078       }
3079     }
3080 
3081     c.sorted_entries = sorted_count;
3082     values = null;
3083 
3084     if (!c.sparse) {
3085       c.codewords = setup_malloc!uint(f, c.entries);
3086       if (!c.codewords) return error(f, STBVorbisError.outofmem);
3087     } else {
3088       if (c.sorted_entries) {
3089         c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
3090         if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
3091         c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3092         if (!c.codewords) return error(f, STBVorbisError.outofmem);
3093         values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
3094         if (!values) return error(f, STBVorbisError.outofmem);
3095       }
3096       uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
3097       if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
3098     }
3099 
3100     if (!compute_codewords(c, lengths, c.entries, values)) {
3101       if (c.sparse) setup_temp_free(f, values, 0);
3102       return error(f, STBVorbisError.invalid_setup);
3103     }
3104 
3105     if (c.sorted_entries) {
3106       // allocate an extra slot for sentinels
3107       c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
3108       if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
3109       // allocate an extra slot at the front so that c.sorted_values[-1] is defined
3110       // so that we can catch that case without an extra if
3111       c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
3112       if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
3113       ++c.sorted_values;
3114       c.sorted_values[-1] = -1;
3115       compute_sorted_huffman(c, lengths, values);
3116     }
3117 
3118     if (c.sparse) {
3119       setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
3120       setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3121       setup_temp_free(f, lengths, c.entries);
3122       c.codewords = null;
3123     }
3124 
3125     compute_accelerated_huffman(c);
3126 
3127     c.lookup_type = get_bits!4(f);
3128     if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
3129     if (c.lookup_type > 0) {
3130       ushort* mults;
3131       c.minimum_value = float32_unpack(get_bits!32(f));
3132       c.delta_value = float32_unpack(get_bits!32(f));
3133       c.value_bits = get_bits_add_no!4(f, 1);
3134       c.sequence_p = get_bits!1(f);
3135       if (c.lookup_type == 1) {
3136         c.lookup_values = lookup1_values(c.entries, c.dimensions);
3137       } else {
3138         c.lookup_values = c.entries*c.dimensions;
3139       }
3140       if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
3141       mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
3142       if (mults is null) return error(f, STBVorbisError.outofmem);
3143       foreach (immutable j; 0..cast(int)c.lookup_values) {
3144         int q = get_bits_main(f, c.value_bits);
3145         if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
3146         mults[j] = cast(ushort)q; //k8
3147       }
3148 
3149       version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
3150         if (c.lookup_type == 1) {
3151           int sparse = c.sparse; //len
3152           float last = 0;
3153           // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3154           if (sparse) {
3155             if (c.sorted_entries == 0) goto skip;
3156             c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
3157           } else {
3158             c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
3159           }
3160           if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3161           foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
3162             uint z = (sparse ? c.sorted_values[j] : j);
3163             uint div = 1;
3164             foreach (immutable k; 0..c.dimensions) {
3165               int off = (z/div)%c.lookup_values;
3166               float val = mults[off];
3167               val = val*c.delta_value+c.minimum_value+last;
3168               c.multiplicands[j*c.dimensions+k] = val;
3169               if (c.sequence_p) last = val;
3170               if (k+1 < c.dimensions) {
3171                  if (div > uint.max/cast(uint)c.lookup_values) {
3172                     setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3173                     return error(f, STBVorbisError.invalid_setup);
3174                  }
3175                  div *= c.lookup_values;
3176               }
3177             }
3178           }
3179           c.lookup_type = 2;
3180           goto skip;
3181         }
3182         //else
3183       }
3184       {
3185         float last = 0;
3186         c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
3187         if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3188         foreach (immutable j; 0..cast(int)c.lookup_values) {
3189           float val = mults[j]*c.delta_value+c.minimum_value+last;
3190           c.multiplicands[j] = val;
3191           if (c.sequence_p) last = val;
3192         }
3193       }
3194      //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
3195      skip: // this is versioned out in C
3196       setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3197     }
3198   }
3199 
3200   // time domain transfers (notused)
3201   x = get_bits_add_no!6(f, 1);
3202   foreach (immutable i; 0..x) {
3203     auto z = get_bits!16(f);
3204     if (z != 0) return error(f, STBVorbisError.invalid_setup);
3205   }
3206 
3207   // Floors
3208   f.floor_count = get_bits_add_no!6(f, 1);
3209   f.floor_config = setup_malloc!Floor(f, f.floor_count);
3210   if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
3211   foreach (immutable i; 0..f.floor_count) {
3212     f.floor_types[i] = get_bits!16(f);
3213     if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
3214     if (f.floor_types[i] == 0) {
3215       Floor0* g = &f.floor_config[i].floor0;
3216       g.order = get_bits!8(f);
3217       g.rate = get_bits!16(f);
3218       g.bark_map_size = get_bits!16(f);
3219       g.amplitude_bits = get_bits!6(f);
3220       g.amplitude_offset = get_bits!8(f);
3221       g.number_of_books = get_bits_add_no!4(f, 1);
3222       foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
3223       return error(f, STBVorbisError.feature_not_supported);
3224     } else {
3225       Point[31*8+2] p;
3226       Floor1 *g = &f.floor_config[i].floor1;
3227       int max_class = -1;
3228       g.partitions = get_bits!5(f);
3229       foreach (immutable j; 0..g.partitions) {
3230         g.partition_class_list[j] = get_bits!4(f);
3231         if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
3232       }
3233       foreach (immutable j; 0..max_class+1) {
3234         g.class_dimensions[j] = get_bits_add_no!3(f, 1);
3235         g.class_subclasses[j] = get_bits!2(f);
3236         if (g.class_subclasses[j]) {
3237           g.class_masterbooks[j] = get_bits!8(f);
3238           if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3239         }
3240         foreach (immutable k; 0..1<<g.class_subclasses[j]) {
3241           g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
3242           if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3243         }
3244       }
3245       g.floor1_multiplier = get_bits_add_no!2(f, 1);
3246       g.rangebits = get_bits!4(f);
3247       g.Xlist[0] = 0;
3248       g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
3249       g.values = 2;
3250       foreach (immutable j; 0..g.partitions) {
3251         int c = g.partition_class_list[j];
3252         foreach (immutable k; 0..g.class_dimensions[c]) {
3253           g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
3254           ++g.values;
3255         }
3256       }
3257       assert(g.values <= ushort.max);
3258       // precompute the sorting
3259       foreach (ushort j; 0..cast(ushort)g.values) {
3260         p[j].x = g.Xlist[j];
3261         p[j].y = j;
3262       }
3263       qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3264       foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
3265       // precompute the neighbors
3266       foreach (uint j; 2..g.values) {
3267         ushort low = void, hi = void;
3268         neighbors(g.Xlist.ptr, j, &low, &hi);
3269         assert(low <= ubyte.max);
3270         assert(hi <= ubyte.max);
3271         g.neighbors[j].ptr[0] = cast(ubyte)low;
3272         g.neighbors[j].ptr[1] = cast(ubyte)hi;
3273       }
3274       if (g.values > longest_floorlist) longest_floorlist = g.values;
3275     }
3276   }
3277 
3278   // Residue
3279   f.residue_count = get_bits_add_no!6(f, 1);
3280   f.residue_config = setup_malloc!Residue(f, f.residue_count);
3281   if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
3282   memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
3283   foreach (immutable i; 0..f.residue_count) {
3284     ubyte[64] residue_cascade;
3285     Residue* r = f.residue_config+i;
3286     f.residue_types[i] = get_bits!16(f);
3287     if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
3288     r.begin = get_bits!24(f);
3289     r.end = get_bits!24(f);
3290     if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
3291     r.part_size = get_bits_add_no!24(f, 1);
3292     r.classifications = get_bits_add_no!6(f, 1);
3293     r.classbook = get_bits!8(f);
3294     if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3295     foreach (immutable j; 0..r.classifications) {
3296       ubyte high_bits = 0;
3297       ubyte low_bits = get_bits!3(f);
3298       if (get_bits!1(f)) high_bits = get_bits!5(f);
3299       assert(high_bits*8+low_bits <= ubyte.max);
3300       residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
3301     }
3302     static assert(r.residue_books[0].sizeof == 16);
3303     r.residue_books = setup_malloc!(short[8])(f, r.classifications);
3304     if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
3305     foreach (immutable j; 0..r.classifications) {
3306       foreach (immutable k; 0..8) {
3307         if (residue_cascade[j]&(1<<k)) {
3308           r.residue_books[j].ptr[k] = get_bits!8(f);
3309           if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3310         } else {
3311           r.residue_books[j].ptr[k] = -1;
3312         }
3313       }
3314     }
3315     // precompute the classifications[] array to avoid inner-loop mod/divide
3316     // call it 'classdata' since we already have r.classifications
3317     r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
3318     if (!r.classdata) return error(f, STBVorbisError.outofmem);
3319     memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
3320     foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
3321       int classwords = f.codebooks[r.classbook].dimensions;
3322       int temp = j;
3323       r.classdata[j] = setup_malloc!ubyte(f, classwords);
3324       if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
3325       foreach_reverse (immutable k; 0..classwords) {
3326         assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
3327         r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
3328         temp /= r.classifications;
3329       }
3330     }
3331   }
3332 
3333   f.mapping_count = get_bits_add_no!6(f, 1);
3334   f.mapping = setup_malloc!Mapping(f, f.mapping_count);
3335   if (f.mapping is null) return error(f, STBVorbisError.outofmem);
3336   memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
3337   foreach (immutable i; 0..f.mapping_count) {
3338     Mapping* m = f.mapping+i;
3339     int mapping_type = get_bits!16(f);
3340     if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
3341     m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
3342     if (m.chan is null) return error(f, STBVorbisError.outofmem);
3343     m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
3344     if (m.submaps > max_submaps) max_submaps = m.submaps;
3345     if (get_bits!1(f)) {
3346       m.coupling_steps = get_bits_add_no!8(f, 1);
3347       foreach (immutable k; 0..m.coupling_steps) {
3348         m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3349         m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3350         if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3351         if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3352         if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
3353       }
3354     } else {
3355       m.coupling_steps = 0;
3356     }
3357 
3358     // reserved field
3359     if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
3360     if (m.submaps > 1) {
3361       foreach (immutable j; 0..f.vrchannels) {
3362         m.chan[j].mux = get_bits!4(f);
3363         if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
3364       }
3365     } else {
3366       // @SPECIFICATION: this case is missing from the spec
3367       foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
3368     }
3369     foreach (immutable j; 0..m.submaps) {
3370       get_bits!8(f); // discard
3371       m.submap_floor[j] = get_bits!8(f);
3372       m.submap_residue[j] = get_bits!8(f);
3373       if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
3374       if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
3375     }
3376   }
3377 
3378   // Modes
3379   f.mode_count = get_bits_add_no!6(f, 1);
3380   foreach (immutable i; 0..f.mode_count) {
3381     Mode* m = f.mode_config.ptr+i;
3382     m.blockflag = get_bits!1(f);
3383     m.windowtype = get_bits!16(f);
3384     m.transformtype = get_bits!16(f);
3385     m.mapping = get_bits!8(f);
3386     if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
3387     if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
3388     if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
3389   }
3390 
3391   flush_packet(f);
3392 
3393   f.previous_length = 0;
3394 
3395   foreach (immutable i; 0..f.vrchannels) {
3396     f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
3397     f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3398     f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
3399     if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3400     version(STB_VORBIS_NO_DEFER_FLOOR) {
3401       f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3402       if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3403     }
3404   }
3405 
3406   if (!init_blocksize(f, 0, f.blocksize_0)) return false;
3407   if (!init_blocksize(f, 1, f.blocksize_1)) return false;
3408   f.blocksize.ptr[0] = f.blocksize_0;
3409   f.blocksize.ptr[1] = f.blocksize_1;
3410 
3411   version(STB_VORBIS_DIVIDE_TABLE) {
3412     if (integer_divide_table[1].ptr[1] == 0) {
3413       foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
3414     }
3415   }
3416 
3417   // compute how much temporary memory is needed
3418 
3419   // 1.
3420   {
3421     uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
3422     uint classify_mem;
3423     int max_part_read = 0;
3424     foreach (immutable i; 0..f.residue_count) {
3425       Residue* r = f.residue_config+i;
3426       int n_read = r.end-r.begin;
3427       int part_read = n_read/r.part_size;
3428       if (part_read > max_part_read) max_part_read = part_read;
3429     }
3430     version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
3431       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
3432     } else {
3433       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
3434     }
3435     f.temp_memory_required = classify_mem;
3436     if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
3437   }
3438 
3439   f.first_decode = true;
3440 
3441   /+
3442   if (f.alloc.alloc_buffer) {
3443     debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3444     // check if there's enough temp memory so we don't error later
3445     if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
3446   }
3447   +/
3448 
3449   f.first_audio_page_offset = f.fileOffset();
3450 
3451   return true;
3452 }
3453 
3454 /+
3455 private int vorbis_search_for_page_pushdata (VorbisDecoder f, ubyte* data, int data_len) {
3456   import core.stdc.string : memcmp;
3457 
3458   foreach (immutable i; 0..f.page_crc_tests) f.scan.ptr[i].bytes_done = 0;
3459 
3460   // if we have room for more scans, search for them first, because
3461   // they may cause us to stop early if their header is incomplete
3462   if (f.page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3463     if (data_len < 4) return 0;
3464     data_len -= 3; // need to look for 4-byte sequence, so don't miss one that straddles a boundary
3465     foreach (immutable i; 0..data_len) {
3466       if (data[i] == 0x4f) {
3467         if (memcmp(data+i, ogg_page_header.ptr, 4) == 0) {
3468           // make sure we have the whole page header
3469           if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3470             // only read up to this page start, so hopefully we'll
3471             // have the whole page header start next time
3472             data_len = i;
3473             break;
3474           }
3475           // ok, we have it all; compute the length of the page
3476           auto len = 27+data[i+26];
3477           foreach (immutable j; 0..data[i+26]) len += data[i+27+j];
3478           // scan everything up to the embedded crc (which we must 0)
3479           uint crc = 0;
3480           foreach (immutable j; 0..22) crc = crc32_update(crc, data[i+j]);
3481           // now process 4 0-bytes
3482           foreach (immutable j; 22..26) crc = crc32_update(crc, 0);
3483           // len is the total number of bytes we need to scan
3484           auto n = f.page_crc_tests++;
3485           f.scan.ptr[n].bytes_left = len-/*j*/26;
3486           f.scan.ptr[n].crc_so_far = crc;
3487           f.scan.ptr[n].goal_crc = data[i+22]+(data[i+23]<<8)+(data[i+24]<<16)+(data[i+25]<<24);
3488           // if the last frame on a page is continued to the next, then
3489           // we can't recover the sample_loc immediately
3490           if (data[i+27+data[i+26]-1] == 255) {
3491             f.scan.ptr[n].sample_loc = ~0;
3492           } else {
3493             f.scan.ptr[n].sample_loc = data[i+6]+(data[i+7]<<8)+(data[i+8]<<16)+(data[i+9]<<24);
3494           }
3495           f.scan.ptr[n].bytes_done = i+26/*j*/;
3496           if (f.page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) break;
3497           // keep going if we still have room for more
3498         }
3499       }
3500     }
3501   }
3502 
3503   for (uint i = 0; i < f.page_crc_tests; ) {
3504     int nn = f.scan.ptr[i].bytes_done;
3505     int m = f.scan.ptr[i].bytes_left;
3506     if (m > data_len-nn) m = data_len-nn;
3507     // m is the bytes to scan in the current chunk
3508     uint crc = f.scan.ptr[i].crc_so_far;
3509     foreach (immutable j; 0..m) crc = crc32_update(crc, data[nn+j]);
3510     f.scan.ptr[i].bytes_left -= m;
3511     f.scan.ptr[i].crc_so_far = crc;
3512     if (f.scan.ptr[i].bytes_left == 0) {
3513       // does it match?
3514       if (f.scan.ptr[i].crc_so_far == f.scan.ptr[i].goal_crc) {
3515         // Houston, we have page
3516         data_len = nn+m; // consumption amount is wherever that scan ended
3517         f.page_crc_tests = -1; // drop out of page scan mode
3518         f.previous_length = 0; // decode-but-don't-output one frame
3519         f.next_seg = -1;       // start a new page
3520         f.current_loc = f.scan.ptr[i].sample_loc; // set the current sample location to the amount we'd have decoded had we decoded this page
3521         f.current_loc_valid = f.current_loc != ~0U;
3522         return data_len;
3523       }
3524       // delete entry
3525       f.scan.ptr[i] = f.scan.ptr[--f.page_crc_tests];
3526     } else {
3527       ++i;
3528     }
3529   }
3530 
3531   return data_len;
3532 }
3533 +/
3534 
// Scans forward from the current read position for the next byte sequence
// that is a complete Ogg page with a matching CRC.
//
// Params:
//   f    = decoder to read from
//   end  = if non-null, receives the file offset just past the page found
//   last = if non-null, receives 1 when header byte 5 has bit 0x04 set, else 0
//
// Returns: 1 when a page was found (the read position is then moved back to
// the first byte of its capture pattern), 0 when the input ran out first.
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  while (!f.eof) {
    // only 0x4f ('O') can begin the capture pattern
    if (get8(f) != 0x4f) continue;

    // offset right after the 'O'; scanning resumes here if this candidate fails
    immutable uint candidate = f.fileOffset;
    // check if we're off the end of a file_section stream
    if (candidate-25 > f.stream_len) return 0;

    // match the remaining three capture-pattern bytes, stopping at the first mismatch
    uint matched = 1;
    while (matched < 4 && get8(f) == ogg_page_header[matched]) ++matched;
    if (f.eof) return 0;

    if (matched == 4) {
      ubyte[27] header;
      header[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
      foreach (ref b; header[4..$]) b = get8(f);
      if (f.eof) return 0;

      // header byte 4 must be zero for the candidate to be considered at all
      if (header[4] == 0) {
        // the stored checksum lives in bytes 22..25 (little-endian) and is
        // computed with those four bytes zeroed
        immutable uint goal = header[22]+(header[23]<<8)+(header[24]<<16)+(header[25]<<24);
        header[22..26] = 0;

        uint crc = 0;
        foreach (immutable b; header) crc = crc32_update(crc, b);

        // segment table: header[26] entries, whose sum is the payload length
        uint len = 0;
        foreach (immutable _; 0..header[26]) {
          auto s = get8(f);
          crc = crc32_update(crc, s);
          len += s;
        }
        if (len && f.eof) return 0;

        // payload
        foreach (immutable _; 0..len) crc = crc32_update(crc, get8(f));

        // finished parsing probable page
        if (crc == goal) {
          // no further validation (e.g. requiring a following capture
          // pattern) is done on purpose, so that files with garbage between
          // pages can still be recovered
          if (end) *end = f.fileOffset;
          if (last) *last = (header[5]&0x04 ? 1 : 0);
          set_file_offset(f, candidate-1);
          return 1;
        }
      }
    }

    // not a valid page, so rewind and look for next one
    set_file_offset(f, candidate);
  }
  return 0;
}
3589 
3590 enum SAMPLE_unknown = 0xffffffff;
3591 
3592 // seeking is implemented with a binary search, which narrows down the range to
3593 // 64K, before using a linear search (because finding the synchronization
3594 // pattern can be expensive, and the chance we'd find the end page again is
3595 // relatively high for small ranges)
3596 //
3597 // two initial interpolation-style probes are used at the start of the search
3598 // to try to bound either side of the binary search sensibly, while still
3599 // working in O(log n) time if they fail.
// Reads the page header located at the current file offset and fills `z`
// with the page's start offset, end offset and last decoded sample, then
// puts the file offset back where it was.
//
// Returns: 1 on success, 0 when the bytes at the current offset do not start
// with "OggS" (in that case the file offset is NOT restored).
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  ubyte[27] hdr;
  ubyte[255] segtable;

  // remember where this page begins
  z.page_start = f.fileOffset;

  // fixed-size part of the header, then the capture pattern check
  getn(f, hdr.ptr, 27);
  if (cast(const(char)[])hdr[0..4] != "OggS") return 0;

  // hdr[26] is the number of segment-table entries that follow
  getn(f, segtable.ptr, hdr[26]);

  // payload size is the sum of all segment sizes
  uint payload = 0;
  foreach (immutable sz; segtable[0..hdr[26]]) payload += sz;

  // header + segment table + payload
  z.page_end = z.page_start+27+hdr[26]+payload;

  // low 32 bits of the value stored in header bytes 6..9 (little-endian)
  z.last_decoded_sample = hdr[6]|(hdr[7]<<8)|(hdr[8]<<16)|(hdr[9]<<24);

  // restore file state to where we were
  set_file_offset(f, z.page_start);
  return 1;
}
3626 
// rarely used function to seek back to the preceding page while finding the start of a packet
//
// Scanning starts 64K before `limit_offset` (but never before the first audio
// page) and walks forward page by page. Returns 1 with the file positioned at
// the start of the page that begins before `limit_offset` and ends at or
// after it; returns 0 when no such page is found.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  // default to the first audio page; back off only 64K when that is still inside the audio data
  uint scanStart = f.first_audio_page_offset;
  if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset) scanStart = limit_offset-65536;

  set_file_offset(f, scanStart);

  uint pageEnd;
  while (vorbis_find_page(f, &pageEnd, null)) {
    // vorbis_find_page leaves the offset at the page start, so this is "starts before, ends at/after"
    if (pageEnd >= limit_offset && f.fileOffset < limit_offset) return 1;
    set_file_offset(f, pageEnd);
  }

  return 0;
}
3647 
// implements the search logic for finding a page and starting decoding. if
// the function succeeds, current_loc_valid will be true and current_loc will
// be less than or equal to the provided sample number (the closer the
// better).
//
// Params:
//   f             = decoder whose stream is searched and repositioned
//   sample_number = target sample; reduced internally by the window padding
//
// Returns: 1 on success. On failure returns whatever error() returns for
// seek_without_length / seek_invalid / seek_failed, or 0 when the first frame
// at the chosen position cannot be decoded. Failures that go through the
// `error` label first call f.seekStart to leave the stream in a usable state.
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  ProbedPage left, right, mid;
  int i, start_seg_with_known_loc, end_pos, page_start;
  uint delta, stream_length, padding;
  double offset, bytes_per_sample;
  int probe = 0; // 0 and 1 are the interpolation probes; 2+ are binary-search steps

  // find the last page and validate the target sample
  stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  padding = ((f.blocksize_1-f.blocksize_0)>>2);
  if (sample_number < padding) sample_number = 0; else sample_number -= padding;

  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) goto error;
  }

  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // starting from the start is handled differently
  if (sample_number <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  // narrow [left, right] until the two pages are adjacent
  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe < 2) {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(sample_number-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side)
          double error = (cast(double)sample_number-mid.last_decoded_sample)*bytes_per_sample;
          if (error >= 0 && error <  8000) error =  8000;
          if (error <  0 && error > -8000) error = -8000;
          offset += error*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      } else {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      }

      if (!vorbis_find_page(f, null, null)) goto error;
    }

    for (;;) {
      if (!get_seek_page_info(f, &mid)) goto error;
      if (mid.last_decoded_sample != ~0U) break;
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
    }

    // if we've just found the last page again then we're in a tricky file,
    // and we're close enough -- but only if this was a linear or binary-search
    // step. An interpolation probe is just a guess: when it overshoots onto
    // the right page while the range is still large, giving up here would
    // leave `left` far before the target, so move on to the next probe
    // instead of terminating the search.
    if (mid.page_start == right.page_start) {
      if (probe >= 2 || delta <= 65536) break;
    } else {
      if (sample_number < mid.last_decoded_sample) right = mid; else left = mid;
    }

    ++probe;
  }

  // seek back to start of the last packet
  page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  for (;;) {
    // walk back over 255-sized segments to the first segment of the packet
    for (i = end_pos; i > 0; --i) if (f.segments.ptr[i-1] != 255) break;
    start_seg_with_known_loc = i;
    if (start_seg_with_known_loc > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) goto error;
    page_start = f.fileOffset;
    if (!start_page(f)) goto error;
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = start_seg_with_known_loc;

  // skip the segments that precede the packet we want to start with
  for (i = 0; i < start_seg_with_known_loc; ++i) skip(f, f.segments.ptr[i]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;

error:
  // try to restore the file to a valid state
  f.seekStart;
  return error(f, STBVorbisError.seek_failed);
}
3778 
// the same as vorbis_decode_initial, but without advancing
//
// Runs vorbis_decode_initial() and then rewinds the bytes it consumed, so the
// same packet can be decoded again. Returns 0 when vorbis_decode_initial()
// fails (nothing is rewound in that case), 1 otherwise.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // bits consumed: 1, plus ilog(mode_count-1), plus 2 more when the selected
  // mode has its blockflag set; rounded up this is either 1 or 2 bytes
  immutable int headerBits = 1+ilog(f.mode_count-1)+(f.mode_config.ptr[*mode].blockflag ? 2 : 0);
  immutable int rewind = (headerBits+7)/8;

  // give the bytes back to the segment/packet counters and step the stream back
  f.bytes_in_seg += rewind;
  f.packet_bytes -= rewind;
  skip(f, -rewind);

  // step back to the segment the packet started in
  if (f.next_seg != -1) --f.next_seg; else f.next_seg = f.segment_count-1;

  // drop any partially consumed bits
  f.valid_bits = 0;

  return 1;
}
3796 
3797 // ////////////////////////////////////////////////////////////////////////// //
3798 // utility and supporting functions for getting s16 samples
// bit flags selecting which output(s) a source channel is mixed into
enum : int {
  PLAYBACK_MONO  = 0x01,
  PLAYBACK_LEFT  = 0x02,
  PLAYBACK_RIGHT = 0x04,
}

// shorthand flag combinations used by the table below
enum : int {
  L = PLAYBACK_LEFT|PLAYBACK_MONO,
  C = PLAYBACK_LEFT|PLAYBACK_RIGHT|PLAYBACK_MONO,
  R = PLAYBACK_RIGHT|PLAYBACK_MONO,
}

// channel_position[channel count][channel index] -> PLAYBACK_* flags of that
// channel; slots beyond the channel count are zero
immutable byte[6][7] channel_position = [
  [ 0, 0, 0, 0, 0, 0 ], // 0 channels
  [ C, 0, 0, 0, 0, 0 ], // 1 channel
  [ L, R, 0, 0, 0, 0 ], // 2 channels
  [ L, C, R, 0, 0, 0 ], // 3 channels
  [ L, R, L, R, 0, 0 ], // 4 channels
  [ L, C, R, L, R, 0 ], // 5 channels
  [ L, C, R, L, R, C ], // 6 channels
];
3816 
3817 
// String-mixin building blocks for converting a float sample to a scaled int.
//   declfcvar!name                  -> code declaring the scratch variable (if any)
//   FAST_SCALED_FLOAT_TO_INT!(x, s) -> code declaring `int v` holding the converted value of `x`
// The fast variant's generated code refers to a scratch variable named `temp`.
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // portable variant: no scratch variable, conversion goes through lrintf()
  template declfcvar(string name) {
    enum declfcvar = "{}";
  }
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    // the scale factor below is hard-coded for a shift of 15
    static assert(s == "15");
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // overlays a float and an int so the float's bit pattern can be read back
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);

  template declfcvar(string name) {
    enum declfcvar = "float_conv "~name~" = void;";
  }

  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  template MAGIC(string SHIFT) {
    enum MAGIC = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  }
  template ADDEND(string SHIFT) {
    enum ADDEND = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  }
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    enum FAST_SCALED_FLOAT_TO_INT = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
      .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
  }
}
3840 
// Converts `len` floats from `src` into signed 16-bit samples in `dest`,
// clamping anything outside the s16 range.
private void copy_samples (short* dest, float* src, int len) {
  mixin(declfcvar!"temp");
  for (int idx = 0; idx < len; ++idx) {
    mixin(FAST_SCALED_FLOAT_TO_INT!("src[idx]", "15"));
    // a single unsigned compare catches both underflow and overflow
    if (cast(uint)(v+32768) > 65535) v = (v >= 0 ? 32767 : -32768);
    dest[idx] = cast(short)v;
  }
}
3851 
// Mixes every source channel whose channel_position entry intersects `mask`
// into one s16 output stream, working through the input in chunks of 32 samples.
//   output   - destination, receives `len` samples
//   num_c    - number of source channels (also selects the channel_position row)
//   data     - per-channel float buffers, read starting at `d_offset`
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  enum CHUNK = 32;
  float[CHUNK] mix;
  int count = CHUNK;
  mixin(declfcvar!"temp");
  for (uint base = 0; base < len; base += CHUNK) {
    mix[] = 0.0f;
    // the final chunk may be shorter
    if (base+count > len) count = len-base;
    // accumulate all contributing channels
    for (int ch = 0; ch < num_c; ++ch) {
      if ((channel_position[num_c].ptr[ch]&mask) == 0) continue;
      for (int k = 0; k < count; ++k) mix.ptr[k] += data[ch][d_offset+base+k];
    }
    // convert the mixed chunk, clamping to the s16 range
    for (int k = 0; k < count; ++k) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("mix[k]", "15"));
      if (cast(uint)(v+32768) > 65535) v = (v >= 0 ? 32767 : -32768);
      output[base+k] = cast(short)v;
    }
  }
}
3872 
// Mixes `num_c` source channels down to interleaved stereo s16.
// Each source channel goes to the left slot, the right slot, or both, according to
// its channel_position flags. `output` receives `len` left/right pairs (2*len shorts).
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  enum CHUNK = 32;       // interleaved floats per chunk
  enum FRAMES = CHUNK>>1; // stereo frames per chunk
  float[CHUNK] mix;
  int count = FRAMES;
  mixin(declfcvar!"temp");
  // `src` indexes the source data, `dst` the interleaved output
  for (uint src = 0; src < len; src += FRAMES) {
    int dst = src<<1;
    mix[] = 0.0f;
    if (src+count > len) count = len-src;
    for (int ch = 0; ch < num_c; ++ch) {
      immutable int side = channel_position[num_c].ptr[ch]&(PLAYBACK_LEFT|PLAYBACK_RIGHT);
      switch (side) {
        case PLAYBACK_LEFT|PLAYBACK_RIGHT:
          for (int k = 0; k < count; ++k) {
            immutable float s = data[ch][d_offset+src+k];
            mix.ptr[k*2+0] += s;
            mix.ptr[k*2+1] += s;
          }
          break;
        case PLAYBACK_LEFT:
          for (int k = 0; k < count; ++k) mix.ptr[k*2+0] += data[ch][d_offset+src+k];
          break;
        case PLAYBACK_RIGHT:
          for (int k = 0; k < count; ++k) mix.ptr[k*2+1] += data[ch][d_offset+src+k];
          break;
        default:
          break; // channel contributes to neither side
      }
    }
    immutable int total = count<<1;
    for (int k = 0; k < total; ++k) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("mix[k]", "15"));
      if (cast(uint)(v+32768) > 65535) v = (v >= 0 ? 32767 : -32768);
      output[dst+k] = cast(short)v;
    }
  }
}
3907 
// Fills `buf_c` planar s16 buffers (written from `b_offset`) with `samples` samples
// taken from `data_c` float channels (read from `d_offset`).
// When the channel counts differ and the target is mono/stereo with at most 6 sources,
// the sources are mixed down; otherwise channels are copied one-to-one and any surplus
// output buffers are zeroed.
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  import core.stdc.string : memset;

  if (buf_c == data_c || buf_c > 2 || data_c > 6) {
    immutable int common = (buf_c < data_c ? buf_c : data_c);
    for (int ch = 0; ch < common; ++ch) copy_samples(buffer[ch]+b_offset, data[ch]+d_offset, samples);
    for (int ch = common; ch < buf_c; ++ch) memset(buffer[ch]+b_offset, 0, short.sizeof*samples);
    return;
  }

  // row = number of output buffers, column = output index -> mix mask
  immutable int[2][3] channel_selector = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
  for (int ch = 0; ch < buf_c; ++ch) {
    compute_samples(channel_selector[buf_c].ptr[ch], buffer[ch]+b_offset, data_c, data, d_offset, samples);
  }
}
3920 
// Writes `len` frames of interleaved s16 audio with `buf_c` channels per frame into
// `buffer`, reading `data_c` float channels starting at `d_offset`.
//   - differing channel counts, stereo target, at most 6 sources: stereo downmix
//   - differing channel counts, mono target, at most 6 sources: mono downmix
//   - anything else: channels are copied one-to-one and surplus output channels are zeroed
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  if (buf_c != data_c && data_c <= 6) {
    if (buf_c == 2) {
      // one pass fills both output channels
      compute_stereo_samples(buffer, data_c, data, d_offset, len);
      return;
    }
    if (buf_c == 1) {
      // a single interleaved channel is just a planar mono buffer; the stereo mixer
      // must not be used here, because it writes 2*len shorts
      compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
      return;
    }
  }

  //check_endianness();
  mixin(declfcvar!"temp");
  immutable int limit = (buf_c < data_c ? buf_c : data_c);
  foreach (immutable j; 0..len) {
    foreach (immutable i; 0..limit) {
      float f = data[i][d_offset+j];
      mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));
      // clamp to the s16 range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      *buffer++ = cast(short)v; //k8
    }
    // output channels without a matching source channel are silent
    foreach (immutable i; limit..buf_c) *buffer++ = 0;
  }
}
3940 } // @nogc
3941 
3942 
// Ogg Vorbis decoder object; all stream data is pulled through a `readCB` callback.
public class VorbisDecoder {
  // return # of bytes read, 0 on eof, -1 on error
  // if called with `buf is null`, do `close()`
  // `ofs` is the position to read from (rawRead passes its current `stpos`)
  alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3947 
  //TODO
  // Memory hooks for the decoder. Every hook here simply forwards to the C heap;
  // the `vb` argument is accepted but not used.
  static struct Allocator {
  static nothrow @nogc: // because
    // allocate `sz` bytes; returns whatever `malloc` returns
    void* alloc (uint sz, VorbisDecoder vb) {
      import core.stdc.stdlib : malloc;
      return malloc(sz);
    }
    // release memory obtained from `alloc`
    void free (void* p, VorbisDecoder vb) {
      // the local import makes the call below resolve to the C `free`
      import core.stdc.stdlib : free;
      free(p);
    }
    // allocate `sz` bytes of temporary memory (also plain `malloc` here)
    void* allocTemp (uint sz, VorbisDecoder vb) {
      import core.stdc.stdlib : malloc;
      return malloc(sz);
    }
    // release temporary memory; `sz` is ignored
    void freeTemp (void* p, uint sz, VorbisDecoder vb) {
      import core.stdc.stdlib : free;
      free(p);
    }
    // temp-arena bookmarks: nothing to save or restore with a malloc-backed allocator
    uint tempSave (VorbisDecoder vb) { return 0; }
    void tempRestore (uint pos, VorbisDecoder vb) {}
  }
3970 
nothrow @nogc:
private:
  // --- input stream state ---
  bool isOpened;              // true while a stream is attached; cleared by rawClose()/close()
  readCB stmread;             // callback used for every read and for the close request
  uint stlastofs = uint.max;  // offset bookkeeping used by stflRead to decide when to fseek
  uint stst;                  // position where the stream starts
  uint stpos;                 // current read position (advanced by rawRead/rawSkip/rawSeek)
  uint stend;                 // position where the stream ends; reads never go past it
  bool stclose;               // stflRead closes `stfl` on the close request only when this is set
  FILE* stfl;                 // underlying C file used by stflRead

private:
  //ubyte* stream;
  //ubyte* stream_start;
  //ubyte* stream_end;
  //uint stream_len;

  /+bool push_mode;+/

  // offset passed to set_file_offset() by seekStart()
  uint first_audio_page_offset;

  ProbedPage p_first, p_last;

  // memory management
  Allocator alloc;
  int setup_offset;
  int temp_offset;

  // run-time results
  bool eof = true;
  STBVorbisError error;

  // header info
  int[2] blocksize;
  int blocksize_0, blocksize_1;
  int codebook_count;
  Codebook* codebooks;
  int floor_count;
  ushort[64] floor_types; // varies
  Floor* floor_config;
  int residue_count;
  ushort[64] residue_types; // varies
  Residue* residue_config;
  int mapping_count;
  Mapping* mapping;
  int mode_count;
  Mode[64] mode_config;  // varies

  // cached by streamLengthInSamples(); 0 means "not computed yet"
  uint total_samples;

  // decode buffer
  float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
  float*[STB_VORBIS_MAX_CHANNELS] outputs;

  float*[STB_VORBIS_MAX_CHANNELS] previous_window;
  int previous_length;

  // exactly one of these exists, depending on the build version
  version(STB_VORBIS_NO_DEFER_FLOOR) {
    float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
  } else {
    short*[STB_VORBIS_MAX_CHANNELS] finalY;
  }

  uint current_loc; // sample location of next frame to decode
  int current_loc_valid;

  // per-blocksize precomputed data

  // twiddle factors
  float*[2] A, B, C;
  float*[2] window;
  ushort*[2] bit_reverse;

  // current page/packet/segment streaming info
  uint serial; // stream serial number for verification
  int last_page;
  int segment_count;
  ubyte[255] segments;
  ubyte page_flag;
  ubyte bytes_in_seg;
  ubyte first_decode;
  int next_seg;
  int last_seg;  // flag that we're on the last segment
  int last_seg_which; // what was the segment number of the last seg?
  uint acc;
  int valid_bits;
  int packet_bytes;
  int end_seg_with_known_loc;
  uint known_loc_for_packet;
  int discard_samples_deferred;
  uint samples_output;

  // push mode scanning
  /+
  int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
  CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
  +/

  // sample-access
  // [channel_buffer_start, channel_buffer_end) is the range of decoded samples in
  // channel_buffers that the getSamples* calls have not handed out yet
  int channel_buffer_start;
  int channel_buffer_end;

private: // k8: 'cause i'm evil
  // user-accessible info
  uint sample_rate;
  int vrchannels;

  uint setup_memory_required;
  uint temp_memory_required;
  uint setup_temp_memory_required;

  bool read_comments;
  ubyte* comment_data;
  uint comment_size;

  // functions to get comment data
  uint comment_data_pos;
4089 private:
4090   int rawRead (void[] buf) {
4091     static if (__VERSION__ > 2067) pragma(inline, true);
4092     if (isOpened && buf.length > 0 && stpos < stend) {
4093       if (stend-stpos < buf.length) buf = buf[0..stend-stpos];
4094       auto rd = stmread(buf, stpos, this);
4095       if (rd > 0) stpos += rd;
4096       return rd;
4097     }
4098     return 0;
4099   }
4100   void rawSkip (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened && n > 0) { if ((stpos += n) > stend) stpos = stend; } }
4101   void rawSeek (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { stpos = stst+(n < 0 ? 0 : n); if (stpos > stend) stpos = stend; } }
4102   void rawClose () { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { isOpened = false; stmread(null, 0, this); } }
4103 
4104 final:
4105 private:
4106   void doInit () {
4107     import core.stdc.string : memset;
4108     /*
4109     if (z) {
4110       alloc = *z;
4111       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
4112       temp_offset = alloc.alloc_buffer_length_in_bytes;
4113     }
4114     */
4115     eof = false;
4116     error = STBVorbisError.no_error;
4117     /+stream = null;+/
4118     codebooks = null;
4119     /+page_crc_tests = -1;+/
4120   }
4121 
4122   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
4123     if (buf !is null) {
4124       //{ import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
4125       if (vb.stlastofs != ofs) {
4126         import core.stdc.stdio : fseek, SEEK_SET;
4127         vb.stlastofs = ofs;
4128         fseek(vb.stfl, ofs, SEEK_SET);
4129       }
4130       import core.stdc.stdio : fread;
4131       return cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
4132     } else {
4133       if (vb.stclose) {
4134         import core.stdc.stdio : fclose;
4135         if (vb.stfl !is null) fclose(vb.stfl);
4136       }
4137       vb.stfl = null;
4138       return 0;
4139     }
4140   }
4141 
4142 public:
  // creates a decoder with no stream attached; use open() afterwards
  this () {}
  // releases decoder memory and the attached stream via close()
  ~this () { close(); }
4145 
4146   this (int asize, readCB rcb) {
4147   	assert(rcb !is null);
4148 	stend = (asize > 0 ? asize : 0);
4149 	stmread = rcb;
4150 	isOpened = true;
4151 	eof = false;
4152 	read_comments = true;
4153 	if (start_decoder(this)) {
4154 		vorbis_pump_first_frame(this);
4155 		return;
4156 	}
4157   }
  // convenience constructors; both forward to the matching open() overload
  this (FILE* fl, bool doclose=true) { open(fl, doclose); }
  this (const(char)[] filename) { open(filename); }

  // true when no stream is attached (never opened, closed, or open() failed)
  @property bool closed () { return !isOpened; }
4162 
4163   void open (FILE *fl, bool doclose=true) {
4164     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
4165     close();
4166     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
4167     stclose = doclose;
4168     stst = stpos = cast(uint)ftell(fl);
4169     fseek(fl, 0, SEEK_END);
4170     stend = cast(uint)ftell(fl);
4171     stlastofs = stlastofs.max;
4172     stclose = false;
4173     stfl = fl;
4174     import std.functional : toDelegate;
4175     stmread = toDelegate(&stflRead);
4176     isOpened = true;
4177     eof = false;
4178     read_comments = true;
4179     if (start_decoder(this)) {
4180       vorbis_pump_first_frame(this);
4181       return;
4182     }
4183     auto err = error;
4184     close();
4185     error = err;
4186   }
4187 
4188   void open (const(char)[] filename) {
4189     import core.stdc.stdio : fopen;
4190     import std.internal.cstring; // sorry
4191     close();
4192     FILE* fl = fopen(filename.tempCString, "rb");
4193     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
4194     open(fl, true);
4195   }
4196 
4197   /+
4198   void openPushdata(void* data, int data_len, // the memory available for decoding
4199                     int* data_used)           // only defined on success
4200   {
4201     close();
4202     eof = false;
4203     stream = cast(ubyte*)data;
4204     stream_end = stream+data_len;
4205     push_mode = true;
4206     if (!start_decoder(this)) {
4207       auto err = error;
4208       if (eof) err = STBVorbisError.need_more_data; else close();
4209       error = err;
4210       return;
4211     }
4212     *data_used = stream-(cast(ubyte*)data);
4213     error = STBVorbisError.no_error;
4214   }
4215   +/
4216 
4217   void close () {
4218     import core.stdc.string : memset;
4219 
4220     setup_free(this, this.comment_data);
4221     if (this.residue_config) {
4222       foreach (immutable i; 0..this.residue_count) {
4223         Residue* r = this.residue_config+i;
4224         if (r.classdata) {
4225           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
4226           setup_free(this, r.classdata);
4227         }
4228         setup_free(this, r.residue_books);
4229       }
4230     }
4231 
4232     if (this.codebooks) {
4233       foreach (immutable i; 0..this.codebook_count) {
4234         Codebook* c = this.codebooks+i;
4235         setup_free(this, c.codeword_lengths);
4236         setup_free(this, c.multiplicands);
4237         setup_free(this, c.codewords);
4238         setup_free(this, c.sorted_codewords);
4239         // c.sorted_values[-1] is the first entry in the array
4240         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
4241       }
4242       setup_free(this, this.codebooks);
4243     }
4244     setup_free(this, this.floor_config);
4245     setup_free(this, this.residue_config);
4246     if (this.mapping) {
4247       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
4248       setup_free(this, this.mapping);
4249     }
4250     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
4251       setup_free(this, this.channel_buffers.ptr[i]);
4252       setup_free(this, this.previous_window.ptr[i]);
4253       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
4254       setup_free(this, this.finalY.ptr[i]);
4255     }
4256     foreach (immutable i; 0..2) {
4257       setup_free(this, this.A.ptr[i]);
4258       setup_free(this, this.B.ptr[i]);
4259       setup_free(this, this.C.ptr[i]);
4260       setup_free(this, this.window.ptr[i]);
4261       setup_free(this, this.bit_reverse.ptr[i]);
4262     }
4263 
4264     rawClose();
4265     isOpened = false;
4266     stmread = null;
4267     stlastofs = uint.max;
4268     stst = 0;
4269     stpos = 0;
4270     stend = 0;
4271     stclose = false;
4272     stfl = null;
4273 
4274     sample_rate = 0;
4275     vrchannels = 0;
4276 
4277     setup_memory_required = 0;
4278     temp_memory_required = 0;
4279     setup_temp_memory_required = 0;
4280 
4281     read_comments = 0;
4282     comment_data = null;
4283     comment_size = 0;
4284 
4285     comment_data_pos = 0;
4286 
4287     /+
4288     stream = null;
4289     stream_start = null;
4290     stream_end = null;
4291     +/
4292 
4293     //stream_len = 0;
4294 
4295     /+push_mode = false;+/
4296 
4297     first_audio_page_offset = 0;
4298 
4299     p_first = p_first.init;
4300     p_last = p_last.init;
4301 
4302     setup_offset = 0;
4303     temp_offset = 0;
4304 
4305     eof = true;
4306     error = STBVorbisError.no_error;
4307 
4308     blocksize[] = 0;
4309     blocksize_0 = 0;
4310     blocksize_1 = 0;
4311     codebook_count = 0;
4312     codebooks = null;
4313     floor_count = 0;
4314     floor_types[] = 0;
4315     floor_config = null;
4316     residue_count = 0;
4317     residue_types[] = 0;
4318     residue_config = null;
4319     mapping_count = 0;
4320     mapping = null;
4321     mode_count = 0;
4322     mode_config[] = Mode.init;
4323 
4324     total_samples = 0;
4325 
4326     channel_buffers[] = null;
4327     outputs[] = null;
4328 
4329     previous_window[] = null;
4330     previous_length = 0;
4331 
4332     version(STB_VORBIS_NO_DEFER_FLOOR) {
4333       floor_buffers[] = null;
4334     } else {
4335       finalY[] = null;
4336     }
4337 
4338     current_loc = 0;
4339     current_loc_valid = 0;
4340 
4341     A[] = null;
4342     B[] = null;
4343     C[] = null;
4344     window[] = null;
4345     bit_reverse = null;
4346 
4347     serial = 0;
4348     last_page = 0;
4349     segment_count = 0;
4350     segments[] = 0;
4351     page_flag = 0;
4352     bytes_in_seg = 0;
4353     first_decode = 0;
4354     next_seg = 0;
4355     last_seg = 0;
4356     last_seg_which = 0;
4357     acc = 0;
4358     valid_bits = 0;
4359     packet_bytes = 0;
4360     end_seg_with_known_loc = 0;
4361     known_loc_for_packet = 0;
4362     discard_samples_deferred = 0;
4363     samples_output = 0;
4364 
4365     /+
4366     page_crc_tests = -1;
4367     scan[] = CRCscan.init;
4368     +/
4369 
4370     channel_buffer_start = 0;
4371     channel_buffer_end = 0;
4372   }
4373 
4374   @property const pure {
4375     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4376 
4377     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4378     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4379     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4380 
4381     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4382     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4383     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4384   }
4385 
4386   // will clear last error
4387   @property int lastError () {
4388     int e = error;
4389     error = STBVorbisError.no_error;
4390     return e;
4391   }
4392 
4393   // PUSHDATA API
4394   /+
4395   void flushPushdata () {
4396     if (push_mode) {
4397       previous_length = 0;
4398       page_crc_tests = 0;
4399       discard_samples_deferred = 0;
4400       current_loc_valid = false;
4401       first_decode = false;
4402       samples_output = 0;
4403       channel_buffer_start = 0;
4404       channel_buffer_end = 0;
4405     }
4406   }
4407 
4408   // return value: number of bytes we used
4409   int decodeFramePushdata(
4410            void* data, int data_len, // the memory available for decoding
4411            int* channels,            // place to write number of float* buffers
4412            float*** output,          // place to write float** array of float* buffers
4413            int* samples              // place to write number of output samples
4414        )
4415   {
4416     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4417 
4418     if (this.page_crc_tests >= 0) {
4419       *samples = 0;
4420       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4421     }
4422 
4423     this.stream = cast(ubyte*)data;
4424     this.stream_end = this.stream+data_len;
4425     this.error = STBVorbisError.no_error;
4426 
4427     // check that we have the entire packet in memory
4428     if (!is_whole_packet_present(this, false)) {
4429       *samples = 0;
4430       return 0;
4431     }
4432 
4433     int len, left, right;
4434 
4435     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4436       // save the actual error we encountered
4437       STBVorbisError error = this.error;
4438       if (error == STBVorbisError.bad_packet_type) {
4439         // flush and resynch
4440         this.error = STBVorbisError.no_error;
4441         while (get8_packet(this) != EOP) if (this.eof) break;
4442         *samples = 0;
4443         return this.stream-data;
4444       }
4445       if (error == STBVorbisError.continued_packet_flag_invalid) {
4446         if (this.previous_length == 0) {
4447           // we may be resynching, in which case it's ok to hit one
4448           // of these; just discard the packet
4449           this.error = STBVorbisError.no_error;
4450           while (get8_packet(this) != EOP) if (this.eof) break;
4451           *samples = 0;
4452           return this.stream-data;
4453         }
4454       }
4455       // if we get an error while parsing, what to do?
4456       // well, it DEFINITELY won't work to continue from where we are!
4457       flushPushdata();
4458       // restore the error that actually made us bail
4459       this.error = error;
4460       *samples = 0;
4461       return 1;
4462     }
4463 
4464     // success!
4465     len = vorbis_finish_frame(this, len, left, right);
4466     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4467 
4468     if (channels) *channels = this.vrchannels;
4469     *samples = len;
4470     *output = this.outputs.ptr;
4471     return this.stream-data;
4472   }
4473   +/
4474 
4475   public uint fileOffset () {
4476     if (/+push_mode ||+/ !isOpened) return 0;
4477     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4478     return (stpos > stst ? stpos-stst : 0);
4479   }
4480 
  // size of the stream window in bytes (end position minus start position)
  public uint stream_len () { return stend-stst; }
4482 
4483   // DATA-PULLING API
  // Positions the decoder so that the next decoded frame contains (or starts with)
  // `sample_number`.
  // Returns 1 on success, 0 when the coarse page-level seek fails, and the result of
  // `.error(this, seek_failed)` when the next packet cannot be peeked.
  public int seekFrame (uint sample_number) {
    uint max_frame_samples;

    /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/

    // fast page-level search
    if (!seek_to_sample_coarse(this, sample_number)) return 0;

    assert(this.current_loc_valid);
    assert(this.current_loc <= sample_number);

    // linear search for the relevant packet
    // upper bound used below to decide whether the frame after the next one could
    // still contain the target
    max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
    while (this.current_loc < sample_number) {
      int left_start, left_end, right_start, right_end, mode, frame_samples;
      // look at the next packet's window layout without consuming the packet
      if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
      // calculate the number of samples returned by the next frame
      frame_samples = right_start-left_start;
      if (this.current_loc+frame_samples > sample_number) {
        return 1; // the next frame will contain the sample
      } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
        // there's a chance the frame after this could contain the sample
        vorbis_pump_first_frame(this);
      } else {
        // this frame is too early to be relevant
        // skip the packet without decoding it and account for its samples by hand
        this.current_loc += frame_samples;
        this.previous_length = 0;
        maybe_start_packet(this);
        flush_packet(this);
      }
    }
    // the next frame will start with the sample
    assert(this.current_loc == sample_number);
    return 1;
  }
4519 
4520   public int seek (uint sample_number) {
4521     if (!seekFrame(sample_number)) return 0;
4522     if (sample_number != this.current_loc) {
4523       int n;
4524       uint frame_start = this.current_loc;
4525       getFrameFloat(&n, null);
4526       assert(sample_number > frame_start);
4527       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4528       this.channel_buffer_start += (sample_number-frame_start);
4529     }
4530     return 1;
4531   }
4532 
4533   public bool seekStart () {
4534     /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
4535     set_file_offset(this, first_audio_page_offset);
4536     previous_length = 0;
4537     first_decode = true;
4538     next_seg = -1;
4539     return vorbis_pump_first_frame(this);
4540   }
4541 
  // Returns the total stream length in samples, or 0 when it cannot be determined.
  // The first call locates the last page and reads its granule position; the result is
  // cached in `total_samples`, and the read position is restored before returning.
  // On failure `error` is set to `cant_find_last_page`.
  public uint streamLengthInSamples () {
    uint restore_offset, previous_safe;
    uint end, last_page_loc;

    /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
    if (!this.total_samples) {
      uint last;
      uint lo, hi;
      char[6] header;

      // first, store the current decode position so we can restore it
      restore_offset = fileOffset;

      // now we want to seek back 64K from the end (the last page must
      // be at most a little less than 64K, but let's allow a little slop)
      if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
        previous_safe = this.stream_len-65536;
      } else {
        previous_safe = this.first_audio_page_offset;
      }

      set_file_offset(this, previous_safe);
      // previous_safe is now our candidate 'earliest known place that seeking
      // to will lead to the final page'

      if (!vorbis_find_page(this, &end, &last)) {
        // if we can't find a page, we're hosed!
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = 0xffffffff;
        goto done;
      }

      // check if there are more pages
      last_page_loc = fileOffset;

      // stop when the last_page flag is set, not when we reach eof;
      // this allows us to stop short of a 'file_section' end without
      // explicitly checking the length of the section
      while (!last) {
        set_file_offset(this, end);
        if (!vorbis_find_page(this, &end, &last)) {
          // the last page we found didn't have the 'last page' flag set. whoops!
          break;
        }
        previous_safe = last_page_loc+1;
        last_page_loc = fileOffset;
      }

      set_file_offset(this, last_page_loc);

      // parse the header
      getn(this, cast(ubyte*)header, 6);
      // extract the absolute granule position
      lo = get32(this);
      hi = get32(this);
      // an all-ones granule position is treated as "no position available"
      if (lo == 0xffffffff && hi == 0xffffffff) {
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = SAMPLE_unknown;
        goto done;
      }
      if (hi) lo = 0xfffffffe; // saturate
      this.total_samples = lo;

      // remember where the last page is
      this.p_last.page_start = last_page_loc;
      this.p_last.page_end = end;
      this.p_last.last_decoded_sample = lo;

     done:
      set_file_offset(this, restore_offset);
    }
    return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
  }
4614 
4615   public float streamLengthInSeconds () {
4616     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4617   }
4618 
4619   public int getFrameFloat (int* channels, float*** output) {
4620     int len, right, left;
4621     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4622 
4623     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4624       channel_buffer_start = channel_buffer_end = 0;
4625       return 0;
4626     }
4627 
4628     len = vorbis_finish_frame(this, len, left, right);
4629     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4630 
4631     channel_buffer_start = left;
4632     channel_buffer_end = left+len;
4633 
4634     if (channels) *channels = this.vrchannels;
4635     if (output) *output = this.outputs.ptr;
4636     return len;
4637   }
4638 
4639   /+
4640   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4641     VorbisDecoder this;
4642     stb_vorbis_ctx p = void;
4643     if (data is null) return null;
4644     vorbis_init(&p, alloc);
4645     p.stream = cast(ubyte*)data;
4646     p.stream_end = cast(ubyte*)data+len;
4647     p.stream_start = cast(ubyte*)p.stream;
4648     p.stream_len = len;
4649     p.push_mode = false;
4650     if (start_decoder(&p)) {
4651       this = vorbis_alloc(&p);
4652       if (this) {
4653         *this = p;
4654         vorbis_pump_first_frame(this);
4655         return this;
4656       }
4657     }
4658     if (error) *error = p.error;
4659     vorbis_deinit(&p);
4660     return null;
4661   }
4662   +/
4663 
4664   // s16 samples API
4665   int getFrameShort (int num_c, short** buffer, int num_samples) {
4666     float** output;
4667     int len = getFrameFloat(null, &output);
4668     if (len > num_samples) len = num_samples;
4669     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4670     return len;
4671   }
4672 
4673   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4674     float** output;
4675     int len;
4676     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4677     len = getFrameFloat(null, &output);
4678     if (len) {
4679       if (len*num_c > num_shorts) len = num_shorts/num_c;
4680       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4681     }
4682     return len;
4683   }
4684 
4685   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4686     float** outputs;
4687     int len = num_shorts/channels;
4688     int n = 0;
4689     int z = this.vrchannels;
4690     if (z > channels) z = channels;
4691     while (n < len) {
4692       int k = channel_buffer_end-channel_buffer_start;
4693       if (n+k >= len) k = len-n;
4694       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4695       buffer += k*channels;
4696       n += k;
4697       channel_buffer_start += k;
4698       if (n == len) break;
4699       if (!getFrameFloat(null, &outputs)) break;
4700     }
4701     return n;
4702   }
4703 
4704   int getSamplesShort (int channels, short** buffer, int len) {
4705     float** outputs;
4706     int n = 0;
4707     int z = this.vrchannels;
4708     if (z > channels) z = channels;
4709     while (n < len) {
4710       int k = channel_buffer_end-channel_buffer_start;
4711       if (n+k >= len) k = len-n;
4712       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4713       n += k;
4714       channel_buffer_start += k;
4715       if (n == len) break;
4716       if (!getFrameFloat(null, &outputs)) break;
4717     }
4718     return n;
4719   }
4720 
4721   /+
4722   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4723     import core.stdc.stdlib : malloc, realloc;
4724 
4725     int data_len, offset, total, limit, error;
4726     short* data;
4727     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4728     if (v is null) return -1;
4729     limit = v.vrchannels*4096;
4730     *channels = v.vrchannels;
4731     if (sample_rate) *sample_rate = v.sample_rate;
4732     offset = data_len = 0;
4733     total = limit;
4734     data = cast(short*)malloc(total*(*data).sizeof);
4735     if (data is null) {
4736       stb_vorbis_close(v);
4737       return -2;
4738     }
4739     for (;;) {
4740       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4741       if (n == 0) break;
4742       data_len += n;
4743       offset += n*v.vrchannels;
4744       if (offset+limit > total) {
4745         short *data2;
4746         total *= 2;
4747         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4748         if (data2 is null) {
4749           import core.stdc.stdlib : free;
4750           free(data);
4751           stb_vorbis_close(v);
4752           return -2;
4753         }
4754         data = data2;
4755       }
4756     }
4757     *output = data;
4758     stb_vorbis_close(v);
4759     return data_len;
4760   }
4761 
4762   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4763     import core.stdc.stdlib : malloc, realloc;
4764 
4765     int data_len, offset, total, limit, error;
4766     short* data;
4767     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4768     if (v is null) return -1;
4769     limit = v.vrchannels*4096;
4770     *channels = v.vrchannels;
4771     if (sample_rate) *sample_rate = v.sample_rate;
4772     offset = data_len = 0;
4773     total = limit;
4774     data = cast(short*)malloc(total*(*data).sizeof);
4775     if (data is null) {
4776       stb_vorbis_close(v);
4777       return -2;
4778     }
4779     for (;;) {
4780       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4781       if (n == 0) break;
4782       data_len += n;
4783       offset += n*v.vrchannels;
4784       if (offset+limit > total) {
4785         short *data2;
4786         total *= 2;
4787         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4788         if (data2 is null) {
4789           import core.stdc.stdlib : free;
4790           free(data);
4791           stb_vorbis_close(v);
4792           return -2;
4793         }
4794         data = data2;
4795       }
4796     }
4797     *output = data;
4798     stb_vorbis_close(v);
4799     return data_len;
4800   }
4801 
4802   public int stb_vorbis_get_samples_float_interleaved (VorbisDecoder this, int channels, float* buffer, int num_floats) {
4803     float** outputs;
4804     int len = num_floats/channels;
4805     int n = 0;
4806     int z = this.vrchannels;
4807     if (z > channels) z = channels;
4808     while (n < len) {
4809       int k = this.channel_buffer_end-this.channel_buffer_start;
4810       if (n+k >= len) k = len-n;
4811       foreach (immutable j; 0..k) {
4812         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4813         foreach (immutable i; z..channels) *buffer++ = 0;
4814       }
4815       n += k;
4816       this.channel_buffer_start += k;
4817       if (n == len) break;
4818       if (!stb_vorbis_get_frame_float(this, null, &outputs)) break;
4819     }
4820     return n;
4821   }
4822   +/
4823 
4824   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4825     import core.stdc.string : memcpy, memset;
4826     float** outputs;
4827     int n = 0;
4828     int z = vrchannels;
4829     if (z > achans) z = achans;
4830     while (n < num_samples) {
4831       int k = channel_buffer_end-channel_buffer_start;
4832       if (n+k >= num_samples) k = num_samples-n;
4833       if (k) {
4834         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4835         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4836       }
4837       n += k;
4838       channel_buffer_start += k;
4839       if (n == num_samples) break;
4840       if (!getFrameFloat(null, &outputs)) break;
4841     }
4842     return n;
4843   }
4844 
4845 private: // k8: 'cause i'm evil
4846   private enum cmt_len_size = 2;
4847   nothrow /*@trusted*/ @nogc {
4848     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4849 
4850     // 0: error
4851     // includes length itself
4852     private uint comment_get_line_len () const pure {
4853       if (comment_data_pos >= comment_size) return 0;
4854       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4855       uint len = comment_data[comment_data_pos];
4856       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4857       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4858     }
4859 
4860     public bool comment_rewind () {
4861       comment_data_pos = 0;
4862       for (;;) {
4863         auto len = comment_get_line_len();
4864         if (!len) { comment_data_pos = comment_size; return false; }
4865         if (len != cmt_len_size) return true;
4866         comment_data_pos += len;
4867       }
4868     }
4869 
4870     // true: has something to read after skip
4871     public bool comment_skip () {
4872       comment_data_pos += comment_get_line_len();
4873       for (;;) {
4874         auto len = comment_get_line_len();
4875         if (!len) { comment_data_pos = comment_size; return false; }
4876         if (len != cmt_len_size) break;
4877         comment_data_pos += len;
4878       }
4879       return true;
4880     }
4881 
4882     public const(char)[] comment_line () {
4883       auto len = comment_get_line_len();
4884       if (len < cmt_len_size) return null;
4885       if (len == cmt_len_size) return "";
4886       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4887     }
4888 
4889     public const(char)[] comment_name () {
4890       auto line = comment_line();
4891       if (line.length == 0) return line;
4892       uint epos = 0;
4893       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4894       return (epos < line.length ? line[0..epos] : "");
4895     }
4896 
4897     public const(char)[] comment_value () {
4898       auto line = comment_line();
4899       if (line.length == 0) return line;
4900       uint epos = 0;
4901       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4902       return (epos < line.length ? line[epos+1..$] : line);
4903     }
4904   }
4905 }
4906 
4907 
4908 // ////////////////////////////////////////////////////////////////////////// //
4909 private:
4910 // cool helper to translate C defines
4911 template cmacroFixVars(T...) {
4912   /**
4913    * 64-bit implementation of fasthash
4914    *
4915    * Params:
4916    *   buf =  data buffer
4917    *   seed = the seed
4918    *
4919    * Returns:
4920    *   32-bit or 64-bit hash
4921    */
4922   size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
4923     enum Get8Bytes = q{
4924       cast(ulong)data[0]|
4925       (cast(ulong)data[1]<<8)|
4926       (cast(ulong)data[2]<<16)|
4927       (cast(ulong)data[3]<<24)|
4928       (cast(ulong)data[4]<<32)|
4929       (cast(ulong)data[5]<<40)|
4930       (cast(ulong)data[6]<<48)|
4931       (cast(ulong)data[7]<<56)
4932     };
4933     enum m = 0x880355f21e6d1965UL;
4934     auto data = cast(const(ubyte)*)buf;
4935     ulong h = seed;
4936     ulong t;
4937     foreach (immutable _; 0..len/8) {
4938       version(HasUnalignedOps) {
4939         if (__ctfe) {
4940           t = mixin(Get8Bytes);
4941         } else {
4942           t = *cast(ulong*)data;
4943         }
4944       } else {
4945         t = mixin(Get8Bytes);
4946       }
4947       data += 8;
4948       t ^= t>>23;
4949       t *= 0x2127599bf4325c37UL;
4950       t ^= t>>47;
4951       h ^= t;
4952       h *= m;
4953     }
4954 
4955     h ^= len*m;
4956     t = 0;
4957     switch (len&7) {
4958       case 7: t ^= cast(ulong)data[6]<<48; goto case 6;
4959       case 6: t ^= cast(ulong)data[5]<<40; goto case 5;
4960       case 5: t ^= cast(ulong)data[4]<<32; goto case 4;
4961       case 4: t ^= cast(ulong)data[3]<<24; goto case 3;
4962       case 3: t ^= cast(ulong)data[2]<<16; goto case 2;
4963       case 2: t ^= cast(ulong)data[1]<<8; goto case 1;
4964       case 1: t ^= cast(ulong)data[0]; goto default;
4965       default:
4966         t ^= t>>23;
4967         t *= 0x2127599bf4325c37UL;
4968         t ^= t>>47;
4969         h ^= t;
4970         h *= m;
4971         break;
4972     }
4973 
4974     h ^= h>>23;
4975     h *= 0x2127599bf4325c37UL;
4976     h ^= h>>47;
4977     static if (size_t.sizeof == 4) {
4978       // 32-bit hash
4979       // the following trick converts the 64-bit hashcode to Fermat
4980       // residue, which shall retain information from both the higher
4981       // and lower parts of hashcode.
4982       return cast(size_t)(h-(h>>32));
4983     } else {
4984       return h;
4985     }
4986   }
4987 
4988   string cmacroFixVars (string s, string[] names...) {
4989     assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");
4990     enum tmpPfxName = "__temp_prefix__";
4991     string res;
4992     string tmppfx;
4993     uint pos = 0;
4994     // skip empty lines (for pretty printing)
4995     // trim trailing spaces
4996     while (s.length > 0 && s[$-1] <= ' ') s = s[0..$-1];
4997     uint linestpos = 0; // start of the current line
4998     while (pos < s.length) {
4999       if (s[pos] > ' ') break;
5000       if (s[pos] == '\n') linestpos = pos+1;
5001       ++pos;
5002     }
5003     pos = linestpos;
5004     while (pos+2 < s.length) {
5005       int epos = pos;
5006       while (epos+2 < s.length && (s[epos] != '$' || s[epos+1] != '{')) ++epos;
5007       if (epos > pos) {
5008         if (s.length-epos < 3) break;
5009         res ~= s[pos..epos];
5010         pos = epos;
5011       }
5012       assert(s[pos] == '$' && s[pos+1] == '{');
5013       pos += 2;
5014       bool found = false;
5015       if (s.length-pos >= tmpPfxName.length+1 && s[pos+tmpPfxName.length] == '}' && s[pos..pos+tmpPfxName.length] == tmpPfxName) {
5016         if (tmppfx.length == 0) {
5017           // generate temporary prefix
5018           auto hash = hashOf(s.ptr, s.length);
5019           immutable char[16] hexChars = "0123456789abcdef";
5020           tmppfx = "_temp_macro_var_";
5021           foreach_reverse (immutable idx; 0..size_t.sizeof*2) {
5022             tmppfx ~= hexChars[hash&0x0f];
5023             hash >>= 4;
5024           }
5025           tmppfx ~= "_";
5026         }
5027         pos += tmpPfxName.length+1;
5028         res ~= tmppfx;
5029         found = true;
5030       } else {
5031         foreach (immutable nidx, string oname; T) {
5032           static assert(oname.length > 0);
5033           if (s.length-pos >= oname.length+1 && s[pos+oname.length] == '}' && s[pos..pos+oname.length] == oname) {
5034             found = true;
5035             pos += oname.length+1;
5036             res ~= names[nidx];
5037             break;
5038           }
5039         }
5040       }
5041       assert(found, "unknown variable in macro");
5042     }
5043     if (pos < s.length) res ~= s[pos..$];
5044     return res;
5045   }
5046 }
5047 
5048 // ////////////////////////////////////////////////////////////////////////// //
5049 /* Version history
5050     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5051     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5052                            avoid discarding last frame of audio data
5053     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5054                            some more crash fixes when out of memory or with corrupt files
5055     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5056                            some crash fixes when out of memory or with corrupt files
5057     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5058     1.04    - 2014/08/27 - fix missing const-correct case in API
5059     1.03    - 2014/08/07 - Warning fixes
5060     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5061     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5062     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5063                            (API change) report sample rate for decode-full-file funcs
5064     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5065     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5066     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5067     0.99993 - remove assert that fired on legal files with empty tables
5068     0.99992 - rewind-to-start
5069     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5070     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5071     0.9998 - add a full-decode function with a memory source
5072     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5073     0.9996 - query length of vorbis stream in samples/seconds
5074     0.9995 - bugfix to another optimization that only happened in certain files
5075     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5076     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5077     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5078     0.9991 - performance improvement of IMDCT
5079     0.999 - (should have been 0.9990) performance improvement of IMDCT
5080     0.998 - no-CRT support from Casey Muratori
5081     0.997 - bugfixes for bugs found by Terje Mathisen
5082     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5083     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5084     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5085     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5086     0.992 - fixes for MinGW warning
5087     0.991 - turn fast-float-conversion on by default
5088     0.990 - fix push-mode seek recovery if you seek into the headers
5089     0.98b - fix to bad release of 0.98
5090     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5091     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5092     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5093     0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
5095     0.93 - fixed all-zero-floor case (was decoding garbage)
5096     0.92 - fixed a memory leak
5097     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5098     0.90 - first public release
5099 */
5100 
5101 /*
5102 ------------------------------------------------------------------------------
5103 This software is available under 2 licenses -- choose whichever you prefer.
5104 ------------------------------------------------------------------------------
5105 ALTERNATIVE A - MIT License
5106 Copyright (c) 2017 Sean Barrett
5107 Permission is hereby granted, free of charge, to any person obtaining a copy of
5108 this software and associated documentation files (the "Software"), to deal in
5109 the Software without restriction, including without limitation the rights to
5110 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5111 of the Software, and to permit persons to whom the Software is furnished to do
5112 so, subject to the following conditions:
5113 The above copyright notice and this permission notice shall be included in all
5114 copies or substantial portions of the Software.
5115 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5116 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5117 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5118 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5119 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5120 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5121 SOFTWARE.
5122 ------------------------------------------------------------------------------
5123 ALTERNATIVE B - Public Domain (www.unlicense.org)
5124 This is free and unencumbered software released into the public domain.
5125 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5126 software, either in source code form or as a compiled binary, for any purpose,
5127 commercial or non-commercial, and by any means.
5128 In jurisdictions that recognize copyright laws, the author or authors of this
5129 software dedicate any and all copyright interest in the software to the public
5130 domain. We make this dedication for the benefit of the public at large and to
5131 the detriment of our heirs and successors. We intend this dedication to be an
5132 overt act of relinquishment in perpetuity of all present and future rights to
5133 this software under copyright law.
5134 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5135 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5136 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5137 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5138 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5139 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5140 ------------------------------------------------------------------------------
5141 */