// helper program is in ~me/encodings.d to make more tables from wikipedia

/**
	This is meant to help get data from the wild into utf8 strings
	so you can work with them easily inside D.

	The main function is convertToUtf8(), which takes a byte array
	of your raw data (a byte array because it isn't really a D string
	yet until it is utf8), and a runtime string telling its current
	encoding.

	The current encoding argument is meant to come from the data's
	metadata, and is flexible on exact format - it is case insensitive
	and takes several variations on the names.

	This way, you should be able to send it the encoding string directly
	from an XML document, a HTTP header, or whatever you have, and it
	ought to just work.

	Example:
	---
	auto data = cast(immutable(ubyte)[])
		std.file.read("my-windows-file.txt");
	string utf8String = convertToUtf8(data, "windows-1252");
	// utf8String can now be used
	---


	The encodings currently implemented for decoding are:
	$(LIST
		* UTF-8 and ASCII (a no-op; it simply casts the array to string)
		* UTF-16 (little and big endian),
		* UTF-32 (little and big endian),
		* Windows-1251,
		* Windows-1252,
		* ISO 8859 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, and 16.
		* KOI8-R
	)

	It treats ISO 8859-1, Latin-1, and Windows-1252 the same way, since
	those labels are pretty much de-facto the same thing in wild documents
	(people mislabel them a lot and I found it more useful to just deal
	with it than to be pedantic).

	This module currently makes no attempt to look at control characters.
*/
module arsd.characterencodings;

import std.string;
import std.array;
import std.conv;

/++
	Like [convertToUtf8], but it does not give up when the conversion
	throws an Exception (unknown encoding name, or a result that does not
	validate as UTF-8).

	In that case the ASCII bytes (< 128) of `data` are kept as-is and every
	other byte is replaced by U+FFFD, the Unicode replacement character.

	Params:
		data = the raw bytes to convert
		dataCharacterEncoding = the encoding label, see [convertToUtf8]

	Returns:
		a valid UTF-8 string
+/
string convertToUtf8Lossy(immutable(ubyte)[] data, string dataCharacterEncoding) {
	try {
		auto ret = convertToUtf8(data, dataCharacterEncoding);
		import std.utf;
		// the utf8/ascii path is a plain cast, so the result is checked here
		validate(ret);
		return ret;
	} catch(Exception e) {
		string ret;
		ret.reserve(data.length);
		foreach(b; data) {
			if(b < 128)
				ret ~= cast(char) b;
			else
				ret ~= '\uFFFD';
		}
		return ret;
	}
}

/++
	Takes data from a given character encoding and returns it as UTF-8.

	Params:
		data = the raw bytes to convert
		dataCharacterEncoding = name of the encoding `data` is in. It is
			lower-cased and has spaces, `-` and `_` removed before the
			lookup, so "UTF-16 LE", "utf_16le" and "utf16le" are all the same.

	Returns:
		The converted string. For the "utf8", "ascii" and "usascii" labels
		the bytes are returned unchanged and unvalidated.

	Throws:
		Exception if the encoding is not known or if UTF-16 / UTF-32 data
		does not consist of whole code units. Invalid UTF-16 / UTF-32
		content may also throw from the std.conv transcoding.
+/
string convertToUtf8(immutable(ubyte)[] data, string dataCharacterEncoding) {
	// just to normalize the passed string...
	auto encoding = dataCharacterEncoding.toLower();
	encoding = encoding.replace(" ", "");
	encoding = encoding.replace("-", "");
	encoding = encoding.replace("_", "");
	// should be good enough.

	switch(encoding) {
		default:
			throw new Exception("I don't know how to convert " ~ dataCharacterEncoding ~ " to UTF-8");
		// The generic labels are treated as little endian unless the
		// data starts with a big endian byte order mark. The BOM itself
		// is not removed from the output.
		case "utf16":
			return decodeUtf16(data, data.length >= 2 && data[0] == 0xfe && data[1] == 0xff);
		case "utf16le":
			return decodeUtf16(data, false);
		case "utf16be":
			return decodeUtf16(data, true);
		case "utf32":
			return decodeUtf32(data, data.length >= 4
				&& data[0] == 0x00 && data[1] == 0x00
				&& data[2] == 0xfe && data[3] == 0xff);
		case "utf32le":
			return decodeUtf32(data, false);
		case "utf32be":
			return decodeUtf32(data, true);
		// since the input is immutable, a plain cast is ok here.
		case "ascii":
		case "usascii": // utf-8 is a superset of ascii
		case "utf8":
			return cast(string) data;
		// and now the various 8 bit encodings we support.
		case "windows1252":
			return decodeImpl(data, ISO_8859_1, Windows_1252);
		case "windows1251":
			return decodeImpl(data, Windows_1251, Windows_1251_Lower);
		case "koi8r":
			return decodeImpl(data, KOI8_R, KOI8_R_Lower);
		case "latin1":
		case "iso88591":
			// Why am I putting Windows_1252 here? A lot of
			// stuff in the wild is mislabeled, so this will
			// do some good in the Just Works department.
			// Regardless, I don't handle the
			// control char set in that zone anyway right now.
			return decodeImpl(data, ISO_8859_1, Windows_1252);
		case "iso88592":
			return decodeImpl(data, ISO_8859_2);
		case "iso88593":
			return decodeImpl(data, ISO_8859_3);
		case "iso88594":
			return decodeImpl(data, ISO_8859_4);
		case "iso88595":
			return decodeImpl(data, ISO_8859_5);
		case "iso88596":
			return decodeImpl(data, ISO_8859_6);
		case "iso88597":
			return decodeImpl(data, ISO_8859_7);
		case "iso88598":
			return decodeImpl(data, ISO_8859_8);
		case "iso88599":
			return decodeImpl(data, ISO_8859_9);
		case "iso885910":
			return decodeImpl(data, ISO_8859_10);
		case "iso885911":
			return decodeImpl(data, ISO_8859_11);
		case "iso885913":
			return decodeImpl(data, ISO_8859_13);
		case "iso885914":
			return decodeImpl(data, ISO_8859_14);
		case "iso885915":
			return decodeImpl(data, ISO_8859_15);
		case "iso885916":
			return decodeImpl(data, ISO_8859_16);
	}

	assert(0);
}

// Assembles UTF-16 code units from raw bytes in the requested byte order
// and transcodes them to UTF-8. Building the units by hand (instead of
// casting the byte array) keeps a truncated buffer from raising an array
// cast Error and makes the result independent of the host byte order.
private string decodeUtf16(immutable(ubyte)[] data, bool bigEndian) {
	if(data.length % 2 != 0)
		throw new Exception("UTF-16 data must be a whole number of 2 byte code units");

	auto units = new wchar[](data.length / 2);
	foreach(i, ref unit; units) {
		immutable uint first = data[2 * i];
		immutable uint second = data[2 * i + 1];
		unit = cast(wchar) (bigEndian ? (first << 8) | second : (second << 8) | first);
	}
	return to!string(units);
}

// Same as decodeUtf16, but for 4 byte UTF-32 code units.
private string decodeUtf32(immutable(ubyte)[] data, bool bigEndian) {
	if(data.length % 4 != 0)
		throw new Exception("UTF-32 data must be a whole number of 4 byte code units");

	auto units = new dchar[](data.length / 4);
	foreach(i, ref unit; units) {
		immutable uint b0 = data[4 * i];
		immutable uint b1 = data[4 * i + 1];
		immutable uint b2 = data[4 * i + 2];
		immutable uint b3 = data[4 * i + 3];
		unit = cast(dchar) (bigEndian
			? (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
			: (b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
	}
	return to!string(units);
}

/// Tries to determine the current encoding based on the content.
/// Only really helps with the UTF variants: data that validates as UTF-8
/// is reported as "UTF-8", otherwise a leading byte order mark selects
/// "UTF-32 BE", "UTF-32 LE", "UTF-16 LE" or "UTF-16 BE".
/// Returns null if it can't be reasonably sure.
string tryToDetermineEncoding(in ubyte[] rawdata) {
	import std.utf;
	try {
		validate(cast(const(char)[]) rawdata);
		// the odds of non stuff validating as utf-8 are pretty low
		return "UTF-8";
	} catch(UTFException t) {
		// it's definitely not UTF-8!
		// we'll look at the first few bytes. If there's a
		// BOM, it's probably UTF-16 or UTF-32.
		//
		// not checking for utf8 bom; if it was that, we
		// wouldn't be here.

		// UTF-32 has to be tested first: its little endian BOM
		// (FF FE 00 00) starts with the UTF-16 little endian BOM (FF FE).
		if(rawdata.length >= 4) {
			if(rawdata[0] == 0x00 && rawdata[1] == 0x00
				&& rawdata[2] == 0xfe && rawdata[3] == 0xff)
				return "UTF-32 BE";
			if(rawdata[0] == 0xff && rawdata[1] == 0xfe
				&& rawdata[2] == 0x00 && rawdata[3] == 0x00)
				return "UTF-32 LE";
		}

		if(rawdata.length >= 2) {
			if(rawdata[0] == 0xff && rawdata[1] == 0xfe)
				return "UTF-16 LE";
			if(rawdata[0] == 0xfe && rawdata[1] == 0xff)
				return "UTF-16 BE";
		}
	}

	// we don't know with enough confidence. The app will have to find another way.
	return null;
}

// this function actually does the work, using the translation tables
// below.
//
// data is the input in some 8 bit encoding. chars160to255 must have 96
// entries, one per octet value 160 .. 255. chars128to159 (32 entries) and
// chars0to127 (128 entries) are optional: without them, octets 128 .. 159
// become a space and octets 0 .. 127 are passed through as ascii.
string decodeImpl(in ubyte[] data, in dchar[] chars160to255, in dchar[] chars128to159 = null, in dchar[] chars0to127 = null)
	in {
		assert(chars160to255.length == 256 - 160);
		assert(chars128to159 is null || chars128to159.length == 160 - 128);
		assert(chars0to127 is null || chars0to127.length == 128 - 0);
	}
	out(ret) {
		import std.utf;
		validate(ret);
	}
do {
	string utf8;
	// every octet yields at least one byte of output
	utf8.reserve(data.length);

	foreach(octet; data) {
		if(octet < 128) {
			if(chars0to127 !is null)
				utf8 ~= chars0to127[octet];
			else
				utf8 ~= cast(char) octet; // ascii is the same
		} else if(octet < 160) {
			if(chars128to159 !is null)
				utf8 ~= chars128to159[octet - 128];
			else
				utf8 ~= " ";
		} else {
			utf8 ~= chars160to255[octet - 160];
		}
	}

	return utf8;
}


// Here come the translation tables.

// this table gives characters for decimal 128 through 159.
// the < 128 characters are the same as ascii, and > 159 the same as
// iso 8859 1, seen below.
immutable dchar[] Windows_1252 = [
	'€', ' ', '‚', 'ƒ', '„', '…', '†', '‡',
	'ˆ', '‰', 'Š', '‹', 'Œ', ' ', 'Ž', ' ',
	' ', '‘', '’', '“', '”', '•', '–', '—',
	'˜', '™', 'š', '›', 'œ', ' ', 'ž', 'Ÿ'];

// the following tables give the characters from decimal 160 up to 255
// in the given encodings.
//
// The soft hyphen at position 0xAD is written as the escape '\u00AD'
// because the literal character is invisible in most editors. Combining
// marks (Arabic, Thai) are written as escapes for the same reason.

immutable dchar[] ISO_8859_1 = [
	' ', '¡', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ'];

immutable dchar[] ISO_8859_2 = [
	' ', 'Ą', '˘', 'Ł', '¤', 'Ľ', 'Ś', '§',
	'¨', 'Š', 'Ş', 'Ť', 'Ź', '\u00AD', 'Ž', 'Ż',
	'°', 'ą', '˛', 'ł', '´', 'ľ', 'ś', 'ˇ',
	'¸', 'š', 'ş', 'ť', 'ź', '˝', 'ž', 'ż',
	'Ŕ', 'Á', 'Â', 'Ă', 'Ä', 'Ĺ', 'Ć', 'Ç',
	'Č', 'É', 'Ę', 'Ë', 'Ě', 'Í', 'Î', 'Ď',
	'Đ', 'Ń', 'Ň', 'Ó', 'Ô', 'Ő', 'Ö', '×',
	'Ř', 'Ů', 'Ú', 'Ű', 'Ü', 'Ý', 'Ţ', 'ß',
	'ŕ', 'á', 'â', 'ă', 'ä', 'ĺ', 'ć', 'ç',
	'č', 'é', 'ę', 'ë', 'ě', 'í', 'î', 'ď',
	'đ', 'ń', 'ň', 'ó', 'ô', 'ő', 'ö', '÷',
	'ř', 'ů', 'ú', 'ű', 'ü', 'ý', 'ţ', '˙'];

immutable dchar[] ISO_8859_3 = [
	' ', 'Ħ', '˘', '£', '¤', ' ', 'Ĥ', '§',
	'¨', 'İ', 'Ş', 'Ğ', 'Ĵ', '\u00AD', ' ', 'Ż',
	'°', 'ħ', '²', '³', '´', 'µ', 'ĥ', '·',
	'¸', 'ı', 'ş', 'ğ', 'ĵ', '½', ' ', 'ż',
	'À', 'Á', 'Â', ' ', 'Ä', 'Ċ', 'Ĉ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	' ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Ġ', 'Ö', '×',
	'Ĝ', 'Ù', 'Ú', 'Û', 'Ü', 'Ŭ', 'Ŝ', 'ß',
	'à', 'á', 'â', ' ', 'ä', 'ċ', 'ĉ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	' ', 'ñ', 'ò', 'ó', 'ô', 'ġ', 'ö', '÷',
	'ĝ', 'ù', 'ú', 'û', 'ü', 'ŭ', 'ŝ', '˙'];

immutable dchar[] ISO_8859_4 = [
	' ', 'Ą', 'ĸ', 'Ŗ', '¤', 'Ĩ', 'Ļ', '§',
	'¨', 'Š', 'Ē', 'Ģ', 'Ŧ', '\u00AD', 'Ž', '¯',
	'°', 'ą', '˛', 'ŗ', '´', 'ĩ', 'ļ', 'ˇ',
	'¸', 'š', 'ē', 'ģ', 'ŧ', 'Ŋ', 'ž', 'ŋ',
	'Ā', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Į',
	'Č', 'É', 'Ę', 'Ë', 'Ė', 'Í', 'Î', 'Ī',
	'Đ', 'Ņ', 'Ō', 'Ķ', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ų', 'Ú', 'Û', 'Ü', 'Ũ', 'Ū', 'ß',
	'ā', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'į',
	'č', 'é', 'ę', 'ë', 'ė', 'í', 'î', 'ī',
	'đ', 'ņ', 'ō', 'ķ', 'ô', 'õ', 'ö', '÷',
	'ø', 'ų', 'ú', 'û', 'ü', 'ũ', 'ū', '˙'];

immutable dchar[] ISO_8859_5 = [
	' ', 'Ё', 'Ђ', 'Ѓ', 'Є', 'Ѕ', 'І', 'Ї',
	'Ј', 'Љ', 'Њ', 'Ћ', 'Ќ', '\u00AD', 'Ў', 'Џ',
	'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З',
	'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П',
	'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч',
	'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я',
	'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з',
	'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
	'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч',
	'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я',
	'№', 'ё', 'ђ', 'ѓ', 'є', 'ѕ', 'і', 'ї',
	'ј', 'љ', 'њ', 'ћ', 'ќ', '§', 'ў', 'џ'];

immutable dchar[] ISO_8859_6 = [
	' ', ' ', ' ', ' ', '¤', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', '،', '\u00AD', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', '؛', ' ', ' ', ' ', '؟',
	' ', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا',
	'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د',
	'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط',
	'ظ', 'ع', 'غ', ' ', ' ', ' ', ' ', ' ',
	'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه',
	// U+064B .. U+0652 are the combining vowel marks (fathatan .. sukun)
	'و', 'ى', 'ي', '\u064B', '\u064C', '\u064D', '\u064E', '\u064F',
	'\u0650', '\u0651', '\u0652', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];

immutable dchar[] ISO_8859_7 = [
	' ', '‘', '’', '£', '€', '₯', '¦', '§',
	'¨', '©', 'ͺ', '«', '¬', '\u00AD', ' ', '―',
	'°', '±', '²', '³', '΄', '΅', 'Ά', '·',
	'Έ', 'Ή', 'Ί', '»', 'Ό', '½', 'Ύ', 'Ώ',
	'ΐ', 'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η',
	'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο',
	'Π', 'Ρ', ' ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ',
	'Ψ', 'Ω', 'Ϊ', 'Ϋ', 'ά', 'έ', 'ή', 'ί',
	'ΰ', 'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η',
	'θ', 'ι', 'κ', 'λ', 'μ', 'ν', 'ξ', 'ο',
	'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ',
	'ψ', 'ω', 'ϊ', 'ϋ', 'ό', 'ύ', 'ώ', ' '];

immutable dchar[] ISO_8859_8 = [
	' ', ' ', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', '×', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', '÷', '»', '¼', '½', '¾', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', '‗',
	'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח',
	'ט', 'י', 'ך', 'כ', 'ל', 'ם', 'מ', 'ן',
	'נ', 'ס', 'ע', 'ף', 'פ', 'ץ', 'צ', 'ק',
	// FIXME: two of the trailing spaces in the last row are supposed to
	// be the left to right and right to left markers, not spaces.
	// lol maybe it isn't wrong
	'ר', 'ש', 'ת', ' ', ' ', ' ', ' ', ' '];

immutable dchar[] ISO_8859_9 = [
	' ', '¡', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ğ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'İ', 'Ş', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ğ', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ı', 'ş', 'ÿ'];

immutable dchar[] ISO_8859_10 = [
	' ', 'Ą', 'Ē', 'Ģ', 'Ī', 'Ĩ', 'Ķ', '§',
	'Ļ', 'Đ', 'Š', 'Ŧ', 'Ž', '\u00AD', 'Ū', 'Ŋ',
	'°', 'ą', 'ē', 'ģ', 'ī', 'ĩ', 'ķ', '·',
	'ļ', 'đ', 'š', 'ŧ', 'ž', '―', 'ū', 'ŋ',
	'Ā', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Į',
	'Č', 'É', 'Ę', 'Ë', 'Ė', 'Í', 'Î', 'Ï',
	'Ð', 'Ņ', 'Ō', 'Ó', 'Ô', 'Õ', 'Ö', 'Ũ',
	'Ø', 'Ų', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'ā', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'į',
	'č', 'é', 'ę', 'ë', 'ė', 'í', 'î', 'ï',
	'ð', 'ņ', 'ō', 'ó', 'ô', 'õ', 'ö', 'ũ',
	'ø', 'ų', 'ú', 'û', 'ü', 'ý', 'þ', 'ĸ'];

immutable dchar[] ISO_8859_11 = [
	' ', 'ก', 'ข', 'ฃ', 'ค', 'ฅ', 'ฆ', 'ง',
	'จ', 'ฉ', 'ช', 'ซ', 'ฌ', 'ญ', 'ฎ', 'ฏ',
	'ฐ', 'ฑ', 'ฒ', 'ณ', 'ด', 'ต', 'ถ', 'ท',
	'ธ', 'น', 'บ', 'ป', 'ผ', 'ฝ', 'พ', 'ฟ',
	'ภ', 'ม', 'ย', 'ร', 'ฤ', 'ล', 'ฦ', 'ว',
	'ศ', 'ษ', 'ส', 'ห', 'ฬ', 'อ', 'ฮ', 'ฯ',
	// U+0E31 and U+0E34 .. U+0E3A are combining vowel signs
	'ะ', '\u0E31', 'า', 'ำ', '\u0E34', '\u0E35', '\u0E36', '\u0E37',
	'\u0E38', '\u0E39', '\u0E3A', ' ', ' ', ' ', ' ', '฿',
	// U+0E47 .. U+0E4E are combining tone and other marks
	'เ', 'แ', 'โ', 'ใ', 'ไ', 'ๅ', 'ๆ', '\u0E47',
	'\u0E48', '\u0E49', '\u0E4A', '\u0E4B', '\u0E4C', '\u0E4D', '\u0E4E', '๏',
	'๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗',
	'๘', '๙', '๚', '๛', ' ', ' ', ' ', ' '];

immutable dchar[] ISO_8859_13 = [
	' ', '”', '¢', '£', '¤', '„', '¦', '§',
	'Ø', '©', 'Ŗ', '«', '¬', '\u00AD', '®', 'Æ',
	'°', '±', '²', '³', '“', 'µ', '¶', '·',
	'ø', '¹', 'ŗ', '»', '¼', '½', '¾', 'æ',
	'Ą', 'Į', 'Ā', 'Ć', 'Ä', 'Å', 'Ę', 'Ē',
	'Č', 'É', 'Ź', 'Ė', 'Ģ', 'Ķ', 'Ī', 'Ļ',
	// 0xD5 is O with tilde (not O with double acute)
	'Š', 'Ń', 'Ņ', 'Ó', 'Ō', 'Õ', 'Ö', '×',
	'Ų', 'Ł', 'Ś', 'Ū', 'Ü', 'Ż', 'Ž', 'ß',
	'ą', 'į', 'ā', 'ć', 'ä', 'å', 'ę', 'ē',
	'č', 'é', 'ź', 'ė', 'ģ', 'ķ', 'ī', 'ļ',
	// 0xF5 is o with tilde (not o with double acute)
	'š', 'ń', 'ņ', 'ó', 'ō', 'õ', 'ö', '÷',
	'ų', 'ł', 'ś', 'ū', 'ü', 'ż', 'ž', '’'];

immutable dchar[] ISO_8859_14 = [
	' ', 'Ḃ', 'ḃ', '£', 'Ċ', 'ċ', 'Ḋ', '§',
	'Ẁ', '©', 'Ẃ', 'ḋ', 'Ỳ', '\u00AD', '®', 'Ÿ',
	'Ḟ', 'ḟ', 'Ġ', 'ġ', 'Ṁ', 'ṁ', '¶', 'Ṗ',
	'ẁ', 'ṗ', 'ẃ', 'Ṡ', 'ỳ', 'Ẅ', 'ẅ', 'ṡ',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	// 0xD5 is O with tilde (not O with double acute)
	'Ŵ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ṫ',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Ŷ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	// 0xF5 is o with tilde (not o with double acute)
	'ŵ', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', 'ṫ',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ŷ', 'ÿ'];

immutable dchar[] ISO_8859_15 = [
	' ', '¡', '¢', '£', '€', '¥', 'Š', '§',
	'š', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', 'Ž', 'µ', '¶', '·',
	'ž', '¹', 'º', '»', 'Œ', 'œ', 'Ÿ', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	// 0xD5 is O with tilde (not O with double acute)
	'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	// 0xF5 is o with tilde (not o with double acute)
	'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ'];

immutable dchar[] ISO_8859_16 = [
	' ', 'Ą', 'ą', 'Ł', '€', '„', 'Š', '§',
	'š', '©', 'Ș', '«', 'Ź', '\u00AD', 'ź', 'Ż',
	'°', '±', 'Č', 'ł', 'Ž', '”', '¶', '·',
	'ž', 'č', 'ș', '»', 'Œ', 'œ', 'Ÿ', 'ż',
	'À', 'Á', 'Â', 'Ă', 'Ä', 'Ć', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	// 0xD0 is U+0110 (D with stroke), the capital of the U+0111 at
	// 0xF0 - not the lookalike U+00D0 (eth). This table really does
	// have O with double acute at 0xD5 / 0xF5.
	'\u0110', 'Ń', 'Ò', 'Ó', 'Ô', 'Ő', 'Ö', 'Ś',
	'Ű', 'Ù', 'Ú', 'Û', 'Ü', 'Ę', 'Ț', 'ß',
	'à', 'á', 'â', 'ă', 'ä', 'ć', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'\u0111', 'ń', 'ò', 'ó', 'ô', 'ő', 'ö', 'ś',
	'ű', 'ù', 'ú', 'û', 'ü', 'ę', 'ț', 'ÿ'];

// decimal 128 through 159 of KOI8-R
immutable dchar[] KOI8_R_Lower = [
	'─', '│', '┌', '┐', '└', '┘', '├', '┤',
	'┬', '┴', '┼', '▀', '▄', '█', '▌', '▐',
	'░', '▒', '▓', '⌠', '■', '∙', '√', '≈',
	'≤', '≥', '\u00a0', '⌡', '°', '²', '·', '÷'];

// decimal 160 through 255 of KOI8-R: box drawing, then the lowercase
// Cyrillic letters (0xC0 .. 0xDF), then the uppercase ones (0xE0 .. 0xFF).
immutable dchar[] KOI8_R = [
	'═', '║', '╒', 'ё', '╓', '╔', '╕', '╖',
	'╗', '╘', '╙', '╚', '╛', '╜', '╝', '╞',
	'╟', '╠', '╡', 'Ё', '╢', '╣', '╤', '╥',
	'╦', '╧', '╨', '╩', '╪', '╫', '╬', '©',
	'ю', 'а', 'б', 'ц', 'д', 'е', 'ф', 'г',
	'х', 'и', 'й', 'к', 'л', 'м', 'н', 'о',
	'п', 'я', 'р', 'с', 'т', 'у', 'ж', 'в',
	'ь', 'ы', 'з', 'ш', 'э', 'щ', 'ч', 'ъ',
	'Ю', 'А', 'Б', 'Ц', 'Д', 'Е', 'Ф', 'Г',
	'Х', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О',
	'П', 'Я', 'Р', 'С', 'Т', 'У', 'Ж', 'В',
	'Ь', 'Ы', 'З', 'Ш', 'Э', 'Щ', 'Ч', 'Ъ'];

// decimal 128 through 159 of Windows-1251
immutable dchar[] Windows_1251_Lower = [
	'Ђ', 'Ѓ', '‚', 'ѓ', '„', '…', '†', '‡',
	'€', '‰', 'Љ', '‹', 'Њ', 'Ќ', 'Ћ', 'Џ',
	'ђ', '‘', '’', '“', '”', '•', '–', '—',
	' ', '™', 'љ', '›', 'њ', 'ќ', 'ћ', 'џ'];

// decimal 160 through 255 of Windows-1251
immutable dchar[] Windows_1251 = [
	' ', 'Ў', 'ў', 'Ј', '¤', 'Ґ', '¦', '§',
	'Ё', '©', 'Є', '«', '¬', '\u00AD', '®', 'Ї',
	'°', '±', 'І', 'і', 'ґ', 'µ', '¶', '·',
	'ё', '№', 'є', '»', 'ј', 'Ѕ', 'ѕ', 'ї',
	'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З',
	'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П',
	'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч',
	'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я',
	'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з',
	'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
	'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч',
	'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я'];

// A table with a missing or extra entry would shift every character after
// it, so the sizes are checked at compile time.
static assert(Windows_1252.length == 160 - 128);
static assert(KOI8_R_Lower.length == 160 - 128);
static assert(Windows_1251_Lower.length == 160 - 128);
static assert(ISO_8859_1.length == 256 - 160);
static assert(ISO_8859_2.length == 256 - 160);
static assert(ISO_8859_3.length == 256 - 160);
static assert(ISO_8859_4.length == 256 - 160);
static assert(ISO_8859_5.length == 256 - 160);
static assert(ISO_8859_6.length == 256 - 160);
static assert(ISO_8859_7.length == 256 - 160);
static assert(ISO_8859_8.length == 256 - 160);
static assert(ISO_8859_9.length == 256 - 160);
static assert(ISO_8859_10.length == 256 - 160);
static assert(ISO_8859_11.length == 256 - 160);
static assert(ISO_8859_13.length == 256 - 160);
static assert(ISO_8859_14.length == 256 - 160);
static assert(ISO_8859_15.length == 256 - 160);
static assert(ISO_8859_16.length == 256 - 160);
static assert(KOI8_R.length == 256 - 160);
static assert(Windows_1251.length == 256 - 160);