/++
	Create MIME emails with things like HTML, attachments, and send with convenience wrappers around std.net.curl's SMTP function, or read email from an mbox file.

	For preparing and sending outgoing email, see [EmailMessage]. For processing incoming email or opening .eml files, mbox files, etc., see [IncomingEmailMessage].

	History:
		Originally released as open source on August 11, 2012. The last-modified date of its predecessor file was January 2011.

		Many of the public string members were overhauled on May 13, 2024. Compatibility methods are provided so your code will hopefully still work, but this also results in some stricter adherence to email encoding rules, so you should retest if you update after then.

	Future_Directions:
		I might merge `IncomingEmailMessage` and `EmailMessage` some day, it seems silly to have them completely separate like this.
+/
module arsd.email;

import std.net.curl;

import std.base64;
import std.string;
import std.range;
import std.utf;
import std.array;
import std.algorithm.iteration;

import arsd.characterencodings;

public import arsd.core : FilePath;

// import std.uuid;
// smtpMessageBoundary = randomUUID().toString();

// SEE ALSO: std.net.curl.SMTP

/++
	Credentials for a SMTP relay, as passed to [std.net.curl.SMTP].
+/
struct RelayInfo {
	/++
		Should be as a url, such as `smtp://example.com` or `smtps://example.com`. You normally want smtp:// - even if you want TLS encryption, smtp uses STARTTLS so it gets that. smtps will only work if the server supports tls from the start, which is not always the case.
	+/
	string server;
	string username; ///
	string password; ///
}

/++
	Representation of an email attachment.
+/
struct MimeAttachment {
	string type; /// e.g. `text/plain`
	string filename; ///
	const(ubyte)[] content; ///
	string id; ///
}

///
enum ToType {
	to,
	cc,
	bcc
}

/++
	Structured representation of email users, including the name and email address as separate components.

	`EmailRecipient` represents a single user, and `RecipientList` represents multiple users. A "recipient" may also be a from or reply to address.


	`RecipientList` is a wrapper over `EmailRecipient[]` that provides overloads that take string arguments, for compatibility for users of previous versions of the `arsd.email` api. It should generally work as you expect if you just pretend it is a normal array though (and if it doesn't, you can get the internal array via the `recipients` member.)

	History:
		Added May 13, 2024 (dub v12.0) to replace the old plain, public strings and arrays of strings.
+/
struct EmailRecipient {
	/++
		The email user's name. It should not have quotes or any other encoding.

		For example, `Adam D. Ruppe`.
	+/
	string name;
	/++
		The email address. It should not have brackets or any other encoding.

		For example, `destructionator@gmail.com`.
	+/
	string address;

	/++
		Returns a string representing this email address, in a format suitable for inclusion in a message about to be saved or transmitted.

		In many cases, this is easy to read for people too, but not in all cases.

		Params:
			linesep = line separator handed to the header encoder for any folding it performs.
	+/
	string toProtocolString(string linesep = "\r\n") {
		if(name.length)
			return "\"" ~ encodeEmailHeaderContentForTransmit(name, linesep) ~ "\" <" ~ address ~ ">";
		return address;
	}

	/++
		Returns a string representing this email address, in a format suitable for being read by people. This is not necessarily reversible.
	+/
	string toReadableString() {
		if(name.length)
			return "\"" ~ name ~ "\" <" ~ address ~ ">";
		return address;
	}

	/++
		Construct an `EmailRecipient` either from a name and address (preferred!) or from an encoded string as found in an email header.

		Examples:

		`EmailRecipient("Adam D. Ruppe", "destructionator@gmail.com")` or `EmailRecipient(`"Adam D. Ruppe" <destructionator@gmail.com>`);
	+/
	this(string name, string address) {
		this.name = name;
		this.address = address;
	}

	/// ditto
	this(string str) {
		this = str;
	}

	/++
		Provided for compatibility for users of old versions of `arsd.email` - does implicit conversion from `EmailRecipient` to a plain string (in protocol format), as was present in previous versions of the api.
	+/
	alias toProtocolString this;

	/++
		Parses a header-style string into `name` and `address`.

		A string without `<` is taken verbatim as the address. Otherwise everything before the `<` is the (possibly quoted, possibly encoded-word) name and the text between `<` and the first following `>` is the address. A missing `>` is tolerated: the remainder of the string is used.
	+/
	void opAssign(string str) {
		auto idx = str.indexOf("<");
		if(idx == -1) {
			name = null;
			address = str;
		} else {
			name = decodeEncodedWord(unquote(str[0 .. idx].strip));

			// do not assume the closing bracket is the very last character:
			// trailing whitespace or a missing `>` must not corrupt the address
			// or slice out of bounds
			auto rest = str[idx + 1 .. $];
			auto close = rest.indexOf(">");
			if(close != -1)
				rest = rest[0 .. close];
			address = rest.strip;
		}
	}
}

/// ditto
struct RecipientList {
	EmailRecipient[] recipients;

	/// Replaces the list with recipients parsed from header-style strings.
	void opAssign(string[] strings) {
		recipients = null;
		foreach(s; strings)
			recipients ~= EmailRecipient(s);
	}
	/// Replaces the list with the given array (not copied).
	void opAssign(EmailRecipient[] recpts) {
		this.recipients = recpts;
	}

	/// Appends one recipient, either structured or parsed from a string.
	void opOpAssign(string op : "~")(EmailRecipient r) {
		recipients ~= r;
	}
	/// ditto
	void opOpAssign(string op : "~")(string s) {
		recipients ~= EmailRecipient(s);
	}

	/// `foreach` support, with and without an index.
	int opApply(int delegate(size_t idx, EmailRecipient rcp) dg) {
		foreach(idx, item; recipients)
			if(auto result = dg(idx, item))
				return result;
		return 0;
	}
	/// ditto
	int opApply(int delegate(EmailRecipient rcp) dg) {
		foreach(item; recipients)
			if(auto result = dg(item))
				return result;
		return 0;
	}

	/// Number of recipients in the list.
	size_t length() {
		return recipients.length;
	}

	/// Comma-separated protocol form of every recipient, suitable for a `To:`/`Cc:` header value.
	string toProtocolString(string linesep = "\r\n") {
		string ret;
		foreach(idx, item; recipients) {
			if(idx)
				ret ~= ", ";
			ret ~= item.toProtocolString(linesep);
		}
		return ret;
	}

	// forward range primitives over the underlying array
	EmailRecipient front() { return recipients[0]; }
	void popFront() { recipients = recipients[1 .. $]; }
	bool empty() { return recipients.length == 0; }
	RecipientList save() { return this; }
}

/+
	Removes one pair of surrounding double quotes, if present.

	Strings that do not start with `"` are returned unchanged. A leading quote
	without a matching trailing one (including the one-character string `"`)
	only loses the leading quote.
+/
private string unquote(string s) {
	if(s.length == 0 || s[0] != '"')
		return s;
	if(s.length >= 2 && s[$ - 1] == '"')
		return s[1 .. $ - 1]; // strip the quotes
	// FIXME: possible to have \" escapes in there too
	return s[1 .. $];
}

/+
	String wrapper whose hash and equality ignore case, so it can be used as
	an associative array key for header names. `actual` keeps the spelling
	that was first stored.
+/
private struct CaseInsensitiveString {
	string actual;

	size_t toHash() const {
		// hash the lowercased form so strings equal under icmp hash the same
		string l = actual.toLower;
		return typeid(string).getHash(&l);
	}
	bool opEquals(ref const typeof(this) s) const {
		return icmp(s.actual, this.actual) == 0;
	}
	bool opEquals(string s) const {
		return icmp(s, this.actual) == 0;
	}

	alias actual this;
}

/++
	A type that acts similarly to a `string[string]` to hold email headers in a case-insensitive way.
+/
struct HeadersHash {
	string[CaseInsensitiveString] hash;

	string opIndex(string key) const {
		return hash[CaseInsensitiveString(key)];
	}
	string opIndexAssign(string value, string key) {
		return hash[CaseInsensitiveString(key)] = value;
	}
	inout(string)* opBinaryRight(string op : "in")(string key) inout {
		return CaseInsensitiveString(key) in hash;
	}
	alias hash this;
}

unittest {
	HeadersHash h;
	h["From"] = "test";
	h["from"] = "other";
	foreach(k, v; h) {
		assert(k == "From");
		assert(v == "other");
	}

	assert("from" in h);
	assert("From" in h);
	assert(h["from"] == "other");

	const(HeadersHash) ch = HeadersHash([CaseInsensitiveString("From") : "test"]);
	assert(ch["from"] == "test");
	assert("From" in ch);
}

unittest {
	assert(unquote(`"abc"`) == "abc");
	assert(unquote(`abc`) == "abc");
	assert(unquote(`"`) == "");
	assert(unquote(`"abc`) == "abc");

	assert(EmailRecipient(`"Adam" <a@example.com>`).address == "a@example.com");
	assert(EmailRecipient(`<a@example.com> `).address == "a@example.com");
	assert(EmailRecipient(`a@example.com`).address == "a@example.com");
}

/++
	For OUTGOING email


	To use:

	---
	auto message = new EmailMessage();
	message.to ~= "someuser@example.com";
	message.from = "youremail@example.com";
	message.subject = "My 
Subject";
	message.setTextBody("hi there");
	//message.toString(); // get string to send externally
	message.send(); // send via some relay
	// may also set replyTo, etc
	---

	History:
		This class got an API overhaul on May 13, 2024. Some undocumented members were removed, and some public members got changed (albeit in a mostly compatible way).
+/
class EmailMessage {
	/++
		Adds a custom header to the message. The header name should not include a colon and must not duplicate a header set elsewhere in the class; for example, do not use this to set `To`, and instead use the [to] field.

		Setting the same header multiple times will overwrite the old value. It will not set duplicate headers and does not retain the specific order of which you added headers.

		Throws:
			`InvalidArgumentsException` if `name` is empty, contains a colon or a line break, or names a header that is managed through a dedicated member of this class.

		History:
			Prior to May 13, 2024, this assumed the value was previously encoded. This worked most the time but also left open the possibility of incorrectly encoded values, including the possibility of injecting inappropriate headers.

			Since May 13, 2024, it now encodes the header content internally. You should NOT pass pre-encoded values to this function anymore.

			It also would previously allow you to set repeated headers like `Subject` or `To`. These now throw exceptions.

			It previously also allowed duplicate headers. Adding the same thing twice will now silently overwrite the old value instead.
	+/
	void setHeader(string name, string value, string file = __FILE__, size_t line = __LINE__) {
		import arsd.core;
		if(name.length == 0)
			throw new InvalidArgumentsException("name", "name cannot be an empty string", LimitedVariant(name), "setHeader", file, line);
		if(name.indexOf(":") != -1)
			throw new InvalidArgumentsException("name", "do not put a colon in the header name", LimitedVariant(name), "setHeader", file, line);
		// the name is written out verbatim by toString, so a line break in it
		// would let the caller's input start a new, arbitrary header
		if(name.indexOf("\r") != -1 || name.indexOf("\n") != -1)
			throw new InvalidArgumentsException("name", "do not put a line break in the header name", LimitedVariant(name), "setHeader", file, line);
		if(!headerSettableThroughAA(name))
			throw new InvalidArgumentsException("name", "use named methods/properties for this header instead of setHeader", LimitedVariant(name), "setHeader", file, line);

		headers_[name] = value;
	}

	/// Returns `false` for headers this class generates itself (or that belong to the MTA), `true` for anything that may be stored via [setHeader]. The comparison is case-insensitive.
	protected bool headerSettableThroughAA(string name) {
		switch(name.toLower) {
			case "to", "cc", "bcc":
			case "from", "reply-to", "in-reply-to":
			case "subject":
			case "content-type", "content-transfer-encoding", "mime-version":
			case "received", "return-path": // set by the MTA
				return false;
			default:
				return true;
		}
	}

	/++
		Recipients of the message. You can use operator `~=` to add people to this list, or you can also use [addRecipient] to achieve the same result.

		---
		message.to ~= EmailRecipient("Adam D. Ruppe", "destructionator@gmail.com");
		message.cc ~= EmailRecipient("John Doe", "john.doe@example.com");
		// or, same result as the above two lines:
		message.addRecipient("Adam D. Ruppe", "destructionator@gmail.com");
		message.addRecipient("John Doe", "john.doe@example.com", ToType.cc);

		// or, the old style code that still works, but is not recommended, since
		// it is harder to encode properly for anything except pure ascii names:
		message.to ~= `"Adam D. Ruppe" <destructionator@gmail.com>`;
		---

		History:
			On May 13, 2024, the types of these changed. Before, they were `public string[]`; plain string arrays. This put the burden of proper encoding on the user, increasing the probability of bugs. Now, they are [RecipientList]s - internally, an array of `EmailRecipient` objects, but with a wrapper to provide compatibility with the old string-based api.
	+/
	RecipientList to;
	/// ditto
	RecipientList cc;
	/// ditto
	RecipientList bcc;

	/++
		Represents the `From:` and `Reply-To:` header values in the email.


		Note that the `from` member is the "From:" header, which is not necessarily the same as the "envelope from". The "envelope from" is set by the email server usually based on your login credentials. The email server may or may not require these to match.

		History:
			On May 13, 2024, the types of these changed from plain `string` to [EmailRecipient], to try to get the encoding easier to use correctly. `EmailRecipient` offers overloads for string parameters for compatibility, so your code should not need changing, however if you use non-ascii characters in your names, you should retest to ensure it still works correctly.
	+/
	EmailRecipient from;
	/// ditto
	EmailRecipient replyTo;
	/// The `Subject:` header value in the email.
	string subject;
	/// The `In-Reply-to:` header value. This should be set to the same value as the `Message-ID` header from the message you're replying to.
	string inReplyTo;

	private string textBody_;
	private string htmlBody_;

	private HeadersHash headers_;

	/++
		Gets and sets the current text body.

		History:
			Prior to May 13, 2024, this was a simple `public string` member, but still had a [setTextBody] method too. It now is a public property that works through that method.
	+/
	string textBody() {
		return textBody_;
	}
	/// ditto
	void textBody(string text) {
		setTextBody(text);
	}
	/++
		Gets the current html body, if any.

		There is no setter for this property, use [setHtmlBody] instead.

		History:
			Prior to May 13, 2024, this was a simple `public string` member. This let you easily get the `EmailMessage` object into an inconsistent state.
	+/
	string htmlBody() {
		return htmlBody_;
	}

	/++
		If you use the send method with an SMTP server, you don't want to change this.
		While RFC 2045 mandates CRLF as a line separator, there are some edge-cases where this won't work.
		When passing the E-Mail string to a unix program which handles communication with the SMTP server, some (i.e. qmail)
		expect the system line separator (LF) instead.
		Notably, the google mail REST API will choke on CRLF lineseps and produce strange emails (as of 2024).

		Do not change this after calling other methods, since it might break presaved values.
	+/
	string linesep = "\r\n";

	/++
		History:
			Added May 13, 2024
	+/
	this(string linesep = "\r\n") {
		this.linesep = linesep;
	}

	private bool isMime = false;
	private bool isHtml = false;

	/++
		Adds a recipient from a separate display name and address.

		The pair is stored as-is; it is not round-tripped through the header string syntax, so names containing characters such as `<` or `"` are kept intact.
	+/
	void addRecipient(string name, string email, ToType how = ToType.to) {
		auto who = EmailRecipient(name, email);
		final switch(how) {
			case ToType.to:
				to ~= who;
			break;
			case ToType.cc:
				cc ~= who;
			break;
			case ToType.bcc:
				bcc ~= who;
			break;
		}
	}

	/// Adds a recipient given in header string form, e.g. `"Name" <user@example.com>` or a bare address.
	void addRecipient(string who, ToType how = ToType.to) {
		final switch(how) {
			case ToType.to:
				to ~= who;
			break;
			case ToType.cc:
				cc ~= who;
			break;
			case ToType.bcc:
				bcc ~= who;
			break;
		}
	}

	/++
		Sets the plain text body of the email. You can also separately call [setHtmlBody] to set a HTML body.

		Leading and trailing whitespace is stripped from `text`.
	+/
	void setTextBody(string text) {
		textBody_ = text.strip;
	}
	/++
		Sets the HTML body to the mail, which can support rich text, inline images (see [addInlineImage]), etc.

		Automatically sets a text fallback if you haven't already, unless you pass `false` as the `addFallback` template value. Adding the fallback requires [arsd.htmltotext].

		History:
			The `addFallback` parameter was added on May 13, 2024.
	+/
	void setHtmlBody(bool addFallback = true)(string html) {
		isMime = true;
		isHtml = true;
		htmlBody_ = html;

		static if(addFallback) {
			import arsd.htmltotext;
			if(textBody_ is null)
				textBody_ = htmlToText(html);
		}
	}

	const(MimeAttachment)[] attachments;

	/++
		The attachmentFileName is what is shown to the user, not the file on your sending computer. It should NOT have a path in it.
		If you want a filename from your computer, try [addFileAsAttachment].

		The `mimeType` can be excluded if the filename has a common extension supported by the library.

		---
		message.addAttachment("text/plain", "something.txt", std.file.read("/path/to/local/something.txt"));
		---

		History:
			The overload without `mimeType` was added October 28, 2024.

			The parameter `attachmentFileName` was previously called `filename`. This was changed for clarity and consistency with other overloads on October 28, 2024.
	+/
	void addAttachment(string mimeType, string attachmentFileName, const void[] content, string id = null) {
		isMime = true;
		attachments ~= MimeAttachment(mimeType, attachmentFileName, cast(const(ubyte)[]) content, id);
	}


	/// ditto
	void addAttachment(string attachmentFileName, const void[] content, string id = null) {
		import arsd.core;
		addAttachment(FilePath(attachmentFileName).contentTypeFromFileExtension, attachmentFileName, content, id);
	}

	/++
		Reads the local file and attaches it.

		If `attachmentFileName` is null, it uses the filename of `localFileName`, without the directory.

		If `mimeType` is null, it guesses one based on the local file name's file extension.

		If these cannot be determined, it will throw an `InvalidArgumentsException`.

		History:
			Added October 28, 2024
	+/
	void addFileAsAttachment(FilePath localFileName, string attachmentFileName = null, string mimeType = null, string id = null) {
		if(mimeType is null)
			mimeType = localFileName.contentTypeFromFileExtension;
		if(attachmentFileName is null)
			attachmentFileName = localFileName.filename;

		import std.file;

		addAttachment(mimeType, attachmentFileName, std.file.read(localFileName.toString()), id);

		// see also: curl.h :1877 CURLOPT(CURLOPT_XOAUTH2_BEARER, CURLOPTTYPE_STRINGPOINT, 220),
		// also option to force STARTTLS
	}

	/// in the html, use img src="cid:ID_GIVEN_HERE". [setHtmlBody] must have been called first.
	void addInlineImage(string id, string mimeType, string filename, const void[] content) {
		assert(isHtml);
		isMime = true;
		inlineImages ~= MimeAttachment(mimeType, filename, cast(const(ubyte)[]) content, id);
	}

	const(MimeAttachment)[] inlineImages;


	/* we should build out the mime thingy
		related
			mixed
			alternate
	*/

	/++
		Returns the MIME formatted email string, including encoded attachments.

		The result is the header block, one blank line, then the body. `Bcc` recipients are never written to the headers. For MIME messages the parts are nested as `multipart/mixed` (attachments) around `multipart/related` (inline images) around the text or `multipart/alternative` body; layers that are not needed are omitted.
	+/
	override string toString() {
		assert(!isHtml || (isHtml && isMime));

		string[] headers;
		foreach(k, v; this.headers_) {
			if(headerSettableThroughAA(k))
				headers ~= k ~ ": " ~ encodeEmailHeaderContentForTransmit(v, this.linesep);
		}

		if(to.length)
			headers ~= "To: " ~ to.toProtocolString(this.linesep);
		if(cc.length)
			headers ~= "Cc: " ~ cc.toProtocolString(this.linesep);

		if(from.length)
			headers ~= "From: " ~ from.toProtocolString(this.linesep);

			//assert(0, headers[$-1]);

		if(subject !is null)
			headers ~= "Subject: " ~ encodeEmailHeaderContentForTransmit(subject, this.linesep);
		if(replyTo !is null)
			headers ~= "Reply-To: " ~ replyTo.toProtocolString(this.linesep);
		if(inReplyTo !is null)
			headers ~= "In-Reply-To: " ~ encodeEmailHeaderContentForTransmit(inReplyTo, this.linesep);

		if(isMime)
			headers ~= "MIME-Version: 1.0";

		/+
		if(inlineImages.length) {
			headers ~= "Content-Type: multipart/related; boundary=" ~ boundary;
			// so we put the alternative inside as the first attachment with a secondary boundary
			// then we do the images
		} else
		if(attachments.length)
			headers ~= "Content-Type: multipart/mixed; boundary=" ~ boundary;
		else if(isHtml)
			headers ~= "Content-Type: multipart/alternative; boundary=" ~ boundary;
		else
			headers ~= "Content-Type: text/plain; charset=UTF-8";
		+/


		string msgContent;

		if(isMime) {
			// `top` always refers to the outermost container built so far
			MimeContainer top;

			{
				MimeContainer mimeMessage;
				enum NO_TRANSFER_ENCODING = "Content-Transfer-Encoding: 8bit";
				if(isHtml) {
					auto alternative = new MimeContainer("multipart/alternative");
					alternative.stuff ~= new MimeContainer("text/plain; charset=UTF-8", textBody_).with_header(NO_TRANSFER_ENCODING);
					alternative.stuff ~= new MimeContainer("text/html; charset=UTF-8", htmlBody_).with_header(NO_TRANSFER_ENCODING);
					mimeMessage = alternative;
				} else {
					mimeMessage = new MimeContainer("text/plain; charset=UTF-8", textBody_).with_header(NO_TRANSFER_ENCODING);
				}
				top = mimeMessage;
			}

			{
				MimeContainer mimeRelated;
				if(inlineImages.length) {
					mimeRelated = new MimeContainer("multipart/related");

					// the body goes first, the images it references follow
					mimeRelated.stuff ~= top;
					top = mimeRelated;

					foreach(attachment; inlineImages) {
						auto mimeAttachment = new MimeContainer(attachment.type ~ "; name=\""~attachment.filename~"\"");
						mimeAttachment.headers ~= "Content-Transfer-Encoding: base64";
						mimeAttachment.headers ~= "Content-ID: <" ~ attachment.id ~ ">";
						mimeAttachment.content = encodeBase64Mime(cast(const(ubyte)[]) attachment.content, this.linesep);

						mimeRelated.stuff ~= mimeAttachment;
					}
				}
			}

			{
				MimeContainer mimeMixed;
				if(attachments.length) {
					mimeMixed = new MimeContainer("multipart/mixed");

					mimeMixed.stuff ~= top;
					top = mimeMixed;

					foreach(attachment; attachments) {
						auto mimeAttachment = new MimeContainer(attachment.type);
						mimeAttachment.headers ~= "Content-Disposition: attachment; filename=\""~encodeEmailHeaderContentForTransmit(attachment.filename, this.linesep)~"\"";
						mimeAttachment.headers ~= "Content-Transfer-Encoding: base64";
						if(attachment.id.length)
							mimeAttachment.headers ~= "Content-ID: <" ~ attachment.id ~ ">";

						mimeAttachment.content = encodeBase64Mime(cast(const(ubyte)[]) attachment.content, this.linesep);

						mimeMixed.stuff ~= mimeAttachment;
					}
				}
			}

			// the root part's Content-Type lives in the message headers, not in the body
			headers ~= top.contentType;
			msgContent = top.toMimeString(true, this.linesep);
		} else {
			headers ~= "Content-Type: text/plain; charset=UTF-8";
			msgContent = textBody_;
		}


		string msg;
		msg.reserve(htmlBody_.length + textBody_.length + 1024);

		foreach(header; headers)
			msg ~= header ~ this.linesep;
		if(msg.length) // has headers
			msg ~= this.linesep;

		msg ~= msgContent;

		return msg;
	}

	/++
		Sends via a given SMTP relay.

		The envelope recipients are the addresses of everyone in [to], [cc] and [bcc]; the envelope sender is the address of [from]. The message data is the result of [toString].
	+/
	void send(RelayInfo mailServer = RelayInfo("smtp://localhost")) {
		auto smtp = SMTP(mailServer.server);

		// NOTE(review): host and peer certificate verification are disabled here, so
		// a TLS session is not authenticated. Kept as-is for compatibility; confirm
		// whether this should become opt-in.
		smtp.verifyHost = false;
		smtp.verifyPeer = false;
		//smtp.verbose = true;

		{
			// std.net.curl doesn't work well with STARTTLS if you don't
			// put smtps://... and if you do, it errors if you can't start
			// with a TLS connection from the beginning.

			// This change allows ssl if it can.
			import std.net.curl;
			import etc.c.curl;
			smtp.handle.set(CurlOption.use_ssl, CurlUseSSL.tryssl);
		}

		if(mailServer.username.length)
			smtp.setAuthentication(mailServer.username, mailServer.password);

		// Take the envelope address from the structured `address` field rather than
		// re-parsing the encoded "Name" <addr> header form, which breaks as soon as
		// the display name itself contains an angle bracket. Stray brackets in the
		// address field are still tolerated.
		static string envelopeAddress(EmailRecipient person) {
			auto addr = person.address;
			auto open = addr.indexOf("<");
			if(open != -1)
				addr = addr[open + 1 .. $];
			auto close = addr.indexOf(">");
			if(close != -1)
				addr = addr[0 .. close];
			return addr;
		}

		const(char)[][] allRecipients;
		foreach(person; to) allRecipients ~= envelopeAddress(person);
		foreach(person; cc) allRecipients ~= envelopeAddress(person);
		foreach(person; bcc) allRecipients ~= envelopeAddress(person);

		smtp.mailTo(allRecipients);

		smtp.mailFrom = envelopeAddress(from);
		smtp.message = this.toString();
		smtp.perform();
	}
}

/// Convenience wrapper: builds a plain text [EmailMessage] with a single recipient and sends it through `mailServer`.
void email(string to, string subject, string message, string from, RelayInfo mailServer = RelayInfo("smtp://localhost")) {
	auto msg = new EmailMessage();
	msg.from = from;
	msg.to = [to];
	msg.subject = subject;
	msg.textBody_ = message;
	msg.send(mailServer);
}

// private:

import std.conv;

/// for reading
class MimePart {
	string[] headers;
	immutable(ubyte)[] content;
	immutable(ubyte)[] encodedContent; // usually valid only for GPG, and will be cleared by creator; canonical form
	string textContent;
	MimePart[] stuff;

	string name;
	string charset;
	string type;
	string transferEncoding;
	string disposition;
	string id;
	string filename;
	// gpg signatures
	string gpgalg;
	string gpgproto;

	MimeAttachment toMimeAttachment() {
		if(type == "multipart/mixed" && stuff.length == 1)
			return 
stuff[0].toMimeAttachment;

		MimeAttachment att;
		att.type = type;
		// some senders only give the name on Content-Type, not on Content-Disposition
		if(att.type == "application/octet-stream" && filename.length == 0 && name.length > 0 ) {
			att.filename = name;
		} else {
			att.filename = filename;
		}
		att.id = id;
		att.content = content;
		return att;
	}

	/++
		Parses one MIME part from pre-split lines.

		Params:
			lines = the raw lines of the part, without line terminators.
			contentType = if null, `lines` starts with the part's own header block (terminated by an empty line), which is parsed first. Otherwise `lines` is body only and this is the already-known `Content-Type` header value.
	+/
	this(immutable(ubyte)[][] lines, string contentType = null) {
		string boundary;

		void parseContentType(string content) {
			//{ import std.stdio; writeln("c=[", content, "]"); }
			foreach(k, v; breakUpHeaderParts(content)) {
				//{ import std.stdio; writeln("  k=[", k, "]; v=[", v, "]"); }
				switch(k) {
					case "root":
						type = v;
					break;
					case "name":
						name = v;
					break;
					case "charset":
						charset = v;
					break;
					case "boundary":
						boundary = v;
					break;
					case "micalg":
						gpgalg = v;
					break;
					case "protocol":
						gpgproto = v;
					break;
					default:
						// unknown parameters (format, delsp, ...) are ignored; they
						// must not be mistaken for the signature algorithm
					break;
				}
			}
		}

		if(contentType is null) {
			// read headers immediately...
			auto copyOfLines = lines;
			immutable(ubyte)[] currentHeader;

			void commitHeader() {
				if(currentHeader.length == 0)
					return;
				string h = decodeEncodedWord(cast(string) currentHeader);
				headers ~= h;
				currentHeader = null;

				auto idx = h.indexOf(":");
				if(idx != -1) {
					auto name = h[0 .. idx].strip.toLower;
					auto content = h[idx + 1 .. $].strip;

					// pieces of a filename split over several parameters
					string[4] filenames_found;

					switch(name) {
						case "content-type":
							parseContentType(content);
						break;
						case "content-transfer-encoding":
							transferEncoding = content.toLower;
						break;
						case "content-disposition":
							foreach(k, v; breakUpHeaderParts(content)) {
								switch(k) {
									case "root":
										disposition = v;
									break;
									case "filename":
										filename = v;
									break;
									// FIXME: https://datatracker.ietf.org/doc/html/rfc2184#section-3 is what it is SUPPOSED to do
									case "filename*0":
										filenames_found[0] = v;
									break;
									case "filename*1":
										filenames_found[1] = v;
									break;
									case "filename*2":
										filenames_found[2] = v;
									break;
									case "filename*3":
										filenames_found[3] = v;
									break;
									default:
								}
							}
						break;
						case "content-id":
							id = content;
						break;
						default:
					}

					if (filenames_found[0] != "") {
						foreach (string v; filenames_found) {
							this.filename ~= v;
						}
					}
				}
			}

			foreach(line; copyOfLines) {
				// consume the header lines (and the blank separator) from `lines`
				lines = lines[1 .. $];
				if(line.length == 0)
					break;

				if(line[0] == ' ' || line[0] == '\t')
					currentHeader ~= (cast(string) line).stripLeft(); // folded continuation
				else {
					if(currentHeader.length) {
						commitHeader();
					}
					currentHeader = line;
				}
			}

			commitHeader();
		} else {
			parseContentType(contentType);
		}

		// if it is multipart, find the start boundary. we'll break it up and fill in stuff
		// otherwise, all the data that follows is just content

		if(boundary.length) {
			immutable(ubyte)[][] partLines;
			bool inPart;
			// loop invariant, so build the delimiter strings once
			auto delimiter = "--" ~ boundary;
			auto closingDelimiter = delimiter ~ "--";
			foreach(line; lines) {
				if(line.startsWith(delimiter)) {
					if(inPart)
						stuff ~= new MimePart(partLines);
					inPart = true;
					partLines = null;

					if(line == closingDelimiter)
						break; // all done
				}

				if(inPart) {
					partLines ~= line;
				} else {
					content ~= line ~ '\n';
				}
			}
		} else {
			foreach(line; lines) {
				content ~= line;

				// base64 data must stay one continuous run for the decoder
				if(transferEncoding != "base64")
					content ~= '\n';
			}
		}

		// store encoded content for GPG (should be cleared by caller if necessary)
		encodedContent = content;

		// decode the content..
		switch(transferEncoding) {
			case "base64":
				content = Base64.decode(cast(string) content);
			break;
			case "quoted-printable":
				content = decodeQuotedPrintable(cast(string) content);
			break;
			default:
				// no change needed (I hope)
		}

		if(type.indexOf("text/") == 0) {
			if(charset.length == 0)
				charset = "latin1";
			textContent = convertToUtf8Lossy(content, charset);
		}
	}
}

/++
	Splits a structured header value such as `text/plain; charset="utf-8"; format=flowed` into its components.

	The leading value is stored under the key `"root"`; every `name=value` parameter is stored under its name. Double quotes around values are removed and semicolons inside quotes do not split. Parameter names are case-insensitive in MIME, so keys are returned stripped and lowercased (`Charset=` and `charset=` both end up as `"charset"`). Values are returned as written.
+/
string[string] breakUpHeaderParts(string headerContent) {
	string[string] ret;

	string currentName = "root";
	string currentContent;
	bool inQuote = false;
	bool gettingName = false;
	bool ignoringSpaces = false;

	// stores the parameter collected so far under its normalized name
	void commit() {
		ret[currentName.strip.toLower] = currentContent;
	}

	foreach(char c; headerContent) {
		if(ignoringSpaces) {
			if(c == ' ')
				continue;
			else
				ignoringSpaces = false;
		}

		if(gettingName) {
			if(c == '=') {
				gettingName = false;
				continue;
			}
			currentName ~= c;
		}

		if(c == '"') {
			inQuote = !inQuote;
			continue;
		}

		if(!inQuote && c == ';') {
			commit();
			ignoringSpaces = true;
			currentName = null;
			currentContent = null;

			gettingName = true;
			continue;
		}

		if(!gettingName)
			currentContent ~= c;
	}

	// a trailing `;` leaves an empty name behind; nothing to store then
	if(currentName.length)
		commit();

	return ret;
}

// for writing
class MimeContainer {
	// incremented for every container created; used to make boundaries differ
	private static int sequence;

	immutable string _contentType;
	immutable string boundary; // only set for multipart/* containers

	string[] headers; // NOT including content-type
	string content;
	MimeContainer[] stuff;

	/++
		Params:
			contentType = the `Content-Type` value without the boundary parameter. A boundary is generated when it starts with `multipart/`.
			content = body of this part, already in its transfer encoding.
	+/
	this(string contentType, string content = null) {
		this._contentType = contentType;
		this.content = content;
		sequence++;
		if(_contentType.indexOf("multipart/") == 0)
			boundary = "0016e64be86203dd36047610926a" ~ to!string(sequence);
	}

	/// The complete `Content-Type:` header line, including the boundary parameter for multipart containers.
	@property string contentType() {
		string ct = "Content-Type: "~_contentType;
		if(boundary.length)
			ct ~= "; boundary=" ~ boundary;
		return ct;
	}


	/++
		Serializes this part and, recursively, its children.

		Params:
			isRoot = when true the part's own headers are left out, since the caller writes them into the message header block.
			linesep = line separator to use between lines.
	+/
	string toMimeString(bool isRoot = false, string linesep="\r\n") {
		string ret;

		if(!isRoot) {
			ret ~= contentType;
			foreach(header; headers) {
				ret ~= linesep;
				ret ~= encodeEmailHeaderForTransmit(header, linesep);
			}
			ret ~= linesep ~ linesep;
		}

		ret ~= content;

		foreach(thing; stuff) {
			assert(boundary.length);
			ret ~= linesep ~ "--" ~ boundary ~ linesep;
			ret ~= thing.toMimeString(false, linesep);
		}

		if(boundary.length)
			ret ~= linesep ~ "--" ~ boundary ~ "--";

		return ret;
	}
}

import std.algorithm : startsWith;
/++
	Represents a single email from an incoming or saved source consisting of the raw data. Such saved sources include mbox files (which are several concatenated together, see [MboxMessages] for a full reader of these files), .eml files, and Maildir entries.
+/
class IncomingEmailMessage : EmailMessage {
	/++
		Various constructors for parsing an email message.


		The `ref immutable(ubyte)[][]` one is designed for reading a pre-loaded mbox file. 
It updates the ref variable to the point at the next message in the file as it processes. You probably should use [MboxMessages] in a `foreach` loop instead of calling this directly most of the time.

		The `string[]` one takes an ascii or utf-8 file of a single email pre-split into lines.

		The `immutable(ubyte)[]` one is designed for reading an individual message in its own file in the easiest way. Try `new IncomingEmailMessage(cast(immutable(ubyte)[]) std.file.read("filename.eml"));` to use this. You can also use `IncomingEmailMessage.fromFile("filename.eml")` as well.

		Params:
			mboxLines = the raw lines (without line terminators) to parse. Lines are consumed from the front as they are processed; when `asmbox` is true, parsing stops at the `From ` line that starts the next message and that line is left in place.
			asmbox = if true, skip ahead to the first `From ` separator line before reading headers, stop at the next one, and undo `>From` quoting in the body. If false, the headers are expected to start on the first line.

		History:
			The `immutable(ubyte)[]` overload for a single file was added on May 14, 2024.
	+/
	this(ref immutable(ubyte)[][] mboxLines, bool asmbox=true) @trusted {

		enum ParseState {
			lookingForFrom,
			readingHeaders,
			readingBody
		}

		auto state = (asmbox ? ParseState.lookingForFrom : ParseState.readingHeaders);
		string contentType;

		bool isMultipart;
		bool isHtml;
		immutable(ubyte)[][] mimeLines;

		string charset = "latin-1";

		string contentTransferEncoding;

		string headerName;
		string headerContent;

		// Finishes the header currently being accumulated (if any): decodes it,
		// updates the parse flags / message fields it affects and stores it in headers_.
		void commitHeader() {
			if(headerName is null)
				return;

			auto originalHeaderName = headerName;
			headerName = headerName.toLower();
			headerContent = headerContent.strip();

			headerContent = decodeEncodedWord(headerContent);

			if(headerName == "content-type") {
				contentType = headerContent;
				// media types and parameter names are case-insensitive (RFC 2045 section 5.1)
				if(contentType.indexOf("multipart/", CaseSensitive.no) != -1)
					isMultipart = true;
				else if(contentType.indexOf("text/html", CaseSensitive.no) != -1)
					isHtml = true;

				auto charsetIdx = contentType.indexOf("charset=", CaseSensitive.no);
				if(charsetIdx != -1) {
					string cs = contentType[charsetIdx + "charset=".length .. $];
					if(cs.length && cs[0] == '\"')
						cs = cs[1 .. $];

					auto quoteIdx = cs.indexOf("\"");
					if(quoteIdx != -1)
						cs = cs[0 .. quoteIdx];
					auto semicolonIdx = cs.indexOf(";");
					if(semicolonIdx != -1)
						cs = cs[0 .. semicolonIdx];

					cs = cs.strip();
					if(cs.length)
						charset = cs.toLower();
				}
			} else if(headerName == "from") {
				this.from = headerContent;
			} else if(headerName == "to") {
				this.to ~= headerContent;
			} else if(headerName == "subject") {
				this.subject = headerContent;
			} else if(headerName == "content-transfer-encoding") {
				// the value is compared against lowercase literals below, but it is
				// case-insensitive on the wire (RFC 2045 section 6.1), so normalize it
				contentTransferEncoding = headerContent.toLower();
			}

			headers_[originalHeaderName] = headerContent;
			headerName = null;
			headerContent = null;
		}

		lineLoop: while(mboxLines.length) {
			auto rawLine = mboxLines[0];
			// this can needlessly convert headers too, but that won't harm anything since they are 7 bit anyway
			auto line = convertToUtf8Lossy(rawLine, charset);
			auto origline = line;
			line = line.stripRight;

			final switch(state) {
				case ParseState.lookingForFrom:
					if(line.startsWith("From "))
						state = ParseState.readingHeaders;
				break;
				case ParseState.readingHeaders:
					if(line.length == 0) {
						// blank line: end of the header section
						commitHeader();
						state = ParseState.readingBody;
					} else {
						if(line[0] == ' ' || line[0] == '\t') {
							// folded continuation of the previous header
							headerContent ~= " " ~ line.stripLeft();
						} else {
							commitHeader();

							auto idx = line.indexOf(":");
							if(idx == -1)
								headerName = line;
							else {
								headerName = line[0 .. idx];
								headerContent = line[idx + 1 .. $].stripLeft();
							}
						}
					}
				break;
				case ParseState.readingBody:
					if (asmbox) {
						if(line.startsWith("From ")) {
							break lineLoop; // we're at the beginning of the next message
						}
						if(line.startsWith(">>From") || line.startsWith(">From")) {
							// undo the mbox quoting on every representation of the line.
							// Previously only `line` was adjusted, so the plain text branch
							// (which restarts from `origline`) and the multipart branch
							// (which stores the raw bytes) kept the extra '>'.
							line = line[1 .. $];
							origline = origline[1 .. $];
							if(rawLine.length && rawLine[0] == '>')
								rawLine = rawLine[1 .. $];
						}
					}

					if(isMultipart) {
						mimeLines ~= rawLine;
					} else if(isHtml) {
						// html with no alternative and no attachments
						this.htmlBody_ ~= line ~ "\n";
					} else {
						// plain text!
						// we want trailing spaces for "format=flowed", for example, so...
						// only trailing control characters (other than tab) are removed
						line = origline;
						size_t epos = line.length;
						while (epos > 0) {
							char ch = line[epos-1];
							if (ch >= ' ' || ch == '\t') break;
							--epos;
						}
						line = line[0..epos];
						this.textBody_ ~= line ~ "\n";
					}
				break;
			}

			mboxLines = mboxLines[1 .. $];
		}

		if(mimeLines.length) {
			auto part = new MimePart(mimeLines, contentType);
			deeperInTheMimeTree:
			switch(part.type) {
				case "text/html":
					this.htmlBody_ = part.textContent;
				break;
				case "text/plain":
					this.textBody_ = part.textContent;
				break;
				case "multipart/alternative":
					foreach(p; part.stuff) {
						if(p.type == "text/html")
							this.htmlBody_ = p.textContent;
						else if(p.type == "text/plain")
							this.textBody_ = p.textContent;
					}
				break;
				case "multipart/related":
					// the first one is the message itself
					// after that comes attachments that can be rendered inline
					if(part.stuff.length) {
						auto msg = part.stuff[0];
						foreach(thing; part.stuff[1 .. $]) {
							// FIXME: should this be special?
							attachments ~= thing.toMimeAttachment();
						}
						part = msg;
						goto deeperInTheMimeTree;
					}
				break;
				case "multipart/mixed":
					if(part.stuff.length) {
						auto msg = part.stuff[0];
						foreach(thing; part.stuff[1 .. $]) {
							attachments ~= thing.toMimeAttachment();
						}
						part = msg;
						goto deeperInTheMimeTree;
					}

					// FIXME: the more proper way is:
					// check the disposition
					// if none, concat it to make a text message body
					// if inline it is prolly an image to be concated in the other body
					// if attachment, it is an attachment
				break;
				case "multipart/signed":
					// FIXME: it would be cool to actually check the signature
					if (part.stuff.length) {
						auto msg = part.stuff[0];
						//{ import std.stdio; writeln("hdrs: ", part.stuff[0].headers); }
						gpgalg = part.gpgalg;
						gpgproto = part.gpgproto;
						gpgmime = part;
						foreach (thing; part.stuff[1 .. $]) {
							attachments ~= thing.toMimeAttachment();
						}
						part = msg;
						goto deeperInTheMimeTree;
					}
				break;
				default:
					// FIXME: correctly handle more
					if(part.stuff.length) {
						part = part.stuff[0];
						goto deeperInTheMimeTree;
					}
			}
		} else {
			// single part message: undo the transfer encoding of whichever body was collected
			switch(contentTransferEncoding) {
				case "quoted-printable":
					if(this.textBody_.length)
						this.textBody_ = convertToUtf8Lossy(decodeQuotedPrintable(this.textBody_), charset);
					if(this.htmlBody_.length)
						this.htmlBody_ = convertToUtf8Lossy(decodeQuotedPrintable(this.htmlBody_), charset);
				break;
				case "base64":
					if(this.textBody_.length) {
						this.textBody_ = this.textBody_.decodeBase64Mime.convertToUtf8Lossy(charset);
					}
					if(this.htmlBody_.length) {
						this.htmlBody_ = this.htmlBody_.decodeBase64Mime.convertToUtf8Lossy(charset);
					}

				break;
				default:
					// nothing needed
			}
		}

		if(this.htmlBody_.length > 0 && this.textBody_.length == 0) {
			import arsd.htmltotext;
			this.textBody_ = htmlToText(this.htmlBody_);
			textAutoConverted = true;
		}
	}

	/// ditto
	this(string[] lines) {
		auto lns = cast(immutable(ubyte)[][])lines;
		this(lns, false);
	}

	/// ditto
	this(immutable(ubyte)[] fileContent) {
		auto lns = splitLinesWithoutDecoding(fileContent);
		this(lns, false);
	}

	/++
		Convenience method that takes a filename instead of the content.

		Its implementation is simply `return new IncomingEmailMessage(cast(immutable(ubyte)[]) std.file.read(filename));`
		(though i reserve the right to use a different file loading library later, still the same idea)

		History:
			Added May 14, 2024
	+/
	static IncomingEmailMessage fromFile(string filename) {
		import std.file;
		return new IncomingEmailMessage(cast(immutable(ubyte)[]) std.file.read(filename));
	}

	/++
		Returns `true` if the message was parsed as `multipart/signed` with exactly two
		parts, the second of which has the type `application/pgp-signature`.
	+/
	@property bool hasGPGSignature () const nothrow @trusted @nogc {
		MimePart mime = cast(MimePart)gpgmime; // sorry
		if (mime is null) return false;
		if (mime.type != "multipart/signed") return false;
		if (mime.stuff.length != 2) return false;
		if (mime.stuff[1].type != "application/pgp-signature") return false;
		// A type of five characters or fewer is only accepted if it is exactly "text/".
		// The previous code sliced `type[0..5]` here, which is a range violation for
		// types shorter than five characters (including an empty one).
		// NOTE(review): longer types are accepted regardless of their prefix, exactly as
		// before; if the intent was to require `text/*`, that would also reject signed
		// multipart content - confirm before tightening.
		auto signedType = mime.stuff[0].type;
		if (signedType.length <= 5 && signedType != "text/") return false;
		return true;
	}

	/++
		Returns the bytes the signature was computed over: the signed part's headers
		(all but the first entry of its `headers` array) followed by a blank line and
		its encoded content, with every line stripped of trailing whitespace and
		terminated by CRLF (see RFC 3156).

		Returns `null` if there is no signature or if one of the headers is blank.
	+/
	ubyte[] extractGPGData () const nothrow @trusted {
		if (!hasGPGSignature) return null;
		MimePart mime = cast(MimePart)gpgmime; // sorry
		auto signedPart = mime.stuff[0];
		char[] res;
		res.reserve(signedPart.encodedContent.length); // more, actually
		auto partHeaders = signedPart.headers;
		// the first entry is skipped, as before; guard the slice so a part without
		// any headers does not trigger a range violation
		if (partHeaders.length) partHeaders = partHeaders[1..$];
		foreach (string s; partHeaders) {
			while (s.length && s[$-1] <= ' ') s = s[0..$-1];
			if (s.length == 0) return null; // wtf?! empty headers?
			res ~= s;
			res ~= "\r\n";
		}
		res ~= "\r\n";
		// extract content (see rfc3156)
		size_t pos = 0;
		auto ctt = signedPart.encodedContent;
		// last CR/LF is a part of mime signature, actually, so remove it
		if (ctt.length && ctt[$-1] == '\n') {
			ctt = ctt[0..$-1];
			if (ctt.length && ctt[$-1] == '\r') ctt = ctt[0..$-1];
		}
		while (pos < ctt.length) {
			auto epos = pos;
			while (epos < ctt.length && ctt[epos] != '\n') ++epos;
			auto xpos = epos;
			while (xpos > pos && ctt[xpos-1] <= ' ') --xpos; // according to rfc
			res ~= ctt[pos..xpos].dup;
			res ~= "\r\n"; // according to rfc
			pos = epos+1;
		}
		return cast(ubyte[])res;
	}

	/// Returns the content of the signature part, or `null` if [hasGPGSignature] is false.
	immutable(ubyte)[] extractGPGSignature () const nothrow @safe @nogc {
		if (!hasGPGSignature) return null;
		return gpgmime.stuff[1].content;
	}

	/++
		Allows access to the headers in the email as a key/value hash.

		The hash allows access as if it was case-insensitive, but it also still keeps the original case when you loop through it.

		Bugs:
			Duplicate headers are lost in the current implementation; only the most recent copy of any given name is retained.
	+/
	const(HeadersHash) headers() {
		return headers_;
	}

	/++
		Returns the message body as either HTML or text. Gives the same results as through the parent interface, [EmailMessage.htmlBody] and [EmailMessage.textBody].

		If the message was multipart/alternative, both of these will be populated with content from the message. They are supposed to be both the same, but not all senders respect this so you might want to check both anyway.

		If the message was just plain text, `htmlMessageBody` will be `null` and `textMessageBody` will have the original message.

		If the message was just HTML, `htmlMessageBody` contains the original message and `textMessageBody` will contain an automatically converted version (using [arsd.htmltotext]). [textAutoConverted] will be set to `true`.

		History:
			Were public strings until May 14, 2024, when it was changed to property getters instead.
	+/
	string htmlMessageBody() {
		return this.htmlBody_;
	}
	/// ditto
	string textMessageBody() {
		return this.textBody_;
	}
	/// ditto
	bool textAutoConverted;

	// gpg signature fields
	string gpgalg; ///
	string gpgproto; ///
	MimePart gpgmime; ///

	/// The address component of the `From` header.
	string fromEmailAddress() {
		return from.address;
	}

	/// The address of the first `To` recipient, or `null` if there are none.
	string toEmailAddress() {
		if(to.recipients.length)
			return to.recipients[0].address;
		return null;
	}
}

/++
	An mbox file is a concatenated list of individual email messages. This is a range of messages given the content of one of those files.
+/
struct MboxMessages {
	/// Lines of the mbox data that have not been consumed by a message yet.
	immutable(ubyte)[][] linesRemaining;

	/// Splits `data` into lines and parses the first message.
	this(immutable(ubyte)[] data) {
		linesRemaining = splitLinesWithoutDecoding(data);
		popFront();
	}

	/// The most recently parsed message; `null` once the input is exhausted.
	IncomingEmailMessage currentFront;

	///
	IncomingEmailMessage front() {
		return currentFront;
	}

	///
	bool empty() {
		return currentFront is null;
	}

	/// Parses the next message out of [linesRemaining], or marks the range empty if no lines are left.
	void popFront() {
		if(linesRemaining.length)
			currentFront = new IncomingEmailMessage(linesRemaining);
		else
			currentFront = null;
	}
}

/// Returns a range over the messages contained in the given mbox file content.
MboxMessages processMboxData(immutable(ubyte)[] data) {
	return MboxMessages(data);
}

/++
	Splits raw bytes into lines at each LF (byte 10) without interpreting the
	character encoding. A CR (byte 13) directly before the LF is not included
	in the line. A lone CR is not treated as a line break. Data after the last
	LF, if any, becomes the final line.

	The returned lines are slices of `data`, not copies.
+/
immutable(ubyte)[][] splitLinesWithoutDecoding(immutable(ubyte)[] data) {
	immutable(ubyte)[][] ret;

	size_t starting = 0;
	foreach(idx, b; data) {
		if(b != 10)
			continue;

		// leave the CR of a CRLF pair out of the line
		auto lineEnd = (idx > 0 && data[idx - 1] == 13) ? idx - 1 : idx;

		ret ~= data[starting .. lineEnd];
		starting = idx + 1;
	}

	if(starting < data.length)
		ret ~= data[starting .. $];

	return ret;
}

// true if `s` consists solely of space, tab, CR and LF (an empty string counts too)
private bool isOnlyHeaderWhitespace(in char[] s) {
	foreach(char ch; s) {
		if(ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n')
			return false;
	}
	return true;
}

/++
	Decodes RFC 2047 encoded-words (`=?charset?encoding?text?=`) found in a
	header value and returns the result as UTF-8. Text outside of encoded-words
	is kept as it is, except that whitespace separating two adjacent
	encoded-words is dropped, as RFC 2047 section 6.2 requires.

	If anything about the input does not look like a valid encoded-word
	(missing delimiters, unknown encoding letter, invalid base64), the
	original string is returned unchanged.
+/
string decodeEncodedWord(string data) {
	string originalData = data;

	auto delimiter = data.indexOf("=?");
	if(delimiter == -1)
		return data;

	string ret;

	while(delimiter != -1) {
		ret ~= data[0 .. delimiter];
		data = data[delimiter + 2 .. $];

		string charset;
		string encoding;
		string encodedText;

		// FIXME: the insane things should probably throw an
		// exception that keeps a copy of original data for use later

		auto questionMark = data.indexOf("?");
		if(questionMark == -1) return originalData; // not sane

		charset = data[0 .. questionMark];
		data = data[questionMark + 1 .. $];

		questionMark = data.indexOf("?");
		if(questionMark == -1) return originalData; // not sane

		encoding = data[0 .. questionMark];
		data = data[questionMark + 1 .. $];

		questionMark = data.indexOf("?=");
		if(questionMark == -1) return originalData; // not sane

		encodedText = data[0 .. questionMark];
		data = data[questionMark + 2 .. $];

		delimiter = data.indexOf("=?");
		if (delimiter > 0 && isOnlyHeaderWhitespace(data[0 .. delimiter])) {
			// whitespace between encoded words must be ignored because it is only
			// used to separate multiple encoded words (RFC2047 says CRLF SPACE but
			// most clients just use a space)
			data = data[delimiter .. $];
			delimiter = 0;
		}

		immutable(ubyte)[] decodedText;
		if(encoding == "Q" || encoding == "q")
			decodedText = decodeQuotedPrintable(encodedText);
		else if(encoding == "B" || encoding == "b") {
			try
				decodedText = cast(typeof(decodedText)) Base64.decode(encodedText);
			catch(Base64Exception e)
				return originalData; // not sane, same treatment as the other malformed cases
		} else
			return originalData; // wtf

		ret ~= convertToUtf8Lossy(decodedText, charset);
	}

	ret ~= data; // keep the rest since there could be trailing stuff

	return ret;
}

/++
	Decodes quoted-printable text into raw bytes.

	`=XX` (two hex digits, either case) becomes the byte `XX`. A `=` directly
	followed by a line break (LF, CRLF or a lone CR) is a soft line break and is
	removed together with the line break. A `=` at the very end of the input is
	dropped. Any other `=` that is not followed by two hex digits is malformed
	and is passed through literally.

	Params:
		text = the encoded text
		underscoreIsSpace = when `true` (the default, and the historical behavior of this function), `_` decodes to a space as defined for the "Q" encoding of header words in RFC 2047 section 4.2. Pass `false` for message bodies, where RFC 2045 gives `_` no special meaning.
+/
immutable(ubyte)[] decodeQuotedPrintable(string text, bool underscoreIsSpace = true) {
	static int hexValue(ubyte b) {
		if(b >= '0' && b <= '9')
			return b - '0';
		if(b >= 'A' && b <= 'F')
			return b - 'A' + 10;
		if(b >= 'a' && b <= 'f')
			return b - 'a' + 10;
		return -1;
	}

	immutable(ubyte)[] ret;

	auto bytes = cast(immutable(ubyte)[]) text;
	size_t i = 0;
	while(i < bytes.length) {
		immutable b = bytes[i];

		if(b == '_' && underscoreIsSpace) { // RFC2047 4.2.2: a _ may be used to represent a space
			ret ~= ' ';
			i++;
			continue;
		}

		if(b != '=') {
			ret ~= b;
			i++;
			continue;
		}

		auto rest = bytes[i + 1 .. $];

		if(rest.length == 0) {
			// dangling '=' at the end of input: nothing left to join with, drop it
			i++;
			continue;
		}

		if(rest[0] == '\n') {
			// soft line break, LF flavor
			i += 2;
			continue;
		}

		if(rest[0] == '\r') {
			// soft line break, CRLF flavor (or a lone CR). This used to be run through
			// the hex decoder and produced a stray zero byte.
			i += (rest.length > 1 && rest[1] == '\n') ? 3 : 2;
			continue;
		}

		if(rest.length >= 2) {
			immutable high = hexValue(rest[0]);
			immutable low = hexValue(rest[1]);
			if(high >= 0 && low >= 0) {
				ret ~= cast(ubyte) ((high << 4) | low);
				i += 3;
				continue;
			}
		}

		// malformed escape: keep the '=' as it is and carry on with the next byte
		ret ~= b;
		i++;
	}

	return ret;
}

unittest {
	alias bytes = (string s) => cast(immutable(ubyte)[]) s;

	assert(decodeQuotedPrintable("a=3Db") == bytes("a=b"));
	// soft line breaks vanish for both line ending styles
	assert(decodeQuotedPrintable("foo=\nbar") == bytes("foobar"));
	assert(decodeQuotedPrintable("foo=\r\nbar") == bytes("foobar"));
	// underscore handling is selectable
	assert(decodeQuotedPrintable("a_b") == bytes("a b"));
	assert(decodeQuotedPrintable("a_b", false) == bytes("a_b"));
	// malformed escapes pass through
	assert(decodeQuotedPrintable("100=%") == bytes("100=%"));
}

/// Add header UFCS helper
auto with_header(MimeContainer container, string header){
	container.headers ~= header;
	return container;
}

/// Base64 range encoder UFCS helper.
alias base64encode = Base64.encoder;

/// Base64 encoded data with line length of 76 as mandated by RFC 2045 Section 6.8
string encodeBase64Mime(const(ubyte[]) content, string LINESEP = "\r\n") {
	enum LINE_LENGTH = 76;
	/// Only 6 bit of every byte are used; log2(64) = 6
	enum int SOURCE_CHUNK_LENGTH = LINE_LENGTH * 6/8;

	return cast(immutable(char[]))content.chunks(SOURCE_CHUNK_LENGTH).base64encode.join(LINESEP);
}


/// Base64 range decoder UFCS helper.
alias base64decode = Base64.decoder;

/++
	Base64 decoder, ignoring linebreaks which are mandated by RFC2045.

	Spaces and tabs are skipped as well, since RFC 2045 section 6.8 tells decoders
	to ignore characters outside of the base64 alphabet and trailing whitespace
	on a line would otherwise make the decoder throw.
+/
immutable(ubyte[]) decodeBase64Mime(string encodedPart) {
	return cast(immutable(ubyte[])) encodedPart
		.byChar // prevent Autodecoding, which will break Base64 decoder. Since its base64, it's guaranteed to be 7bit ascii
		.filter!((c) => (c != '\r') & (c != '\n') & (c != ' ') & (c != '\t'))
		.base64decode
		.array;
}

unittest {
	// Mime base64 roundtrip
	import std.algorithm.comparison;
	string source = chain(
		repeat('n', 1200), //long line
		"\r\n",
		"äöü\r\n",
		"ඞ\rn",
	).byChar.array;
	assert( source.representation.encodeBase64Mime.decodeBase64Mime.equal(source));
}

unittest {
	import std.algorithm;
	import std.string;
	// Mime message roundtrip
	auto mail = new EmailMessage();
	mail.to = ["recipient@example.org"];
	mail.from = "sender@example.org";
	mail.subject = "Subject";

	auto text = cast(string) chain(
		repeat('n', 1200),
		"\r\n",
		"äöü\r\n",
		"ඞ\r\nlast",
	).byChar.array;
	mail.setTextBody(text);
	mail.addAttachment("text/plain", "attachment.txt", text.representation);
	// In case binary and plaintext get handled differently one day
	mail.addAttachment("application/octet-stream", "attachment.bin", text.representation);

	auto result = new IncomingEmailMessage(mail.toString().split("\r\n"));

	assert(result.subject.equal(mail.subject));
	assert(mail.to.canFind(result.to));
	assert(result.from == mail.from.toString);

	// This roundtrip works modulo trailing newline on the parsed message and LF vs CRLF
	assert(result.textMessageBody.replace("\n", "\r\n").stripRight().equal(mail.textBody_));
	assert(result.attachments.equal(mail.attachments));
}

// true if every char is in the printable ASCII range 32 .. 126 (so no control characters, no newlines, nothing 8 bit)
private bool hasAllPrintableAscii(in char[] s) {
	foreach(ch; s) {
		if(ch < 32)
			return false;
		if(ch >= 127)
			return false;
	}
	return true;
}

/+
	Encodes the content of a header (or a piece of one, like a display name) for transmission.

	Unless `prechecked` is set, content that is shorter than 998 bytes and entirely
	printable ASCII is returned as it is. Everything else is turned into one or more
	RFC 2047 base64 encoded-words with charset UTF-8, separated by `linesep` and a space.

	Each encoded-word carries at most 45 bytes of content, which keeps it within the
	75 character limit of RFC 2047 section 2 (45 bytes are 60 base64 characters, plus
	12 for the `=?UTF-8?B?` / `?=` wrapper), and words are only split between
	characters, never inside a multi-byte UTF-8 sequence, so every word can be
	decoded by itself as the RFC requires.
+/
private string encodeEmailHeaderContentForTransmit(string value, string linesep, bool prechecked = false) {
	if(!prechecked && value.length < 998 && hasAllPrintableAscii(value))
		return value;

	enum wordPrefix = "=?UTF-8?B?";
	enum wordSuffix = "?=";
	enum size_t maxBytesPerWord = 45;

	string ret;
	size_t start = 0;
	do {
		size_t end = value.length;
		if(end - start > maxBytesPerWord) {
			end = start + maxBytesPerWord;

			// value[end] would open the next word; if it is a UTF-8 continuation
			// byte (10xxxxxx) the cut is inside a character, so move it back
			auto adjusted = end;
			while(adjusted > start && (value[adjusted] & 0xC0) == 0x80)
				adjusted--;
			// only invalid UTF-8 can walk all the way back; fall back to the hard cut then
			if(adjusted > start)
				end = adjusted;
		}

		string encoded = cast(string) Base64.encode(cast(const(ubyte)[]) value[start .. end]);

		if(start != 0)
			ret ~= linesep ~ " ";
		ret ~= wordPrefix ~ encoded ~ wordSuffix;

		start = end;
	} while(start < value.length);

	return ret;
}

unittest {
	// 80 bytes made of two-byte characters: has to be split, but never inside a character
	string value;
	foreach(i; 0 .. 40)
		value ~= "ä";

	auto encoded = encodeEmailHeaderContentForTransmit(value, "\r\n");

	string roundtrip;
	foreach(word; encoded.split("\r\n ")) {
		assert(word.length <= 75);
		assert(word[0 .. 10] == "=?UTF-8?B?");
		assert(word[$ - 2 .. $] == "?=");
		auto piece = cast(string) Base64.decode(word[10 .. $ - 2]);
		validate(piece); // throws if a character was cut in half
		roundtrip ~= piece;
	}
	assert(roundtrip == value);
}

/+
	Encodes a complete `Name: value` header line for transmission.

	A line shorter than 998 bytes that is entirely printable ASCII is returned
	as it is (the same string, not a copy). Otherwise the part up to and
	including the first colon is kept, and the stripped remainder is encoded
	with encodeEmailHeaderContentForTransmit.

	Throws: Exception if there is no colon in the line or if the name itself
	is not printable ASCII.
+/
private string encodeEmailHeaderForTransmit(string completeHeader, string linesep) {
	if(completeHeader.length < 998 && hasAllPrintableAscii(completeHeader))
		return completeHeader;

	// note that we are here if there's a newline embedded in the content as well
	auto colon = completeHeader.indexOf(":");
	if(colon == -1) // should never happen!
		throw new Exception("invalid email header - no colon in " ~ completeHeader); // but exception instead of assert since this might happen as result of public data manip

	auto name = completeHeader[0 .. colon + 1];
	if(!hasAllPrintableAscii(name)) // should never happen!
		throw new Exception("invalid email header - improper name: " ~ name); // ditto

	auto value = completeHeader[colon + 1 .. $].strip;

	return
		name ~
		" " ~ // i like that leading space after the colon but it was stripped out of value
		encodeEmailHeaderContentForTransmit(value, linesep, true);
}

unittest {
	auto linesep = "\r\n";
	string test = "Subject: This is an ordinary subject line with no special characters and not exceeding the maximum line length limit.";
	assert(test is encodeEmailHeaderForTransmit(test, linesep)); // returned by identity

	test = "Subject: foo\nbar";
	assert(test !is encodeEmailHeaderForTransmit(test, linesep)); // a newline forces encoding
}

/+
void main() {
	import std.file;
	import std.stdio;

	auto data = cast(immutable(ubyte)[]) std.file.read("/home/me/test_email_data");
	foreach(message; processMboxData(data)) {
		writeln(message.subject);
		writeln(message.textMessageBody);
		writeln("**************** END MESSSAGE **************");
	}
}
+/