package parsemail import ( "bytes" "encoding/base64" "fmt" "io" "io/ioutil" "mime" "mime/multipart" "net/mail" "strings" "time" ) const contentTypeMultipartMixed = "multipart/mixed" const contentTypeMultipartAlternative = "multipart/alternative" const contentTypeMultipartRelated = "multipart/related" const contentTypeTextHtml = "text/html" const contentTypeTextPlain = "text/plain" // Parse an email message read from io.Reader into parsemail.Email struct func Parse(r io.Reader) (email Email, err error) { msg, err := mail.ReadMessage(r) if err != nil { return } email, err = createEmailFromHeader(msg.Header) if err != nil { return } contentType, params, err := parseContentType(msg.Header.Get("Content-Type")) if err != nil { return } switch contentType { case contentTypeMultipartMixed: email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"]) case contentTypeMultipartAlternative: email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartAlternative(msg.Body, params["boundary"]) case contentTypeTextPlain: message, _ := ioutil.ReadAll(msg.Body) email.TextBody = strings.TrimSuffix(string(message[:]), "\n") case contentTypeTextHtml: message, _ := ioutil.ReadAll(msg.Body) email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n") default: err = fmt.Errorf("Unknown top level mime type: %s", contentType) } return } func createEmailFromHeader(header mail.Header) (email Email, err error) { hp := headerParser{header: &header} email.Subject = decodeMimeSentence(header.Get("Subject")) email.From = hp.parseAddressList(header.Get("From")) email.Sender = hp.parseAddress(header.Get("Sender")) email.ReplyTo = hp.parseAddressList(header.Get("Reply-To")) email.To = hp.parseAddressList(header.Get("To")) email.Cc = hp.parseAddressList(header.Get("Cc")) email.Bcc = hp.parseAddressList(header.Get("Bcc")) email.Date = hp.parseTime(header.Get("Date")) email.ResentFrom = hp.parseAddressList(header.Get("Resent-From")) email.ResentSender = hp.parseAddress(header.Get("Resent-Sender")) email.ResentTo = hp.parseAddressList(header.Get("Resent-To")) email.ResentCc = hp.parseAddressList(header.Get("Resent-Cc")) email.ResentBcc = hp.parseAddressList(header.Get("Resent-Bcc")) email.ResentMessageID = hp.parseMessageId(header.Get("Resent-Message-ID")) email.MessageID = hp.parseMessageId(header.Get("Message-ID")) email.InReplyTo = hp.parseMessageIdList(header.Get("In-Reply-To")) email.References = hp.parseMessageIdList(header.Get("References")) email.ResentDate = hp.parseTime(header.Get("Resent-Date")) if hp.err != nil { err = hp.err return } //decode whole header for easier access to extra fields //todo: should we decode? aren't only standard fields mime encoded? email.Header, err = decodeHeaderMime(header) if err != nil { return } return } func parseContentType(contentTypeHeader string) (contentType string, params map[string]string, err error) { if contentTypeHeader == "" { contentType = contentTypeTextPlain return } return mime.ParseMediaType(contentTypeHeader) } func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) { pmr := multipart.NewReader(msg, boundary) for { part, err := pmr.NextPart() if err == io.EOF { break } else if err != nil { return textBody, htmlBody, embeddedFiles, err } contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { return textBody, htmlBody, embeddedFiles, err } switch contentType { case contentTypeTextPlain: ppContent, err := ioutil.ReadAll(part) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: ppContent, err := ioutil.ReadAll(part) if err != nil { return textBody, htmlBody, embeddedFiles, err } htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeMultipartAlternative: tb, hb, ef, err := parseMultipartAlternative(part, params["boundary"]) if err != nil { return textBody, htmlBody, embeddedFiles, err } htmlBody += hb textBody += tb embeddedFiles = append(embeddedFiles, ef...) default: if isEmbeddedFile(part) { ef, err := decodeEmbeddedFile(part) if err != nil { return textBody, htmlBody, embeddedFiles, err } embeddedFiles = append(embeddedFiles, ef) } else { return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/related inner mime type: %s", contentType) } } } return textBody, htmlBody, embeddedFiles, err } func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) { pmr := multipart.NewReader(msg, boundary) for { part, err := pmr.NextPart() if err == io.EOF { break } else if err != nil { return textBody, htmlBody, embeddedFiles, err } contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { return textBody, htmlBody, embeddedFiles, err } switch contentType { case contentTypeTextPlain: ppContent, err := ioutil.ReadAll(part) if err != nil { return textBody, htmlBody, embeddedFiles, err } textBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeTextHtml: ppContent, err := ioutil.ReadAll(part) if err != nil { return textBody, htmlBody, embeddedFiles, err } htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n") case contentTypeMultipartRelated: tb, hb, ef, err := parseMultipartRelated(part, params["boundary"]) if err != nil { return textBody, htmlBody, embeddedFiles, err } htmlBody += hb textBody += tb embeddedFiles = append(embeddedFiles, ef...) default: if isEmbeddedFile(part) { ef, err := decodeEmbeddedFile(part) if err != nil { return textBody, htmlBody, embeddedFiles, err } embeddedFiles = append(embeddedFiles, ef) } else { return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/alternative inner mime type: %s", contentType) } } } return textBody, htmlBody, embeddedFiles, err } func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) { mr := multipart.NewReader(msg, boundary) for { part, err := mr.NextPart() if err == io.EOF { break } else if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } if contentType == contentTypeMultipartAlternative { textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"]) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } } else if contentType == contentTypeMultipartRelated { textBody, htmlBody, embeddedFiles, err = parseMultipartRelated(part, params["boundary"]) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } } else if isAttachment(part) { at, err := decodeAttachment(part) if err != nil { return textBody, htmlBody, attachments, embeddedFiles, err } attachments = append(attachments, at) } else { return textBody, htmlBody, attachments, embeddedFiles, fmt.Errorf("Unknown multipart/mixed nested mime type: %s", contentType) } } return textBody, htmlBody, attachments, embeddedFiles, err } func decodeMimeSentence(s string) string { result := []string{} ss := strings.Split(s, " ") for _, word := range ss { dec := new(mime.WordDecoder) w, err := dec.Decode(word) if err != nil { if len(result) == 0 { w = word } else { w = " " + word } } result = append(result, w) } return strings.Join(result, "") } func decodeHeaderMime(header mail.Header) (mail.Header, error) { parsedHeader := map[string][]string{} for headerName, headerData := range header { parsedHeaderData := []string{} for _, headerValue := range headerData { parsedHeaderData = append(parsedHeaderData, decodeMimeSentence(headerValue)) } parsedHeader[headerName] = parsedHeaderData } return mail.Header(parsedHeader), nil } func decodePartData(part *multipart.Part) (io.Reader, error) { encoding := part.Header.Get("Content-Transfer-Encoding") if strings.EqualFold(encoding, "base64") { dr := base64.NewDecoder(base64.StdEncoding, part) dd, err := ioutil.ReadAll(dr) if err != nil { return nil, err } return bytes.NewReader(dd), nil } return nil, fmt.Errorf("Unknown encoding: %s", encoding) } func isEmbeddedFile(part *multipart.Part) bool { return part.Header.Get("Content-Transfer-Encoding") != "" } func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) { cid := decodeMimeSentence(part.Header.Get("Content-Id")) decoded, err := decodePartData(part) if err != nil { return } ef.CID = strings.Trim(cid, "<>") ef.Data = decoded ef.ContentType = part.Header.Get("Content-Type") return } func isAttachment(part *multipart.Part) bool { return part.FileName() != "" } func decodeAttachment(part *multipart.Part) (at Attachment, err error) { filename := decodeMimeSentence(part.FileName()) decoded, err := decodePartData(part) if err != nil { return } at.Filename = filename at.Data = decoded at.ContentType = strings.Split(part.Header.Get("Content-Type"), ";")[0] return } type headerParser struct { header *mail.Header err error } func (hp headerParser) parseAddress(s string) (ma *mail.Address) { if hp.err != nil { return nil } if strings.Trim(s, " \n") != "" { ma, hp.err = mail.ParseAddress(s) return ma } return nil } func (hp headerParser) parseAddressList(s string) (ma []*mail.Address) { if hp.err != nil { return } if strings.Trim(s, " \n") != "" { ma, hp.err = mail.ParseAddressList(s) return } return } func (hp headerParser) parseTime(s string) (t time.Time) { if hp.err != nil || s == "" { return } t, hp.err = time.Parse(time.RFC1123Z, s) if hp.err == nil { return t } t, hp.err = time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", s) return } func (hp headerParser) parseMessageId(s string) string { if hp.err != nil { return "" } return strings.Trim(s, "<> ") } func (hp headerParser) parseMessageIdList(s string) (result []string) { if hp.err != nil { return } for _, p := range strings.Split(s, " ") { if strings.Trim(p, " \n") != "" { result = append(result, hp.parseMessageId(p)) } } return } // Attachment with filename, content type and data (as a io.Reader) type Attachment struct { Filename string ContentType string Data io.Reader } // EmbeddedFile with content id, content type and data (as a io.Reader) type EmbeddedFile struct { CID string ContentType string Data io.Reader } // Email with fields for all the headers defined in RFC5322 with it's attachments and type Email struct { Header mail.Header Subject string Sender *mail.Address From []*mail.Address ReplyTo []*mail.Address To []*mail.Address Cc []*mail.Address Bcc []*mail.Address Date time.Time MessageID string InReplyTo []string References []string ResentFrom []*mail.Address ResentSender *mail.Address ResentTo []*mail.Address ResentDate time.Time ResentCc []*mail.Address ResentBcc []*mail.Address ResentMessageID string HTMLBody string TextBody string Attachments []Attachment EmbeddedFiles []EmbeddedFile }