459 lines
12 KiB
Go
459 lines
12 KiB
Go
|
package parsemail
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"encoding/base64"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"io/ioutil"
|
||
|
"mime"
|
||
|
"mime/multipart"
|
||
|
"net/mail"
|
||
|
"strings"
|
||
|
"time"
|
||
|
)
|
||
|
|
||
|
// Media types that the parser knows how to handle, either as the top-level
// Content-Type of a message or as the Content-Type of a nested MIME part.
const (
	contentTypeMultipartMixed       = "multipart/mixed"
	contentTypeMultipartAlternative = "multipart/alternative"
	contentTypeMultipartRelated     = "multipart/related"
	contentTypeTextHtml             = "text/html"
	contentTypeTextPlain            = "text/plain"
)
|
||
|
|
||
|
// Parse an email message read from io.Reader into parsemail.Email struct
|
||
|
func Parse(r io.Reader) (email Email, err error) {
|
||
|
msg, err := mail.ReadMessage(r)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
email, err = createEmailFromHeader(msg.Header)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
contentType, params, err := parseContentType(msg.Header.Get("Content-Type"))
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
switch contentType {
|
||
|
case contentTypeMultipartMixed:
|
||
|
email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"])
|
||
|
case contentTypeMultipartAlternative:
|
||
|
email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartAlternative(msg.Body, params["boundary"])
|
||
|
case contentTypeTextPlain:
|
||
|
message, _ := ioutil.ReadAll(msg.Body)
|
||
|
email.TextBody = strings.TrimSuffix(string(message[:]), "\n")
|
||
|
case contentTypeTextHtml:
|
||
|
message, _ := ioutil.ReadAll(msg.Body)
|
||
|
email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
|
||
|
default:
|
||
|
err = fmt.Errorf("Unknown top level mime type: %s", contentType)
|
||
|
}
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func createEmailFromHeader(header mail.Header) (email Email, err error) {
|
||
|
hp := headerParser{header: &header}
|
||
|
|
||
|
email.Subject = decodeMimeSentence(header.Get("Subject"))
|
||
|
email.From = hp.parseAddressList(header.Get("From"))
|
||
|
email.Sender = hp.parseAddress(header.Get("Sender"))
|
||
|
email.ReplyTo = hp.parseAddressList(header.Get("Reply-To"))
|
||
|
email.To = hp.parseAddressList(header.Get("To"))
|
||
|
email.Cc = hp.parseAddressList(header.Get("Cc"))
|
||
|
email.Bcc = hp.parseAddressList(header.Get("Bcc"))
|
||
|
email.Date = hp.parseTime(header.Get("Date"))
|
||
|
email.ResentFrom = hp.parseAddressList(header.Get("Resent-From"))
|
||
|
email.ResentSender = hp.parseAddress(header.Get("Resent-Sender"))
|
||
|
email.ResentTo = hp.parseAddressList(header.Get("Resent-To"))
|
||
|
email.ResentCc = hp.parseAddressList(header.Get("Resent-Cc"))
|
||
|
email.ResentBcc = hp.parseAddressList(header.Get("Resent-Bcc"))
|
||
|
email.ResentMessageID = hp.parseMessageId(header.Get("Resent-Message-ID"))
|
||
|
email.MessageID = hp.parseMessageId(header.Get("Message-ID"))
|
||
|
email.InReplyTo = hp.parseMessageIdList(header.Get("In-Reply-To"))
|
||
|
email.References = hp.parseMessageIdList(header.Get("References"))
|
||
|
email.ResentDate = hp.parseTime(header.Get("Resent-Date"))
|
||
|
|
||
|
if hp.err != nil {
|
||
|
err = hp.err
|
||
|
return
|
||
|
}
|
||
|
|
||
|
//decode whole header for easier access to extra fields
|
||
|
//todo: should we decode? aren't only standard fields mime encoded?
|
||
|
email.Header, err = decodeHeaderMime(header)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func parseContentType(contentTypeHeader string) (contentType string, params map[string]string, err error) {
|
||
|
if contentTypeHeader == "" {
|
||
|
contentType = contentTypeTextPlain
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return mime.ParseMediaType(contentTypeHeader)
|
||
|
}
|
||
|
|
||
|
func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
|
||
|
pmr := multipart.NewReader(msg, boundary)
|
||
|
for {
|
||
|
part, err := pmr.NextPart()
|
||
|
|
||
|
if err == io.EOF {
|
||
|
break
|
||
|
} else if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
switch contentType {
|
||
|
case contentTypeTextPlain:
|
||
|
ppContent, err := ioutil.ReadAll(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
|
||
|
case contentTypeTextHtml:
|
||
|
ppContent, err := ioutil.ReadAll(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
|
||
|
case contentTypeMultipartAlternative:
|
||
|
tb, hb, ef, err := parseMultipartAlternative(part, params["boundary"])
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
htmlBody += hb
|
||
|
textBody += tb
|
||
|
embeddedFiles = append(embeddedFiles, ef...)
|
||
|
default:
|
||
|
if isEmbeddedFile(part) {
|
||
|
ef, err := decodeEmbeddedFile(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
embeddedFiles = append(embeddedFiles, ef)
|
||
|
} else {
|
||
|
return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/related inner mime type: %s", contentType)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
|
||
|
pmr := multipart.NewReader(msg, boundary)
|
||
|
for {
|
||
|
part, err := pmr.NextPart()
|
||
|
|
||
|
if err == io.EOF {
|
||
|
break
|
||
|
} else if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
switch contentType {
|
||
|
case contentTypeTextPlain:
|
||
|
ppContent, err := ioutil.ReadAll(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
|
||
|
case contentTypeTextHtml:
|
||
|
ppContent, err := ioutil.ReadAll(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
|
||
|
case contentTypeMultipartRelated:
|
||
|
tb, hb, ef, err := parseMultipartRelated(part, params["boundary"])
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
htmlBody += hb
|
||
|
textBody += tb
|
||
|
embeddedFiles = append(embeddedFiles, ef...)
|
||
|
default:
|
||
|
if isEmbeddedFile(part) {
|
||
|
ef, err := decodeEmbeddedFile(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
embeddedFiles = append(embeddedFiles, ef)
|
||
|
} else {
|
||
|
return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/alternative inner mime type: %s", contentType)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return textBody, htmlBody, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) {
|
||
|
mr := multipart.NewReader(msg, boundary)
|
||
|
for {
|
||
|
part, err := mr.NextPart()
|
||
|
if err == io.EOF {
|
||
|
break
|
||
|
} else if err != nil {
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
if contentType == contentTypeMultipartAlternative {
|
||
|
textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"])
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, err
|
||
|
}
|
||
|
} else if contentType == contentTypeMultipartRelated {
|
||
|
textBody, htmlBody, embeddedFiles, err = parseMultipartRelated(part, params["boundary"])
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, err
|
||
|
}
|
||
|
} else if isAttachment(part) {
|
||
|
at, err := decodeAttachment(part)
|
||
|
if err != nil {
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
attachments = append(attachments, at)
|
||
|
} else {
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, fmt.Errorf("Unknown multipart/mixed nested mime type: %s", contentType)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return textBody, htmlBody, attachments, embeddedFiles, err
|
||
|
}
|
||
|
|
||
|
// decodeMimeSentence decodes the RFC 2047 encoded-words contained in s and
// returns the resulting text. Text that is not an encoded-word is kept as is,
// including the whitespace that separates it from a neighbouring encoded-word;
// whitespace between two adjacent encoded-words is dropped.
//
// If s contains an encoded-word in a charset the decoder cannot convert, the
// function falls back to decoding each space-separated word on its own and
// leaves the undecodable ones untouched, so it never fails.
func decodeMimeSentence(s string) string {
	dec := new(mime.WordDecoder)

	if decoded, err := dec.DecodeHeader(s); err == nil {
		return decoded
	}

	// DecodeHeader gives up on the whole string when a single charset is
	// unsupported; salvage the words that can still be decoded.
	words := strings.Split(s, " ")
	for i, word := range words {
		if decoded, err := dec.Decode(word); err == nil {
			words[i] = decoded
		}
	}

	return strings.Join(words, " ")
}
|
||
|
|
||
|
func decodeHeaderMime(header mail.Header) (mail.Header, error) {
|
||
|
parsedHeader := map[string][]string{}
|
||
|
|
||
|
for headerName, headerData := range header {
|
||
|
|
||
|
parsedHeaderData := []string{}
|
||
|
for _, headerValue := range headerData {
|
||
|
parsedHeaderData = append(parsedHeaderData, decodeMimeSentence(headerValue))
|
||
|
}
|
||
|
|
||
|
parsedHeader[headerName] = parsedHeaderData
|
||
|
}
|
||
|
|
||
|
return mail.Header(parsedHeader), nil
|
||
|
}
|
||
|
|
||
|
// decodePartData reads the whole body of part, undoes its
// Content-Transfer-Encoding and returns the decoded bytes as an in-memory
// reader.
//
// The encoding name is compared case-insensitively with surrounding spaces
// ignored. "base64" bodies are decoded; "7bit", "8bit" and "binary" are
// identity encodings and are returned unchanged, as is a part without the
// header. The last case also covers quoted-printable parts, because
// mime/multipart decodes those transparently and hides the header. Any other
// encoding is reported as an error.
func decodePartData(part *multipart.Part) (io.Reader, error) {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	var src io.Reader
	switch strings.ToLower(strings.TrimSpace(encoding)) {
	case "base64":
		src = base64.NewDecoder(base64.StdEncoding, part)
	case "", "7bit", "8bit", "binary":
		src = part
	default:
		return nil, fmt.Errorf("Unknown encoding: %s", encoding)
	}

	data, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, err
	}

	return bytes.NewReader(data), nil
}
|
||
|
|
||
|
// isEmbeddedFile reports whether part carries a non-empty
// Content-Transfer-Encoding header, which is the criterion the multipart
// parsers use to treat an otherwise unhandled part as an embedded file.
func isEmbeddedFile(part *multipart.Part) bool {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	return len(encoding) > 0
}
|
||
|
|
||
|
func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
|
||
|
cid := decodeMimeSentence(part.Header.Get("Content-Id"))
|
||
|
decoded, err := decodePartData(part)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
ef.CID = strings.Trim(cid, "<>")
|
||
|
ef.Data = decoded
|
||
|
ef.ContentType = part.Header.Get("Content-Type")
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// isAttachment reports whether part declares a file name, i.e. whether
// part.FileName returns a non-empty string.
func isAttachment(part *multipart.Part) bool {
	name := part.FileName()

	return len(name) > 0
}
|
||
|
|
||
|
func decodeAttachment(part *multipart.Part) (at Attachment, err error) {
|
||
|
filename := decodeMimeSentence(part.FileName())
|
||
|
decoded, err := decodePartData(part)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
at.Filename = filename
|
||
|
at.Data = decoded
|
||
|
at.ContentType = strings.Split(part.Header.Get("Content-Type"), ";")[0]
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// headerParser converts raw header values into typed values while remembering
// the first error it runs into. Once err is set every further parse call
// returns a zero value without doing any work, so a caller can convert many
// fields in a row and check err a single time afterwards.
//
// All methods use pointer receivers: they have to store the error in the
// parser itself, not in a copy of it.
type headerParser struct {
	header *mail.Header
	err    error
}

// parseAddress parses s as a single RFC 5322 address. It returns nil when s is
// blank, when a previous call already failed, or when s cannot be parsed (in
// which case the error is recorded in hp.err).
func (hp *headerParser) parseAddress(s string) (ma *mail.Address) {
	if hp.err != nil {
		return nil
	}

	if strings.Trim(s, " \n") == "" {
		return nil
	}

	ma, hp.err = mail.ParseAddress(s)

	return ma
}

// parseAddressList parses s as a comma-separated list of RFC 5322 addresses.
// It returns nil when s is blank or when a previous call already failed; a
// parse failure is recorded in hp.err.
func (hp *headerParser) parseAddressList(s string) (ma []*mail.Address) {
	if hp.err != nil {
		return nil
	}

	if strings.Trim(s, " \n") == "" {
		return nil
	}

	ma, hp.err = mail.ParseAddressList(s)

	return ma
}

// parseTime parses s as a message date. It returns the zero time when s is
// empty or when a previous call already failed.
//
// The two layouts historically supported by this package are tried first;
// anything else is handed to mail.ParseDate, which understands the remaining
// RFC 5322 date forms. Only if that fails as well is the error recorded in
// hp.err.
func (hp *headerParser) parseTime(s string) (t time.Time) {
	if hp.err != nil || s == "" {
		return t
	}

	layouts := [...]string{time.RFC1123Z, "Mon, 2 Jan 2006 15:04:05 -0700"}
	for _, layout := range layouts {
		if parsed, err := time.Parse(layout, s); err == nil {
			return parsed
		}
	}

	t, hp.err = mail.ParseDate(s)

	return t
}

// parseMessageId strips surrounding angle brackets and spaces from a message
// id. It returns "" when a previous call already failed.
func (hp *headerParser) parseMessageId(s string) string {
	if hp.err != nil {
		return ""
	}

	return strings.Trim(s, "<> ")
}

// parseMessageIdList splits s on whitespace and returns the message id of each
// non-empty element. It returns nil when s holds no ids or when a previous
// call already failed.
func (hp *headerParser) parseMessageIdList(s string) (result []string) {
	if hp.err != nil {
		return nil
	}

	for _, id := range strings.Fields(s) {
		result = append(result, hp.parseMessageId(id))
	}

	return result
}
|
||
|
|
||
|
// Attachment is a file attached to a message. It carries the file name, the
// content type and the data, which is exposed as an io.Reader.
type Attachment struct {
	// Filename is the name of the attached file.
	Filename string
	// ContentType is the media type of the attached file.
	ContentType string
	// Data gives access to the content of the attached file.
	Data io.Reader
}
|
||
|
|
||
|
// EmbeddedFile is a file embedded in a message body. It carries the content
// id, the content type and the data, which is exposed as an io.Reader.
type EmbeddedFile struct {
	// CID is the content id identifying the embedded file.
	CID string
	// ContentType is the content type of the embedded file.
	ContentType string
	// Data gives access to the content of the embedded file.
	Data io.Reader
}
|
||
|
|
||
|
// Email with fields for all the headers defined in RFC5322 with it's attachments and
|
||
|
type Email struct {
|
||
|
Header mail.Header
|
||
|
|
||
|
Subject string
|
||
|
Sender *mail.Address
|
||
|
From []*mail.Address
|
||
|
ReplyTo []*mail.Address
|
||
|
To []*mail.Address
|
||
|
Cc []*mail.Address
|
||
|
Bcc []*mail.Address
|
||
|
Date time.Time
|
||
|
MessageID string
|
||
|
InReplyTo []string
|
||
|
References []string
|
||
|
|
||
|
ResentFrom []*mail.Address
|
||
|
ResentSender *mail.Address
|
||
|
ResentTo []*mail.Address
|
||
|
ResentDate time.Time
|
||
|
ResentCc []*mail.Address
|
||
|
ResentBcc []*mail.Address
|
||
|
ResentMessageID string
|
||
|
|
||
|
HTMLBody string
|
||
|
TextBody string
|
||
|
|
||
|
Attachments []Attachment
|
||
|
EmbeddedFiles []EmbeddedFile
|
||
|
}
|