
sf at apache
Apr 14, 2012, 2:30 AM
Post #1 of 1
(25 views)
Permalink
|
|
svn commit: r1326076 - in /httpd/mod_mbox/trunk: ./ module-2.0/mbox_parse.c module-2.0/mbox_parse.h module-2.0/mbox_thread.c module-2.0/mod_mbox.c module-2.0/mod_mbox.h module-2.0/mod_mbox_cte.c module-2.0/mod_mbox_mime.c module-2.0/mod_mbox_out.c
|
|
Author: sf Date: Sat Apr 14 09:30:48 2012 New Revision: 1326076 URL: http://svn.apache.org/viewvc?rev=1326076&view=rev Log: Merge branch convert-charsets Now, mails are converted to UTF-8. Minimum httpd version is 2.3.15 Modified: httpd/mod_mbox/trunk/ (props changed) httpd/mod_mbox/trunk/module-2.0/mbox_parse.c httpd/mod_mbox/trunk/module-2.0/mbox_parse.h httpd/mod_mbox/trunk/module-2.0/mbox_thread.c httpd/mod_mbox/trunk/module-2.0/mod_mbox.c httpd/mod_mbox/trunk/module-2.0/mod_mbox.h httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c Propchange: httpd/mod_mbox/trunk/ ------------------------------------------------------------------------------ Merged /httpd/mod_mbox/branches/convert-charsets:r1226496-1326075 Modified: httpd/mod_mbox/trunk/module-2.0/mbox_parse.c URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_parse.c?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mbox_parse.c (original) +++ httpd/mod_mbox/trunk/module-2.0/mbox_parse.c Sat Apr 14 09:30:48 2012 @@ -59,6 +59,7 @@ typedef struct mb_dbm_data const char *subject; const char *references; const char *content_type; + const char *charset; const char *boundary; } mb_dbm_data; @@ -67,10 +68,8 @@ typedef struct mb_dbm_data */ void mbox_fillbuf(MBOX_BUFF *fb) { - int len; - if (fb->fd) { - len = strlen(fb->b); + int len = strlen(fb->b); /* We are backed by a file descriptor. * Read a new set of characters in. 
@@ -507,6 +506,9 @@ static void normalize_message(request_re if (!m->content_type || !*m->content_type) m->content_type = "text/plain"; + if (m->charset && !*m->charset) + m->charset = NULL; + apr_time_exp_gmt(&time_exp, m->date); m->str_date = (char *) apr_pcalloc(r->pool, APR_RFC822_DATE_LEN); @@ -571,6 +573,7 @@ static apr_status_t fetch_msgc(apr_pool_ fetch_cstring(pool, msgc->subject, msgValue.dptr, pos, tlen); fetch_cstring(pool, msgc->references, msgValue.dptr, pos, tlen); fetch_cstring(pool, msgc->content_type, msgValue.dptr, pos, tlen); + fetch_cstring(pool, msgc->charset, msgValue.dptr, pos, tlen); fetch_cstring(pool, msgc->boundary, msgValue.dptr, pos, tlen); return APR_SUCCESS; @@ -627,6 +630,7 @@ static apr_status_t store_msgc(apr_pool_ sstrlen(msgc->subject) + sizeof(tlen) + sstrlen(msgc->references) + sizeof(tlen) + sstrlen(msgc->content_type) + sizeof(tlen) + + sstrlen(msgc->charset) + sizeof(tlen) + sstrlen(msgc->boundary) + sizeof(tlen); value = apr_palloc(pool, vlen); @@ -648,6 +652,7 @@ static apr_status_t store_msgc(apr_pool_ store_cstring(msgc->subject, value, pos, tlen); store_cstring(msgc->references, value, pos, tlen); store_cstring(msgc->content_type, value, pos, tlen); + store_cstring(msgc->charset, value, pos, tlen); store_cstring(msgc->boundary, value, pos, tlen); msgValue.dptr = (char *) value; @@ -807,11 +812,12 @@ apr_status_t mbox_generate_index(request temp = apr_table_get(table, "Content-Type"); if (temp) { - char *p, *boundary, *dup; + char *p, *boundary, *dup, *charset; dup = apr_pstrdup(tpool, temp); boundary = mbox_strcasestr(dup, "boundary="); + charset = mbox_strcasestr(dup, "charset="); if (boundary) { - msgc.boundary += sizeof("boundary=") - 1; + boundary += strlen("boundary="); if (boundary[0] == '"') { ++boundary; if ((p = strstr(boundary, "\""))) { @@ -824,7 +830,22 @@ apr_status_t mbox_generate_index(request } } } + if (charset) { + charset += strlen("charset="); + if (charset[0] == '"') { + ++charset; + if ((p = 
strstr(charset, "\""))) { + *p = '\0'; + } + } + else { + if ((p = strstr(charset, ";"))) { + *p = '\0'; + } + } + } msgc.boundary = boundary; + msgc.charset = charset; p = strstr(dup, ";"); if (p) { *p = '\0'; @@ -913,6 +934,7 @@ MBOX_LIST *mbox_load_index(request_rec * curMsg->from = apr_pstrdup(r->pool, msgc.from); curMsg->subject = apr_pstrdup(r->pool, msgc.subject); curMsg->content_type = apr_pstrdup(r->pool, msgc.content_type); + curMsg->charset = apr_pstrdup(r->pool, msgc.charset); curMsg->boundary = apr_pstrdup(r->pool, msgc.boundary); curMsg->date = msgc.date; curMsg->raw_ref = apr_pstrdup(r->pool, msgc.references); @@ -980,6 +1002,7 @@ Message *mbox_fetch_index(request_rec *r curMsg->from = apr_pstrdup(r->pool, msgc.from); curMsg->subject = apr_pstrdup(r->pool, msgc.subject); curMsg->content_type = apr_pstrdup(r->pool, msgc.content_type); + curMsg->charset = apr_pstrdup(r->pool, msgc.charset); curMsg->boundary = apr_pstrdup(r->pool, msgc.boundary); curMsg->date = msgc.date; curMsg->raw_ref = apr_pstrdup(r->pool, msgc.references); @@ -1033,13 +1056,13 @@ static apr_table_t *fetch_first_headers( { apr_status_t status; apr_table_t *table = NULL; + MBOX_BUFF b; #ifdef APR_HAS_MMAP apr_finfo_t fi; + const char *temp; #else char buf[HUGE_STRING_LEN + 1]; #endif - MBOX_BUFF b; - const char *temp; #ifdef APR_HAS_MMAP status = apr_file_name_get(&temp, f); Modified: httpd/mod_mbox/trunk/module-2.0/mbox_parse.h URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_parse.h?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mbox_parse.h (original) +++ httpd/mod_mbox/trunk/module-2.0/mbox_parse.h Sat Apr 14 09:30:48 2012 @@ -118,6 +118,7 @@ typedef struct mbox_mime_message char *boundary; char *content_type; + char *charset; char *content_encoding; char *content_disposition; char *content_name; @@ -142,6 +143,7 @@ struct Message_Struct char 
*rfc822_date; char *content_type; + char *charset; char *boundary; mbox_cte_e cte; Modified: httpd/mod_mbox/trunk/module-2.0/mbox_thread.c URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_thread.c?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mbox_thread.c (original) +++ httpd/mod_mbox/trunk/module-2.0/mbox_thread.c Sat Apr 14 09:30:48 2012 @@ -59,13 +59,13 @@ static int is_reply(Message *m) */ static char *strip_subject(apr_pool_t *p, Message *m) { - char *newVal = m->subject, *match = m->subject, *tmp = 0; + char *newVal, *match = m->subject, *tmp = NULL; /* Match the following cases: Re:, RE:, RE[1]:, Re: Re[2]: Re: */ while (match && *match) { /* When we don't have a match, tmp contains the "real" subject. */ tmp = newVal = match; - match = 0; + match = NULL; if (*newVal == 'R' && (*++newVal == 'e' || *newVal == 'E')) { /* Note to self. In pure compressed syntax, the famous dangling * else occurs. Oh, well. 
*/ Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox.c URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox.c?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mod_mbox.c (original) +++ httpd/mod_mbox/trunk/module-2.0/mod_mbox.c Sat Apr 14 09:30:48 2012 @@ -177,28 +177,29 @@ char *mbox_wrap_text(char *str) return str; } -char *mbox_ascii_escape(apr_pool_t *p, const char *s) +/* Escape control chars */ +char *mbox_cntrl_escape(apr_pool_t *p, char *s) { int i, j; char *x; /* first, count the number of extra characters */ for (i = 0, j = 0; s[i] != '\0'; i++) - if (!apr_isascii(s[i])) + if (apr_iscntrl(s[i])) j += 5; if (j == 0) - return apr_pstrmemdup(p, s, i); + return s; x = apr_palloc(p, i + j + 1); - for (i = 0, j = 0; s[i] != '\0'; i++, j++) - if (!apr_isascii(s[i])) { - char *esc = apr_psprintf(p, "&#%3.3d;", (unsigned char)s[i]); - memcpy(&x[j], esc, 6); + for (i = 0, j = 0; s[i] != '\0'; i++, j++) { + if (apr_iscntrl(s[i])) { + snprintf(&x[j], 7, "&#%3.3d;", (unsigned char)s[i]); j += 5; } else x[j] = s[i]; + } x[j] = '\0'; return x; Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox.h URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox.h?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mod_mbox.h (original) +++ httpd/mod_mbox/trunk/module-2.0/mod_mbox.h Sat Apr 14 09:30:48 2012 @@ -24,8 +24,10 @@ #include "http_protocol.h" #include "http_request.h" #include "util_script.h" +#include "util_varbuf.h" #include "apr_date.h" +#include "apr_lib.h" #include "apr_strings.h" #include "apr_dbm.h" #include "apr_hash.h" @@ -120,17 +122,20 @@ apr_size_t mbox_cte_decode_qp(char *p); apr_size_t mbox_cte_decode_b64(char *src); apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s, apr_size_t len, char 
**body); -char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src); char *mbox_cte_decode_header(apr_pool_t *p, char *src); +apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset, + const char *src, apr_size_t len, + struct ap_varbuf *vb); /* MIME decoding functions */ -mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p, +mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p, char *body, char *ct, + char *charset, mbox_cte_e cte, char *boundary); char *mbox_mime_decode_body(apr_pool_t *p, mbox_cte_e cte, char *body, apr_size_t len, apr_size_t *ret_len); -char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m); +char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m); void mbox_mime_display_static_structure(request_rec *r, mbox_mime_message_t *m, char *link); @@ -139,18 +144,18 @@ void mbox_mime_display_xml_structure(req /* Utility functions */ char *mbox_wrap_text(char *str); -char *mbox_ascii_escape(apr_pool_t *p, const char *s); +char *mbox_cntrl_escape(apr_pool_t *p, char *s); const char *get_base_path(request_rec *r); const char *get_base_uri(request_rec *r); const char *get_base_name(request_rec *r); -#if AP_MODULE_MAGIC_AT_LEAST(20081231,0) +/* XXX This should enforce that the result is valid UTF-8 */ #define ESCAPE_OR_BLANK(pool, s) \ -(s ? ap_escape_html2(pool, s, 1) : "") -#else -#define ESCAPE_OR_BLANK(pool, s) \ -(s ? mbox_ascii_escape(pool, ap_escape_html(pool, s)) : "") -#endif +(s ? mbox_cntrl_escape(pool, ap_escape_html(pool, s)) : "") + +/* XXX This should enforce that the result is valid UTF-8 */ +#define ESCAPE_AND_CONV_HDR(pool, s) \ +(s ? mbox_cntrl_escape(pool, ap_escape_html(pool, mbox_cte_decode_header(pool, s))) : "") #define URI_ESCAPE_OR_BLANK(pool, s) \ (s ? 
ap_escape_uri(pool, s) : "") Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c (original) +++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c Sat Apr 14 09:30:48 2012 @@ -80,6 +80,7 @@ const char *mbox_cte_to_char(mbox_cte_e /* Unlike the original ap_escape_html, this one is also binary * safe. + * The result is always NUL-terminated */ apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s, apr_size_t len, char **body) @@ -107,7 +108,7 @@ apr_size_t mbox_cte_escape_html(apr_pool /* Otherwise, we have some extra characters to insert : allocate enough space for them, and process the data. */ else { - x = apr_palloc(p, i + j); + x = apr_palloc(p, i + j + 1); for (i = 0, j = 0; i < len; i++, j++) { if (s[i] == '<') { @@ -126,6 +127,7 @@ apr_size_t mbox_cte_escape_html(apr_pool x[j] = s[i]; } } + x[j] = '\0'; } *body = x; @@ -244,53 +246,94 @@ apr_size_t mbox_cte_decode_qp(char *p) return len; } +apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset, + const char *src, apr_size_t len, + struct ap_varbuf *vb) +{ + apr_xlate_t *convset; + apr_status_t rv; + apr_size_t outbytes_left, inbytes_left = len; + char *dst; + if (len <= 0) + return APR_SUCCESS; + rv = apr_xlate_open(&convset, "UTF-8", charset, p); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, + "could not open convset '%s'", charset); + return rv; + } + ap_log_error(APLOG_MARK, APLOG_TRACE6, rv, ap_server_conf, + "using convset %s", charset); + + while (inbytes_left > 0) { + ap_varbuf_grow(vb, vb->strlen + inbytes_left + 8); + dst = vb->buf + vb->strlen; + outbytes_left = vb->avail - vb->strlen; + rv = apr_xlate_conv_buffer(convset, src + len - inbytes_left, &inbytes_left, + dst, 
&outbytes_left); + if (rv != APR_SUCCESS) { + *dst = '\0'; + goto out; + } + vb->strlen = vb->avail - outbytes_left; + } + ap_varbuf_grow(vb, vb->strlen + 8); + outbytes_left = vb->avail - vb->strlen; + dst = vb->buf + vb->strlen; + rv = apr_xlate_conv_buffer(convset, NULL, NULL, dst, &outbytes_left); + if (rv != APR_SUCCESS) { + *dst = '\0'; + goto out; + } + vb->strlen = vb->avail - outbytes_left; + vb->buf[vb->strlen] = '\0'; + +out: + apr_xlate_close(convset); + return rv; +} + /* This function performs the decoding of strings like : * =?UTF-8?B?QnJhbmtvIMSMaWJlag==?= * * These strings complies to the following syntax : * =?charset?mode?data?= rest + * + * Appends decoded string to vb, resturns + * position where to continue parsing. */ -char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src) +static char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb) { - apr_xlate_t *xlate; - char *charset, *mode, *data, *rest; int i; + apr_status_t rv; + apr_size_t data_len; - /* Look for charset */ - charset = strstr(src, "=?"); - if (!charset) { + if (strncmp(src, "=?", 2) != 0) return src; - } - *charset = 0; - charset += strlen("=?"); + charset = src + strlen("=?"); /* Encoding mode (first '?' 
after charset) */ mode = strstr(charset, "?"); if (!mode) { return src; } - *mode = 0; mode++; /* Fetch data */ data = strstr(mode, "?"); - if (!data) { + if (!data || data != mode + 1) return src; - } - *data = 0; data++; /* Look for the end bound */ rest = strstr(data, "?="); - if (!rest) { + if (!rest) return src; - } - *rest = 0; + data = apr_pstrmemdup(p, data, rest - data); /* Quoted-Printable decoding : mode 'q' */ if ((*mode == 'q') || (*mode == 'Q')) { - apr_size_t data_len; int i; /* In QP header encoding, spaces are encoded either in =20 (as @@ -304,77 +347,78 @@ char *mbox_cte_decode_rfc2047(apr_pool_t } data_len = mbox_cte_decode_qp(data); - data[data_len] = 0; } else if ((*mode == 'b') || (*mode == 'B')) { - apr_size_t data_len; - data_len = mbox_cte_decode_b64(data); - data[data_len] = 0; + } + else { + return src; } /* Convert charset to uppercase */ + charset = apr_pstrmemdup(p, charset, mode - charset - 1); for (i = 0; i < strlen(charset); i++) { charset[i] = toupper(charset[i]); } /* Charset conversion */ - if (apr_xlate_open(&xlate, "UTF-8", charset, p) == APR_SUCCESS) { - apr_size_t inbytes_left, outbytes_left; - apr_size_t outbuf_len = strlen(data); - - char *new_data; - - /* Allocate some memory for our resulting data, and initialize - counters. */ - new_data = apr_palloc(p, outbuf_len); - inbytes_left = strlen(data); - outbytes_left = strlen(data); - - /* Convert */ - // apr_xlate_conv_buffer(xlate, data, &inbytes_left, - // new_data, &outbytes_left); - - // new_data[outbuf_len - outbytes_left] = 0; - // data = new_data; - - apr_xlate_close(xlate); + rv = mbox_cte_convert_to_utf8(p, charset, data, data_len, vb); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, + "conversion from %s to utf-8 failed", charset); + *rest = '?'; + return src; } - - return data; + return rest + strlen("?=");; } /* MIME header decoding (see RFC 2047). 
*/ char *mbox_cte_decode_header(apr_pool_t *p, char *src) { - char *start, *end, *part; - char *result = ""; + char *start, *cont; + struct ap_varbuf vb; + int seen_encoded_word = 0; + if (src == NULL || *src == '\0') + return ""; + ap_varbuf_init(p, &vb, 0); + vb.strlen = 0; do { - char c; - start = strstr(src, "=?"); if (!start) { - result = apr_psprintf(p, "%s%s", result, src); - return result; + if (vb.strlen == 0) + return src; + return apr_pstrcat(p, vb.buf, src, NULL); } - end = strstr(start, "?="); - if (!end) { - result = apr_psprintf(p, "%s%s", result, src); - return result; + if (start != src) { + if (seen_encoded_word) { + /* space between consecutive encoded words must be discarded */ + char *p = src; + while (p < start && apr_isspace(*p)) + p++; + if (p == start) + src = start; + /* XXX: this is wrong if the next encoded word fails to decode */ + } + if (start != src) { + ap_varbuf_strmemcat(&vb, src, start - src); + seen_encoded_word = 0; + } } - c = *start; - *start = 0; - result = apr_psprintf(p, "%s%s", result, src); - *start = c; - - part = mbox_cte_decode_rfc2047(p, start); - result = apr_psprintf(p, "%s%s", result, part); - - src = end + 2; + cont = mbox_cte_decode_rfc2047(p, start, &vb); + if (cont == start) { + /* decoding failed, copy start delimiter and continue */ + ap_varbuf_strmemcat(&vb, start, 2); + src = start + 2; + } + else { + src = cont; + seen_encoded_word = 1; + } } while (src && *src); - return result; + /* vb.buf is pool memory */ + return vb.buf; } Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c (original) +++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c Sat Apr 14 09:30:48 2012 @@ -18,11 +18,99 @@ */ #include "mod_mbox.h" +#include <apr_lib.h> #ifdef 
APLOG_USE_MODULE APLOG_USE_MODULE(mbox); #endif +/** + * find certain header line, return copy of first part (up to first ";") + * @param p pool to allocate from + * @param name name of the header + * @param string input: pointer to pointer string where to find the header; + * output: pointer to the ";" or "\n" after the copied value + * @param end pointer where to stop searching + * @note string must be NUL-terminated (but the NUL may be after *end) + * @return copy of the header value or NULL if not found + */ +static char *mbox_mime_get_header(apr_pool_t *p, const char *name, + char **string, const char *end) +{ + char *ptr; + int namelen = strlen(name); + for (ptr = *string; + ptr && *ptr && ptr < end ; + ptr = ap_strchr(ptr + 1, '\n')) + { + int l; + if (strncasecmp(ptr, name, namelen) != 0) + continue; + ptr += namelen; + if (*ptr != ':') + continue; + ptr++; + while (*ptr == ' ') + ptr++; + if (ptr >= end) + break; + l = strcspn(ptr, ";\n"); + *string = ptr + l; + while (apr_isspace(ptr[l]) && l > 0) + l--; + return apr_pstrndup(p, ptr, l); + } + return NULL; +} + +/** + * find value for parameter with certain name + * @param p pool to allocate from + * @param name name of the attribute + * @param string string with name=value pairs separated by ";", + * value may be a quoted string delimited by double quotes + * @param end pointer where to stop searching + * @note string must be NUL-terminated (but the NUL may be after *end) + * @return copy of the value, NULL if not found + */ +static char *mbox_mime_get_parameter(apr_pool_t *p, const char *name, + const char *string, const char *end) +{ + const char *ptr = string; + int namelen = strlen(name); + while (ptr && *ptr && ptr < end) { + int have_match = 0; + const char *val_end; + while (*ptr && apr_isspace(*ptr)) + ptr++; + if (strncasecmp(ptr, name, namelen) == 0) { + ptr += strlen(name); + while (*ptr && apr_isspace(*ptr) && ptr < end) + ptr++; + if (*ptr == '=') { + have_match = 1; + ptr++; + if (ptr >= 
end) + break; + while (*ptr && apr_isspace(*ptr) && ptr < end) + ptr++; + } + } + if (!have_match) + ptr += strcspn(ptr, "= \t"); + if (*ptr == '"') + val_end = ap_strchr_c(++ptr, '"'); + else + val_end = ptr + strcspn(ptr, ";\n "); + if (!val_end || val_end > end) + val_end = end; + if (have_match) + return apr_pstrmemdup(p, ptr, val_end - ptr); + ptr = val_end + 1; + } + return NULL; +} + static apr_status_t cleanup_mime_msg(void *data) { mbox_mime_message_t *mail = data; @@ -33,15 +121,17 @@ static apr_status_t cleanup_mime_msg(voi /* Decode a multipart (or not) email. In order to support multiple * levels of MIME parts, this function is recursive. */ -mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p, char *body, - char *ct, mbox_cte_e cte, - char *boundary) +mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p, char *body, + char *ct, char *charset, + mbox_cte_e cte, char *boundary) { mbox_mime_message_t *mail; - char *tmp = NULL, *k = NULL, *end_bound = NULL; + char *tmp = NULL, *end_bound = NULL; char *headers_bound = NULL; if (!body) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "mbox_mime_decode_multipart: no body"); return NULL; } @@ -49,6 +139,8 @@ mbox_mime_message_t *mbox_mime_decode_mu if (!ct) { headers_bound = ap_strstr(body, "\n\n"); if (!headers_bound) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "no '\\n\\n' header separator found"); return NULL; } } @@ -56,142 +148,59 @@ mbox_mime_message_t *mbox_mime_decode_mu headers_bound = body; } - /* If no Content-Type is provided, it means that we are parsing a - sub-part of the multipart message. The Content-Type header - should then be the first line of the part. If not, use - text/plain as default for the sub-part. 
*/ - tmp = ap_strstr(body, "Content-Type: "); - if (!ct && (!tmp || tmp > headers_bound)) { - ct = "text/plain"; - } - mail = apr_pcalloc(p, sizeof(mbox_mime_message_t)); /* make sure the memory allocated by realloc() below is cleaned up */ apr_pool_cleanup_register(p, mail, cleanup_mime_msg, apr_pool_cleanup_null); - /* If no Content-Type is given, we have to look for it. */ if (!ct) { - tmp += sizeof("Content-Type: ") - 1; - k = strchr(tmp, ';'); - - /* Isolate the Content-Type string (between 'Content-Type: ' - and ';' or end of line */ - if (k && k < headers_bound) { - *k = 0; + /* If no Content-Type is provided, it means that we are parsing a + * sub-part of the multipart message. The Content-Type header + * should then be the first line of the part. If not, use + * text/plain as default for the sub-part. + */ + tmp = body; + ct = mbox_mime_get_header(p, "Content-Type", &tmp, headers_bound); + if (!ct) { + ct = "text/plain"; } else { - k = tmp; - while (*k) { - if (isspace(*k)) { - *k = 0; - break; - } - k++; - } - } - - /* Copy the Content-Type and reset *k */ - mail->content_type = apr_pstrdup(p, tmp); - *k = ';'; - - /* If available, get MIME part name */ - tmp = ap_strstr(body, "name="); - if (tmp && tmp < headers_bound) { - char c = '\0'; - tmp += sizeof("name=") - 1; - k = tmp; - - while (*k) { - if (isspace(*k) || *k == ';') { - c = *k; - *k = 0; - break; - } - k++; - } - - /* Check for double quotes */ - if ((*tmp == '"') && (tmp[strlen(tmp) - 1] == '"')) { - mail->content_name = - apr_pstrndup(p, tmp + 1, strlen(tmp) - 2); - } - else { - mail->content_name = apr_pstrdup(p, tmp); - } - - *k = c; + if (!charset) + charset = mbox_mime_get_parameter(p, "charset", tmp, headers_bound); + mail->content_name = mbox_mime_get_parameter(p, "name", tmp, headers_bound); } + mail->content_type = ct; } else { mail->content_type = ct; + if (!charset) + charset = mbox_mime_get_parameter(p, "charset", ct, ct + strlen(ct)); } + mail->charset = charset; /* Now we have a 
Content-Type. Look for other useful header information */ /* Check Content-Disposition if the match is within the headers */ - tmp = ap_strstr(body, "Content-Disposition: "); - if (tmp && tmp < headers_bound) { - char c = '\0'; - tmp += sizeof("Content-Disposition: ") - 1; - k = tmp; - - while (*k) { - if (isspace(*k) || *k == ';') { - c = *k; - *k = 0; - break; - } - k++; - } - - /* Copy the Content-Disposition and reset *k */ - mail->content_disposition = apr_pstrdup(p, tmp); - *k = c; - } - else { - mail->content_disposition = apr_pstrdup(p, "inline"); - } + tmp = body; + mail->content_disposition = mbox_mime_get_header(p, "Content-Disposition", &tmp, headers_bound); + if (!mail->content_disposition) + mail->content_disposition = "inline"; /* Check Content-Transfer-Encoding, if needed */ if (cte == CTE_NONE) { - tmp = ap_strstr(body, "Content-Transfer-Encoding: "); - if (tmp && tmp < headers_bound) { - char c = '\0'; - tmp += sizeof("Content-Transfer-Encoding: ") - 1; - k = tmp; - - while (*k) { - if (isspace(*k) || *k == ';') { - c = *k; - *k = 0; - break; - } - k++; - } - - /* Copy the Content-Disposition and reset *k */ + tmp = body; + tmp = mbox_mime_get_header(p, "Content-Transfer-Encoding", &tmp, headers_bound); + if (tmp) mail->cte = mbox_parse_cte_header(tmp); - *k = c; - } } else { mail->cte = cte; } - /* Now we have all the headers we need. Start processing the - body. If the Content-Type was given at call time, the body - starts where it's given. Otherwise it's after the headers - (first new empty line) */ - - if (ct) { + /* Now we have all the headers we need. Start processing the body */ + if (headers_bound == body) mail->body = body; - } - else { - mail->body = ap_strstr(body, "\n\n"); - if (mail->body != NULL) { - mail->body += 2; - } - } + else + mail->body = headers_bound + 2; /* skip double new line */ /* If the mail is a multipart message, search for the boundary, and process its sub parts by recursive calls. 
*/ @@ -201,30 +210,16 @@ mbox_mime_message_t *mbox_mime_decode_mu /* If the boundary was not given, we must look for it in the headers */ if (!boundary) { - tmp = ap_strstr(body, "boundary=\""); - if (!tmp) { + boundary = mbox_mime_get_parameter(p, "boundary", body, headers_bound); + if (!boundary) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "invalid multipart message: no boundary defined"); return NULL; } - - tmp += sizeof("boundary=\"") - 1; - k = tmp; - - while (*k) { - if (*k == '"') { - *k = 0; - break; - } - k++; - } - - mail->boundary = apr_pstrdup(p, tmp); - *k = '"'; - } - - /* Otherwise, the boundary is as given to us */ - else { - mail->boundary = boundary; } + mail->boundary = boundary; + ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, + "decoding multipart message: boundary %s", boundary); /* Now we have our boundary string. We must : look for it once (begining of MIME part) and then look for the end boundary : @@ -264,8 +259,10 @@ mbox_mime_message_t *mbox_mime_decode_mu /* Allocate a new pointer for the sub part, and parse it. */ mail->sub = realloc(mail->sub, ++count * sizeof(struct mimemsg *)); + ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, + "decoding part %d", count); mail->sub[count - 1] = - mbox_mime_decode_multipart(p, search, NULL, CTE_NONE, NULL); + mbox_mime_decode_multipart(r, p, search, NULL, NULL, CTE_NONE, NULL); /* If the boudary is found again, it means we have another MIME part in the same multipart message. Set the new @@ -286,6 +283,9 @@ mbox_mime_message_t *mbox_mime_decode_mu /* Finally reset the end-body pointer. */ // *tmp = '-'; + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "done decoding multipart message (boundary %s)", + boundary); } /* If the parsed body is not multipart or is a MIME part, the body @@ -330,43 +330,74 @@ char *mbox_mime_decode_body(apr_pool_t * return new_body; } - /* This function returns the relevant MIME part from a message. For * the moment, it just returns the first text/ MIME part available. 
*/ -char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m) +char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m) { int i; /* If the message structure or the message body is empty, just return NULL */ if (!m || !m->body) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "mbox_mime_get_body: %s", + m == NULL ? "no message???" : "no body"); return MBOX_FETCH_ERROR_STR; } if (strncasecmp(m->content_type, "text/", strlen("text/")) == 0) { char *new_body; - - new_body = - mbox_mime_decode_body(p, m->cte, m->body, m->body_len, NULL); + apr_size_t new_len; + new_body = mbox_mime_decode_body(p, m->cte, m->body, m->body_len, + &new_len); if (!new_body) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "mbox_mime_get_body: could not decode body"); return MBOX_FETCH_ERROR_STR; } - m->body_len = - mbox_cte_escape_html(p, new_body, m->body_len, &(m->body)); + if (m->charset) { + struct ap_varbuf vb; + apr_status_t rv; + ap_varbuf_init(p, &vb, 0); + vb.strlen = 0; + ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r, + "mbox_mime_get_body: converting %d bytes from %s", + new_len, m->charset); + if ((rv = mbox_cte_convert_to_utf8(p, m->charset, new_body, new_len, &vb)) + == APR_SUCCESS) { + new_body = vb.buf; + new_len = vb.strlen + 1; + } + else { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, + "conversion from '%s' to utf-8 failed", m->charset); + } + ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r, + "mbox_mime_get_body: conversion done"); + } - return apr_pstrndup(p, m->body, m->body_len); + mbox_cte_escape_html(p, new_body, new_len, &new_body); + return new_body; } if (!m->sub) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "mbox_mime_get_body: message not text/* and no sub parts"); return MBOX_FETCH_ERROR_STR; } for (i = 0; i < m->sub_count; i++) { - return mbox_mime_get_body(p, m->sub[i]); + /* XXX this loop is bullshit, should check result of mbox_mime_get_body() */ + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + 
"mbox_mime_get_body: choosing m->sub[%d]", i); + return mbox_mime_get_body(r, p, m->sub[i]); } + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "m->sub != NULL but m->subcount == 0 ???"); + return MBOX_FETCH_ERROR_STR; } @@ -390,10 +421,12 @@ void mbox_mime_display_static_structure( } if (m->content_name) { - ap_rprintf(r, "%s", m->content_name); + ap_rprintf(r, "%s (%s)", + ESCAPE_OR_BLANK(r->pool, m->content_name), + ESCAPE_OR_BLANK(r->pool, m->content_type)); } else { - ap_rprintf(r, "Unnamed %s", m->content_type); + ap_rprintf(r, "Unnamed %s", ESCAPE_OR_BLANK(r->pool, m->content_type)); } if (m->body_len) { Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c?rev=1326076&r1=1326075&r2=1326076&view=diff ============================================================================== --- httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c (original) +++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c Sat Apr 14 09:30:48 2012 @@ -49,9 +49,9 @@ static void display_atom_entry(request_r char *c; ap_rputs("<entry>\n", r); - ap_rprintf(r, "<title>%s</title>\n", ESCAPE_OR_BLANK(pool, m->subject)); + ap_rprintf(r, "<title>%s</title>\n", ESCAPE_AND_CONV_HDR(pool, m->subject)); ap_rprintf(r, "<author><name>%s</name></author>\n", - ESCAPE_OR_BLANK(pool, m->from)); + ESCAPE_AND_CONV_HDR(pool, m->from)); ap_rprintf(r, "<link rel=\"alternate\" href=\"%s%s/%s\"/>\n", ap_construct_url(r->pool, r->uri, r), @@ -79,12 +79,13 @@ static void display_atom_entry(request_r load_message(pool, f, m); /* Parse multipart information */ - m->mime_msg = mbox_mime_decode_multipart(pool, m->raw_body, + m->mime_msg = mbox_mime_decode_multipart(r, pool, m->raw_body, m->content_type, + m->charset, m->cte, m->boundary); ap_rprintf(r, "%s", - mbox_ascii_escape(pool, mbox_wrap_text(mbox_mime_get_body(pool, m->mime_msg)))); + mbox_cntrl_escape(pool, mbox_wrap_text(mbox_mime_get_body(r, pool, m->mime_msg)))); 
ap_rputs("\n</pre>\n</div>\n</content>\n", r); ap_rputs("</entry>\n", r); @@ -381,7 +382,11 @@ apr_status_t mbox_static_index_boxlist(r return APR_SUCCESS; } -/* Antispam protection */ +/* Antispam protection, + * proper order is: + * apply mbox_cte_decode_header(), then email_antispam(), then + * ESCAPE_OR_BLANK() + */ static char *email_antispam(char *email) { char *tmp; @@ -418,22 +423,22 @@ static void display_static_msglist_entry { mbox_dir_cfg_t *conf; - char *from; + char *tmp; int i; conf = ap_get_module_config(r->per_dir_config, &mbox_module); - from = ESCAPE_OR_BLANK(r->pool, m->str_from); - from = mbox_cte_decode_header(r->pool, from); - if (conf->antispam) { - from = email_antispam(from); - } - /* Message author */ ap_rputs(" <tr>\n", r); + tmp = mbox_cte_decode_header(r->pool, m->str_from); + if (conf->antispam) { + tmp = email_antispam(tmp); + } + tmp = ESCAPE_OR_BLANK(r->pool, tmp); + if (linked) { - ap_rprintf(r, " <td class=\"author\">%s</td>\n", from); + ap_rprintf(r, " <td class=\"author\">%s</td>\n", tmp); } else { ap_rputs(" <td class=\"author\"></td>\n", r); @@ -450,7 +455,7 @@ static void display_static_msglist_entry URI_ESCAPE_OR_BLANK(r->pool, m->msgID)); } - ap_rprintf(r, "%s", ESCAPE_OR_BLANK(r->pool, m->subject)); + ap_rprintf(r, "%s", ESCAPE_AND_CONV_HDR(r->pool, m->subject)); if (linked) { ap_rputs("</a>", r); } @@ -482,17 +487,17 @@ static void display_xml_msglist_entry(re if (conf->antispam) { from = email_antispam(from); } + from = ESCAPE_OR_BLANK(r->pool, from); ap_rprintf(r, " <message linked=\"%d\" depth=\"%d\" id=\"%s\">\n", linked, depth, ESCAPE_OR_BLANK(r->pool, m->msgID)); - ap_rprintf(r, " <from><![CDATA[%s]]></from>\n", - ESCAPE_OR_BLANK(r->pool, from)); + ap_rprintf(r, " <from><![CDATA[%s]]></from>\n", from); ap_rprintf(r, " <date><![CDATA[%s]]></date>\n", ESCAPE_OR_BLANK(r->pool, m->str_date)); ap_rprintf(r, " <subject><![CDATA[%s]]></subject>\n", - ESCAPE_OR_BLANK(r->pool, m->subject)); + ESCAPE_AND_CONV_HDR(r->pool, 
m->subject)); ap_rprintf(r, " </message>\n"); } @@ -991,8 +996,9 @@ int mbox_raw_message(request_rec *r, apr /* First, parse the MIME structure, and look for the correct subpart */ - m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body, + m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body, m->content_type, + m->charset, m->cte, m->boundary); mime_part = m->mime_msg; @@ -1111,7 +1117,7 @@ int mbox_static_message(request_rec *r, Message *m; const char *baseURI; - char *from, **context, *msgID, *escaped_msgID; + char *from, **context, *msgID, *escaped_msgID, *subject; conf = ap_get_module_config(r->per_dir_config, &mbox_module); baseURI = get_base_uri(r); @@ -1132,12 +1138,13 @@ int mbox_static_message(request_rec *r, } /* Parse multipart information */ - m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body, + m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body, m->content_type, + m->charset, m->cte, m->boundary); - send_page_header(r, - ESCAPE_OR_BLANK(r->pool, m->subject), + subject = ESCAPE_AND_CONV_HDR(r->pool, m->subject); + send_page_header(r, subject, apr_psprintf(r->pool, "%s mailing list archives", get_base_name(r)), 0); @@ -1153,11 +1160,11 @@ int mbox_static_message(request_rec *r, "List index</a></h5>", get_base_path(r)); /* Display context message list */ - from = ESCAPE_OR_BLANK(r->pool, m->from); - from = mbox_cte_decode_header(r->pool, from); + from = mbox_cte_decode_header(r->pool, m->from); if (conf->antispam) { from = email_antispam(from); } + from = ESCAPE_OR_BLANK(r->pool, from); ap_rputs(" <table class=\"static\" id=\"msgview\">\n", r); @@ -1186,7 +1193,7 @@ int mbox_static_message(request_rec *r, ap_rprintf(r, " <tr class=\"subject\">\n" " <td class=\"left\">Subject</td>\n" " <td class=\"right\">%s</td>\n" - " </tr>\n", ESCAPE_OR_BLANK(r->pool, m->subject)); + " </tr>\n", subject); ap_rprintf(r, " <tr class=\"date\">\n" " <td class=\"left\">Date</td>\n" @@ -1196,7 +1203,7 @@ int 
mbox_static_message(request_rec *r, /* Message body */ ap_rputs(" <tr class=\"contents\"><td colspan=\"2\"><pre>\n", r); ap_rprintf(r, "%s", - mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg))); + mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg))); ap_rputs("</pre></td></tr>\n", r); /* MIME structure */ @@ -1228,8 +1235,7 @@ apr_status_t mbox_xml_message(request_re { mbox_dir_cfg_t *conf; Message *m; - - char *from, *msgID; + char *from, *subj, *msgID; conf = ap_get_module_config(r->per_dir_config, &mbox_module); @@ -1243,8 +1249,9 @@ apr_status_t mbox_xml_message(request_re } /* Parse multipart information */ - m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body, + m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body, m->content_type, + m->charset, m->cte, m->boundary); ap_rputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", r); @@ -1254,6 +1261,7 @@ apr_status_t mbox_xml_message(request_re from = email_antispam(from); } from = ESCAPE_OR_BLANK(r->pool, from); + subj = ESCAPE_AND_CONV_HDR(r->pool, m->subject); ap_rprintf(r, "<mail id=\"%s\">\n" " <from><![CDATA[%s]]></from>\n" @@ -1261,12 +1269,11 @@ apr_status_t mbox_xml_message(request_re " <date><![CDATA[%s]]></date>\n" " <contents><![CDATA[", URI_ESCAPE_OR_BLANK(r->pool, m->msgID), - from, - ESCAPE_OR_BLANK(r->pool, m->subject), + from, subj, ESCAPE_OR_BLANK(r->pool, m->rfc822_date)); ap_rprintf(r, "%s", - mbox_ascii_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg)))); + mbox_cntrl_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg)))); ap_rputs("]]></contents>\n", r); ap_rputs(" <mime>\n", r); mbox_mime_display_xml_structure(r, m->mime_msg,
|