diff --git a/src/dc_receive_imf.rs b/src/dc_receive_imf.rs index 8055d5892f..5155629463 100644 --- a/src/dc_receive_imf.rs +++ b/src/dc_receive_imf.rs @@ -2623,4 +2623,26 @@ mod tests { ); assert_eq!(last_msg.from_id, DC_CONTACT_ID_INFO); } + + #[async_std::test] + async fn test_html_only_mail() { + let t = TestContext::new_alice().await; + t.ctx + .set_config(Config::ShowEmails, Some("2")) + .await + .unwrap(); + dc_receive_imf( + &t.ctx, + include_bytes!("../test-data/message/wrong-html.eml"), + "INBOX", + 0, + false, + ) + .await + .unwrap(); + let chats = Chatlist::try_load(&t.ctx, 0, None, None).await.unwrap(); + let msg_id = chats.get_msg_id(0).unwrap(); + let msg = Message::load_from_db(&t.ctx, msg_id).await.unwrap(); + assert_eq!(msg.text.unwrap(), " Guten Abend, \n\n Lots of text \n\n text with Umlaut ä... \n\n MfG [...]"); + } } diff --git a/src/dehtml.rs b/src/dehtml.rs index 9e2f6fb169..f12b4b9bef 100644 --- a/src/dehtml.rs +++ b/src/dehtml.rs @@ -25,7 +25,19 @@ enum AddText { // dehtml() returns way too many newlines; however, an optimisation on this issue is not needed as // the newlines are typically removed in further processing by the caller pub fn dehtml(buf: &str) -> String { - let buf = buf.trim(); + let s = dehtml_quick_xml(buf); + if !s.trim().is_empty() { + return s; + } + let s = dehtml_manually(buf); + if !s.trim().is_empty() { + return s; + } + buf.to_string() +} + +pub fn dehtml_quick_xml(buf: &str) -> String { + let buf = buf.trim().trim_start_matches(""); let mut dehtml = Dehtml { strbuilder: String::with_capacity(buf.len()), @@ -171,9 +183,28 @@ fn dehtml_starttag_cb( } } +pub fn dehtml_manually(buf: &str) -> String { + // Just strip out everything between "<" and ">" + let mut strbuilder = String::new(); + let mut show_next_chars = true; + for c in buf.chars() { + match c { + '<' => show_next_chars = false, + '>' => show_next_chars = true, + _ => { + if show_next_chars { + strbuilder.push(c) + } + } + } + } + strbuilder +} + 
#[cfg(test)] mod tests { use super::*; + use crate::simplify::simplify; #[test] fn test_dehtml() { @@ -182,20 +213,23 @@ mod tests { " Foo ", "[ Foo ](https://example.com)", ), - ("", ""), (" bar ", "* bar *"), (" bar foo", "* bar _ foo"), ("& bar", "& bar"), - // Note missing ' - ("", "[](https://get.delta.chat/)", ), ("", ""), + ("\nfat text", "*fat text*"), + // Invalid html (at least DC should show the text if the html is invalid): + ("\nsome text", "some text"), + ("", ""), ]; for (input, output) in cases { - assert_eq!(dehtml(input), output); + assert_eq!(simplify(dehtml(input), true).0, output); } } diff --git a/test-data/message/wrong-html.eml b/test-data/message/wrong-html.eml new file mode 100644 index 0000000000..9540209c66 --- /dev/null +++ b/test-data/message/wrong-html.eml @@ -0,0 +1,114 @@ +Return-Path: +X-Original-To: alice@example.com +Delivered-To: m045a7e8@dd37930.kasserver.com +Received: from mout.kundenserver.de (mout.kundenserver.de [212.227.126.131]) + by dd37930.kasserver.com (Postfix) with ESMTPS id 271F34B4258C + for ; Thu, 6 Aug 2020 18:40:32 +0200 (CEST) +Received: from oxbsltgw18.schlund.de ([172.19.249.35]) by + mrelayeu.kundenserver.de (mreue009 [213.165.67.103]) with ESMTPSA (Nemesis) + id 1MpDRv-1kW93Y0lGZ-00qjvh for ; Thu, 06 Aug 2020 + 18:40:31 +0200 +Date: Thu, 6 Aug 2020 18:40:30 +0200 (CEST) +From: Camping +Reply-To: Camping +To: Alice +Message-ID: <512278196.1287440.1596732031020@email.ionos.fr> +Subject: Re: subj? +MIME-Version: 1.0 +Content-Type: multipart/related; + boundary="----=_Part_1287438_2124736777.1596732031007" +X-Mailer: Open-Xchange Mailer v7.10.1-Rev32 +X-Originating-Client: open-xchange-appsuite + +------=_Part_1287438_2124736777.1596732031007 +MIME-Version: 1.0 +Content-Type: text/html; charset=UTF-8 +Content-Transfer-Encoding: quoted-printable + + + + =20 + =20 + + +
+ Guten Abend, +
+
+
+
+
+
+ Lots of text +
+
+
+ text with Umlaut =C3=A4... +
+
+
+ MfG +
+
+
+
+
+
+
+
+ Le 5 ao=C3=BBt 2020 =C3=A0 10:46, holger < + holger@somedomain.de> a = +=C3=A9crit : +
+
+
+
+
+
+
+
+ Bonjour, +
+
+
+
+
+
+
+
+
+
+
+ -- +
+
+ Sent with my Delta Chat Messenger:=20 + https= +://delta.chat +
+
+
=20 + + +------=_Part_1287438_2124736777.1596732031007 +Content-Type: image/png +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline + +iVBORw0KGgoAAAANSUhEUgAAAlgAAADICAYAAAA0n5+2AAAgAElEQVR4nCzaZ3sjhIGo7fyMPSch +MN29S7YsW71avfcuWZJlS3K33HuvY08fpjEVhoGhDh0CSSAESEhCgDRSKNns5mx2k8AM5Dznw/v+ +[scrubbed] +/kNZ/B9Uub32fzngSwAAAABJRU5ErkJggg== +------=_Part_1287438_2124736777.1596732031007-- +