diff --git a/src/main/java/MailTest.java b/src/main/java/MailTest.java
index 7cba431a7..9d1a87a23 100644
--- a/src/main/java/MailTest.java
+++ b/src/main/java/MailTest.java
@@ -1,6 +1,4 @@
-import org.codemonkey.simplejavamail.Mailer;
-import org.codemonkey.simplejavamail.TransportStrategy;
-import org.codemonkey.simplejavamail.email.Email;
+import static javax.xml.bind.DatatypeConverter.parseBase64Binary;
import javax.mail.Message.RecipientType;
import javax.mail.MessagingException;
@@ -8,9 +6,12 @@
import javax.mail.internet.MimeMessage;
import javax.mail.util.ByteArrayDataSource;
import java.io.IOException;
-import java.util.Base64;
import java.util.Properties;
+import org.codemonkey.simplejavamail.Mailer;
+import org.codemonkey.simplejavamail.TransportStrategy;
+import org.codemonkey.simplejavamail.email.Email;
+
/**
* Demonstration program for the Simple Java Mail framework.
*
@@ -31,7 +32,7 @@ public static void main(final String[] args) throws IOException, MessagingExcept
emailNormal.addAttachment("dresscode.txt", new ByteArrayDataSource("Black Tie Optional", "text/plain"));
emailNormal.addAttachment("location.txt", "On the moon!".getBytes(), "text/plain");
String base64String = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAABeElEQVRYw2NgoAAYGxu3GxkZ7TY1NZVloDcAWq4MxH+B+D8Qv3FwcOCgtwM6oJaDMTAUXOhmuYqKCjvQ0pdoDrCnmwNMTEwakC0H4u8GBgYC9Ap6DSD+iewAoIPm0ctyLqBlp9F8/x+YE4zpYT8T0LL16JYD8U26+B7oyz4sloPwenpYno3DchCeROsUbwa05A8eB3wB4kqgIxOAuArIng7EW4H4EhC/B+JXQLwDaI4ryZaDSjeg5mt4LCcFXyIn1fdSyXJQVt1OtMWGhoai0OD8T0W8GohZifE1PxD/o7LlsPLiFNAKRrwOABWptLAcqc6QGDAHQEOAYaAc8BNotsJAOgAUAosG1AFA/AtUoY3YEFhKMAvS2AE7iC1+WaG1H6gY3gzE36hUFJ8mqzbU1dUVBBqQBzTgIDQRkWo5qCZdpaenJ0Zx1aytrc0DDB0foIG1oAYKqC0IZK8D4n1AfA6IzwPxXpCFoGoZVEUDaRGGUTAKRgEeAAA2eGJC+ETCiAAAAABJRU5ErkJggg==";
- emailNormal.addEmbeddedImage("thumbsup", Base64.getDecoder().decode(base64String), "image/png");
+ emailNormal.addEmbeddedImage("thumbsup", parseBase64Binary(base64String), "image/png");
// let's try producing and then consuming a MimeMessage ->
final MimeMessage mimeMessage = Mailer.produceMimeMessage(emailNormal, Session.getDefaultInstance(new Properties()));
diff --git a/src/main/java/org/codemonkey/simplejavamail/Mailer.java b/src/main/java/org/codemonkey/simplejavamail/Mailer.java
index 1d3d8610a..ce82df0f4 100644
--- a/src/main/java/org/codemonkey/simplejavamail/Mailer.java
+++ b/src/main/java/org/codemonkey/simplejavamail/Mailer.java
@@ -1,7 +1,10 @@
package org.codemonkey.simplejavamail;
+import static org.hazlewood.connor.bottema.emailaddress.EmailAddressCriteria.RFC_COMPLIANT;
+
import java.io.UnsupportedEncodingException;
import java.util.Date;
+import java.util.EnumSet;
import java.util.Map;
import java.util.Properties;
@@ -25,23 +28,18 @@
import org.codemonkey.simplejavamail.email.AttachmentResource;
import org.codemonkey.simplejavamail.email.Email;
import org.codemonkey.simplejavamail.email.Recipient;
-import org.codemonkey.simplejavamail.util.EmailAddressValidationCriteria;
-import org.codemonkey.simplejavamail.util.EmailValidationUtil;
+import org.hazlewood.connor.bottema.emailaddress.EmailAddressCriteria;
+import org.hazlewood.connor.bottema.emailaddress.EmailAddressValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Mailing tool aimed for simplicity, for sending e-mails of any complexity. This includes e-mails with plain text and/or html content,
- * embedded images and separate attachments, SMTP, SMTPS / SSL and SMTP + SSL
- *
- * This mailing tool abstracts the javax.mail API to a higher level easy to use API. For public use, this tool only works with {@link Email}
- * instances.
- *
- * The e-mail message structure is built to work with all e-mail clients and has been tested with many different webclients as well as some
- * mainstream client applications such as MS Outlook or Mozilla Thunderbird.
- *
- * Technically, the resulting email structure is as follows:
- *
+ * Mailing tool aimed for simplicity, for sending e-mails of any complexity. This includes e-mails with plain text and/or html content, embedded images and
+ * separate attachments, SMTP, SMTPS / SSL and SMTP + SSL <p> This mailing tool abstracts the javax.mail API to a higher level easy to use API. For public
+ * use, this tool only works with {@link Email} instances. <p> The e-mail message structure is built to work with all e-mail clients and has been tested
+ * with many different webclients as well as some mainstream client applications such as MS Outlook or Mozilla Thunderbird. <p> Technically, the resulting
+ * email structure is as follows:
+ *
* - root * - related @@ -51,10 +49,9 @@ * - embedded images * - attachments *- * - *
* Email email = new Email(); * email.setFromAddress("lollypop", "lolly.pop@somemail.com"); @@ -66,9 +63,9 @@ * // or: * new Mailer("smtp.someserver.com", 25, "username", "password").sendMail(email); *- * + * * @author Benny Bottema - * @see MimeEmailMessageWrapper + * @see Mailer.MimeEmailMessageWrapper * @see Email */ public class Mailer { @@ -81,57 +78,48 @@ public class Mailer { private static final String CHARACTER_ENCODING = "UTF-8"; /** - * Used to actually send the email. This session can come from being passed in the default constructor, or made by
Mailer
- * directly, when no Session
instance was provided.
- *
+ * Used to actually send the email. This session can come from being passed in the default constructor, or made by <code>Mailer</code> directly, when no
+ * <code>Session</code> instance was provided.
+ *
* @see #Mailer(Session)
* @see #Mailer(String, Integer, String, String, TransportStrategy)
*/
private final Session session;
/**
- * The transport protocol strategy enum that actually handles the session configuration. Session configuration meaning setting the right
- * properties for the appropriate transport type (ie. "mail.smtp.host" for SMTP, "mail.smtps.host" for SMTPS).
+ * The transport protocol strategy enum that actually handles the session configuration. Session configuration meaning setting the right properties for the
+ * appropriate transport type (ie. "mail.smtp.host" for SMTP, "mail.smtps.host" for SMTPS).
*/
private TransportStrategy transportStrategy;
/**
- * Email address restriction flags set either by constructor or overridden by getter by user.
- *
- * @see EmailAddressValidationCriteria
+ * Email address restriction flags set to {@link EmailAddressCriteria#RFC_COMPLIANT} or overridden by the user with {@link
+ * #setEmailAddressCriteria(EnumSet)}.
*/
- private EmailAddressValidationCriteria emailAddressValidationCriteria;
+ private EnumSet- * Also leaves email address validation criteria empty so that no validation is being performed. Validation errors will come from the - * smtp server instead. - * + * Default constructor, stores the given mail session for later use. Assumes that *all* properties used to make a connection are configured (host, port, + * authentication and transport protocol settings). + * * @param session A preconfigured mail {@link Session} object with which a {@link Message} can be produced. */ public Mailer(final Session session) { this.session = session; - this.emailAddressValidationCriteria = null; } /** - * Overloaded constructor which produces a new {@link Session} on the fly. Use this if you don't have a mail session configured in your - * web container, or Spring context etc. - *
- * Also leaves email address validation criteria empty so that no validation is being performed. Validation errors will come from the
- * smtp server instead.
- *
- * @param host The address URL of the SMTP server to be used.
- * @param port The port of the SMTP server.
- * @param username An optional username, may be null
.
- * @param password An optional password, may be null
, but only if username is null
as well.
+ * Overloaded constructor which produces a new {@link Session} on the fly. Use this if you don't have a mail session configured in your web container, or
+ * Spring context etc.
+ *
+ * @param host The address URL of the SMTP server to be used.
+ * @param port The port of the SMTP server.
+ * @param username An optional username, may be <code>null</code>.
+ * @param password An optional password, may be <code>null</code>, but only if username is <code>null</code> as well.
* @param transportStrategy The transport protocol configuration type for handling SSL or TLS (or vanilla SMTP)
*/
public Mailer(final String host, final Integer port, final String username, final String password,
final TransportStrategy transportStrategy) {
- // we're doing these validations manually instead of using Apache Commons to avoid another dependency
if (host == null || host.trim().equals("")) {
throw new MailException(MailException.MISSING_HOST);
} else if ((password != null && !password.trim().equals("")) && (username == null || username.trim().equals(""))) {
@@ -139,20 +127,17 @@ public Mailer(final String host, final Integer port, final String username, fina
}
this.transportStrategy = transportStrategy;
this.session = createMailSession(host, port, username, password);
- this.emailAddressValidationCriteria = null;
+ this.emailAddressCriteria = null;
}
/**
- * Actually instantiates and configures the {@link Session} instance. Delegates resolving transport protocol specific properties to the
- * {@link #transportStrategy} in two ways:
- *
null
.
* @param password An optional password, may be <code>null</code>.
* @return A fully configured <code>Session</code> instance complete with transport protocol settings.
@@ -194,9 +179,9 @@ protected PasswordAuthentication getPasswordAuthentication() {
/**
* Overloaded constructor which produces a new {@link Session} on the fly, using default vanilla SMTP transport protocol.
- *
- * @param host The address URL of the SMTP server to be used.
- * @param port The port of the SMTP server.
+ *
+ * @param host The address URL of the SMTP server to be used.
+ * @param port The port of the SMTP server.
* @param username An optional username, may be <code>null</code>.
* @param password An optional password, may be <code>null</code>, but only if username is <code>null</code> as well.
* @see #Mailer(String, Integer, String, String, TransportStrategy)
@@ -206,8 +191,8 @@ public Mailer(final String host, final Integer port, final String username, fina
}
/**
- * In case Simple Java Mail falls short somehow, you can get a hold of the internal {@link Session} instance to debug or tweak. Please
- * let us know why you are needing this on https://github.com/bbottema/simple-java-mail/issues.
+ * In case Simple Java Mail falls short somehow, you can get a hold of the internal {@link Session} instance to debug or tweak. Please let us know why you
+ * need this at https://github.com/bbottema/simple-java-mail/issues.
*/
public Session getSession() {
LOGGER.warn("Providing access to Session instance for emergency fall-back scenario. Please let us know why you need it.");
@@ -217,7 +202,7 @@ public Session getSession() {
/**
* Actually sets {@link Session#setDebug(boolean)} so that it generates debug information.
- *
+ *
* @param debug Flag to indicate debug mode yes/no.
*/
public void setDebug(final boolean debug) {
@@ -226,7 +211,7 @@ public void setDebug(final boolean debug) {
/**
* Copies all property entries into the {@link Session} using {@link Session#getProperties()}.
- *
+ *
* @param properties The source properties to add or override in the internal {@link Session} instance.
*/
public void applyProperties(final Properties properties) {
@@ -235,16 +220,15 @@ public void applyProperties(final Properties properties) {
/**
* Processes an {@link Email} instance into a completely configured {@link Message}.
- * - * Sends the Sun JavaMail {@link Message} object using {@link Session#getTransport()}. It will call {@link Transport#connect()} assuming - * all connection details have been configured in the provided {@link Session} instance. - *
- * Performs a call to {@link Message#saveChanges()} as the Sun JavaMail API indicates it is needed to configure the message headers and - * providing a message id. - * + *
+ * Sends the Sun JavaMail {@link Message} object using {@link Session#getTransport()}. It will call {@link Transport#connect()} assuming all connection + * details have been configured in the provided {@link Session} instance. + * + * Performs a call to {@link Message#saveChanges()} as the Sun JavaMail API indicates it is needed to configure the message headers and providing a message + * id. + * * @param email The information for the email to be sent. - * @throws MailException Can be thrown if an email isn't validating correctly, or some other problem occurs during connection, sending - * etc. + * @throws MailException Can be thrown if an email isn't validating correctly, or some other problem occurs during connection, sending etc. * @see #validate(Email) * @see #produceMimeMessage(Email, Session) * @see #setRecipients(Email, Message) @@ -296,11 +280,11 @@ private void logSession(Session session, TransportStrategy transportStrategy) { /** * Validates an {@link Email} instance. Validation fails if the subject is missing, content is missing, or no recipients are defined. - * + * * @param email The email that needs to be configured correctly. * @return Alwaystrue
(throws a {@link MailException} exception if validation fails).
* @throws MailException Is being thrown in any of the above causes.
- * @see EmailValidationUtil
+ * @see EmailAddressValidator
*/
public boolean validate(final Email email)
throws MailException {
@@ -312,17 +296,17 @@ public boolean validate(final Email email)
throw new MailException(MailException.MISSING_RECIPIENT);
} else if (email.getFromRecipient() == null) {
throw new MailException(MailException.MISSING_SENDER);
- } else if (emailAddressValidationCriteria != null) {
- if (!EmailValidationUtil.isValid(email.getFromRecipient().getAddress(), emailAddressValidationCriteria)) {
+ } else if (emailAddressCriteria != null) {
+ if (!EmailAddressValidator.isValid(email.getFromRecipient().getAddress(), emailAddressCriteria)) {
throw new MailException(String.format(MailException.INVALID_SENDER, email));
}
for (final Recipient recipient : email.getRecipients()) {
- if (!EmailValidationUtil.isValid(recipient.getAddress(), emailAddressValidationCriteria)) {
+ if (!EmailAddressValidator.isValid(recipient.getAddress(), emailAddressCriteria)) {
throw new MailException(String.format(MailException.INVALID_RECIPIENT, email));
}
}
if (email.getReplyToRecipient() != null) {
- if (!EmailValidationUtil.isValid(email.getReplyToRecipient().getAddress(), emailAddressValidationCriteria)) {
+ if (!EmailAddressValidator.isValid(email.getReplyToRecipient().getAddress(), emailAddressCriteria)) {
throw new MailException(String.format(MailException.INVALID_REPLYTO, email));
}
}
@@ -332,13 +316,13 @@ public boolean validate(final Email email)
/**
* Creates a new {@link MimeMessage} instance and prepares it in the email structure, so that it can be filled and send.
- * + *
* Fills subject, from,reply-to, content, sent-date, recipients, texts, embedded images, attachments, content and adds all headers. - * - * @param email The email message from which the subject and From-address are extracted. + * + * @param email The email message from which the subject and From-address are extracted. * @param session The Session to attach the MimeMessage to * @return A fully preparated {@link Message} instance, ready to be sent. - * @throws MessagingException May be thrown when the message couldn't be processed by JavaMail. + * @throws MessagingException May be thrown when the message couldn't be processed by JavaMail. * @throws UnsupportedEncodingException Zie {@link InternetAddress#InternetAddress(String, String)}. */ public static MimeMessage produceMimeMessage(final Email email, final Session session) @@ -369,11 +353,11 @@ public static MimeMessage produceMimeMessage(final Email email, final Session se /** * Fills the {@link Message} instance with recipients from the {@link Email}. - * - * @param email The message in which the recipients are defined. + * + * @param email The message in which the recipients are defined. * @param message The javax message that needs to be filled with recipients. * @throws UnsupportedEncodingException See {@link InternetAddress#InternetAddress(String, String)}. - * @throws MessagingException See {@link Message#addRecipient(javax.mail.Message.RecipientType, Address)} + * @throws MessagingException See {@link Message#addRecipient(javax.mail.Message.RecipientType, Address)} */ private static void setRecipients(final Email email, final Message message) throws UnsupportedEncodingException, MessagingException { @@ -385,11 +369,11 @@ private static void setRecipients(final Email email, final Message message) /** * Fills the {@link Message} instance with reply-to address. - * - * @param email The message in which the recipients are defined. + * + * @param email The message in which the recipients are defined. 
* @param message The javax message that needs to be filled with reply-to address. * @throws UnsupportedEncodingException See {@link InternetAddress#InternetAddress(String, String)}. - * @throws MessagingException See {@link Message#setReplyTo(Address[])} + * @throws MessagingException See {@link Message#setReplyTo(Address[])} */ private static void setReplyTo(final Email email, final Message message) throws UnsupportedEncodingException, MessagingException { @@ -403,11 +387,11 @@ private static void setReplyTo(final Email email, final Message message) /** * Fills the {@link Message} instance with the content bodies (text and html). - * - * @param email The message in which the content is defined. + * + * @param email The message in which the content is defined. * @param multipartAlternativeMessages See {@link MimeMultipart#addBodyPart(BodyPart)} - * @throws MessagingException See {@link BodyPart#setText(String)}, {@link BodyPart#setContent(Object, String)} and - * {@link MimeMultipart#addBodyPart(BodyPart)}. + * @throws MessagingException See {@link BodyPart#setText(String)}, {@link BodyPart#setContent(Object, String)} and {@link + * MimeMultipart#addBodyPart(BodyPart)}. */ private static void setTexts(final Email email, final MimeMultipart multipartAlternativeMessages) throws MessagingException { @@ -425,11 +409,10 @@ private static void setTexts(final Email email, final MimeMultipart multipartAlt /** * Fills the {@link Message} instance with the embedded images from the {@link Email}. - * - * @param email The message in which the embedded images are defined. + * + * @param email The message in which the embedded images are defined. * @param multipartRelated The branch in the email structure in which we'll stuff the embedded images. 
- * @throws MessagingException See {@link MimeMultipart#addBodyPart(BodyPart)} and - * {@link #getBodyPartFromDatasource(AttachmentResource, String)} + * @throws MessagingException See {@link MimeMultipart#addBodyPart(BodyPart)} and {@link #getBodyPartFromDatasource(AttachmentResource, String)} */ private static void setEmbeddedImages(final Email email, final MimeMultipart multipartRelated) throws MessagingException { @@ -440,11 +423,10 @@ private static void setEmbeddedImages(final Email email, final MimeMultipart mul /** * Fills the {@link Message} instance with the attachments from the {@link Email}. - * - * @param email The message in which the attachments are defined. + * + * @param email The message in which the attachments are defined. * @param multipartRoot The branch in the email structure in which we'll stuff the attachments. - * @throws MessagingException See {@link MimeMultipart#addBodyPart(BodyPart)} and - * {@link #getBodyPartFromDatasource(AttachmentResource, String)} + * @throws MessagingException See {@link MimeMultipart#addBodyPart(BodyPart)} and {@link #getBodyPartFromDatasource(AttachmentResource, String)} */ private static void setAttachments(final Email email, final MimeMultipart multipartRoot) throws MessagingException { @@ -454,14 +436,14 @@ private static void setAttachments(final Email email, final MimeMultipart multip } /** - * Sets all headers on the {@link Message} instance. Since we're not using a high-level JavaMail method, the JavaMail library says we - * need to do some encoding and 'folding' manually, to get the value right for the headers (see {@link MimeUtility}. - * - * @param email The message in which the headers are defined. + * Sets all headers on the {@link Message} instance. Since we're not using a high-level JavaMail method, the JavaMail library says we need to do some + * encoding and 'folding' manually, to get the value right for the headers (see {@link MimeUtility}). 
+ * + * @param email The message in which the headers are defined. * @param message The {@link Message} on which to set the raw, encoded and folded headers. * @throws UnsupportedEncodingException See {@link MimeUtility#encodeText(String, String, String)} - * @throws MessagingException See {@link Message#addHeader(String, String)} - * @see {@link MimeUtility#encodeText(String, String, String)} + * @throws MessagingException See {@link Message#addHeader(String, String)} + * @see MimeUtility#encodeText(String, String, String) * @see MimeUtility#fold(int, String) */ private static void setHeaders(final Email email, final Message message) @@ -476,13 +458,11 @@ private static void setHeaders(final Email email, final Message message) } /** - * Helper method which generates a {@link BodyPart} from an {@link AttachmentResource} (from its {@link DataSource}) and a disposition - * type ({@link Part#INLINE} or {@link Part#ATTACHMENT}). With this the attachment data can be converted into objects that fit in the - * email structure.* - root * - related @@ -515,7 +493,7 @@ private static BodyPart getBodyPartFromDatasource(final AttachmentResource resou * - embedded images * - attachments *- * + * * @author Benny Bottema */ private static class MimeEmailMessageWrapper { @@ -549,13 +527,9 @@ private static class MimeEmailMessageWrapper { } /** - * Overrides the default email address validation restrictions when validating and sending emails using the current
Mailer
- * instance. By default no validation will be performed by simple-java-mail, until a criteria object has been set.
- *
- * @param emailAddressValidationCriteria Refer to
- * {@link EmailAddressValidationCriteria#EmailAddressValidationCriteria(boolean, boolean)}.
+ * Overrides the default email address validation restrictions {@link #emailAddressCriteria} when validating and sending emails using the current Mailer
instance.
*/
- public void setEmailAddressValidationCriteria(EmailAddressValidationCriteria emailAddressValidationCriteria) {
- this.emailAddressValidationCriteria = emailAddressValidationCriteria;
+ public void setEmailAddressCriteria(EnumSet
- * From the original author:
- *
If you use this code, please keep the author information in tact and reference my site at leshazlewood.com. Thanks!- *
- * Code sanitized by Benny Bottema (kept validation 100% in tact). - * - * @author Les Hazlewood, Casey Connor, Benny Bottema - * @see EmailAddressValidationCriteria - */ -package org.codemonkey.simplejavamail.util; - -/* - * Original code Copyright 2013-2016 Les Hazlewood, Boxbe, Inc., Casey Connor - * EmailAddress.java - *
- * RFC2822 email address parsing and extraction, some header verification. - */ - -/* - * Original code Copyright 2008 Les Hazlewood - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.util.ArrayList; -import javax.mail.internet.InternetAddress; -import java.io.UnsupportedEncodingException; - -/** - * EmailAddress.java - *
- * A utility class to parse, clean up, and extract email addresses from messages - * per RFC2822 syntax. Designed to integrate with Javamail (this class will require that you - * have a javamail mail.jar in your classpath), but you could easily change - * the existing methods around to not use Javamail at all. For example, if you're changing - * the code, see the difference between getInternetAddress and getDomain: the latter doesn't - * depend on any javamail code. This is all a by-product of what this class was written for, - * so feel free to modify it to suit your needs. - *
- * For real-world addresses, this class is roughly 3-4 times slower than parsing with - * InternetAddress (although recent versions of this class might be faster), but - * it can handle a whole lot more. Because of sensible design tradeoffs made in javamail, if - * InternetAddress has trouble parsing, - * it might throw an exception, but often it will silently leave the entire original string - * in the result of ia.getAddress(). This class can be trusted to only provide authenticated - * results. - *
- * This class has been successfully used on many billion real-world addresses, live in - * production environments, but it's not perfect yet. - *
- * Comments/Questions/Corrections welcome: java <at> caseyconnor.org - *
- * Started with code by Les Hazlewood: - * leshazlewood.com. - *
- * Modified/added: removed some functions, added support for CFWS token, - * corrected FWSP token, added some boolean flags, added getInternetAddress and - * extractHeaderAddresses and other methods, some optimization. - *
- * Where Mr. Hazlewood's version was more for ensuring certain forms that were passed in during - * registrations, etc, this handles more types of verifying as well a few forms of extracting - * the data in predictable, cleaned-up chunks. - *
- * Note: CFWS means the "comment folded whitespace" token from 2822, in other words, - * whitespace and comment text that is enclosed in ()'s. - *
- * Limitations: doesn't support nested CFWS (comments within (other) comments), doesn't - * support mailbox groups except when flat-extracting addresses from headers or when doing - * verification, doesn't support - * any of the obs-* tokens. Also: the getInternetAddress and - * extractHeaderAddresses methods return InternetAddress objects; if the personal name has - * any quotes or \'s in it at all, the InternetAddress object will always - * escape the name entirely and put it in quotes, so - * multiple-token personal names with those characters somewhere in them will always be munged - * into one big escaped string. This is not really a big deal at all, but I mention it anyway. - * (And you could get around it by a simple modification to those methods to not use - * InternetAddress objects.) See the docs of those methods for more info. - *
- * Note: Unlike InternetAddress, this class will preserve any RFC-2047-encoding of international - * characters. Thus doing my_internetaddress.getPersonal() will return the 2047-encoded string, - * ready for use in an RFC-822-compliant message, - * whereas the common InternetAddress constructor (when used outside the context of - * EmailAddress) would return the decoded version of the text, if any was needed. If you need the - * decoded form, you can do something like this (where ia is the InternetAddress object returned - * from an EmailAddress method): - *
- * ia.setPersonal(javax.mail.internet.MimeUtility.decodeText(ia.getPersonal())); - *
- * ...subsequent calls to ia.getPersonal() will then return the decoded text. - *
- * Note: This class does not do any header-length-checking. There are no such limitations on the - * email address grammar in 2822, though email headers in general do have length restrictions. - * So if the return path - * is 40000 unfolded characters long, but otherwise valid under 2822, this class will pass it. - *
- * Examples of passing (2822-valid) addresses, believe it or not: - *
- * bob @example.com
- *
"bob" @ example.com
- *
bob (comment) (other comment) @example.com (personal name)
- *
"<bob \" (here) " < (hi there) "bob(the man)smith" (hi) @ (there) example.com (hello) > (again)
- *
- * (none of which are permitted by javamail's InternetAddress parsing, incidentally) - *
- * By using getInternetAddress(), you can retrieve an InternetAddress object that, when - * toString()'ed, would reveal that the parser had converted the above into: - *
- * <bob@example.com>
- *
<bob@example.com>
- *
"personal name" <bob@example.com>
- *
"<bob \" (here)" <"bob(the man)smith"@example.com>
- *
(respectively) - *
If parsing headers, however, you'll probably be calling extractHeaderAddresses(). - *
- * A future improvement may be to use this class to extract info from corrupted - * addresses, but for now, it does not permit them. - *
- * Some of the configuration booleans allow a bit of tweaking - * already. The source code can be compiled with these booleans in various - * states. They are configured to what is probably the most commonly-useful state. - * - * @author Les Hazlewood, Casey Connor - * @version 1.13 - */ -public class EmailAddress -{ - /** - * This constant changes the behavior of the domain parsing. If true, the parser will - * allow 2822 domains, which include single-level domains (e.g. bob@localhost) as well - * as domain literals, e.g.: - * - *
someone@[192.168.1.100] or
- *
john.doe@[23:33:A2:22:16:1F] or
- *
me@[my computer]
The RFC says these are valid email addresses, but most people don't like - * allowing them. - * If you don't want to allow them, and only want to allow valid domain names - * (RFC 1035, x.y.z.com, etc), - * and specifically only those with at least two levels ("example.com"), then - * change this constant to false. - * - *
Its default (compiled) value is false, thus it is not RFC 2822 compliant, - * but you should set it depending on what you need for your application. - */ - public static final boolean ALLOW_DOMAIN_LITERALS = false; - - /** - * This constant states that quoted identifiers are allowed - * (using quotes and angle brackets around the raw address) are allowed, e.g.: - * - *
"John Smith" <john.smith@somewhere.com> - * - *
The RFC says this is a valid mailbox. If you don't want to - * allow this, because for example, you only want users to enter in - * a raw address (john.smith@somewhere.com - no quotes or angle - * brackets), then change this constant to false. - * - *
Its default (compiled) value is true to remain RFC 2822 compliant, but - * you should set it depending on what you need for your application. - */ - public static final boolean ALLOW_QUOTED_IDENTIFIERS = true; - - /** - * This constant allows "." to appear in atext (note: only atext which appears - * in the 2822 "name-addr" part of the address, not the other instances) - *
- * The addresses: - *
Kayaks.org <kayaks@kayaks.org> - *
Bob K. Smith<bobksmith@bob.net> - *
- * ...are not valid. They should be: - *
"Kayaks.org" <kayaks@kayaks.org> - *
"Bob K. Smith" <bobksmith@bob.net> - *
- * If this boolean is set to false, the parser will act per 2822 and will require - * the quotes; if set to true, it will allow the use of "." without quotes. - * Default (compiled) setting is false. - */ - public static final boolean ALLOW_DOT_IN_ATEXT = false; - - /** - * This controls the behavior of getInternetAddress and extractHeaderAddresses. If true, - * it allows the not-totally-kosher-but-happens-in-the-real-world practice of: - *
- * <bob@example.com> (Bob Smith) - *
- * In this case, "Bob Smith" is not techinically the personal name, just a - * comment. If this is set to true, the methods will convert this into: - * Bob Smith <bob@example.com> - *
- * This also happens somewhat more often and appropriately with - *
- * mailer-daemon@blah.com (Mail Delivery System) - *
- * If a personal name appears to the left and CFWS appears to the right of an address, - * the methods will favor the personal name to the left. If the methods need to use the - * CFWS following the address, they will take the first comment token they find. - *
e.g.: - *
"bob smith" <bob@example.com> (Bobby)
- *
will yield personal name "bob smith"
- *
<bob@example.com> (Bobby)
- *
will yield personal name "Bobby"
- *
bob@example.com (Bobby)
- *
will yield personal name "Bobby"
- *
bob@example.com (Bob) (Smith)
- *
will yield personal name "Bob"
- *
- * Default (compiled) setting is true. - */ - public static final boolean EXTRACT_CFWS_PERSONAL_NAMES = true; - - /** - * This constant allows "[" or "]" to appear in atext. Not very - * useful, maybe, but there it is. - *
- * The address: - *
[Kayaks] <kayaks@kayaks.org> - * ...is not valid. It should be: - *
"[Kayaks]" <kayaks@kayaks.org> - *
- * If this boolean is set to false, the parser will act per 2822 and will require - * the quotes; if set to true, it will allow them to be missing. - *
- * One real-world example seen: - *
- * Bob Smith [mailto:bsmith@gmail.com]=20 - *
- * Use at your own risk. There may be some issue with enabling this feature in conjunction - * with ALLOW_DOMAIN_LITERALS, but i haven't looked into that. If ALLOW_DOMAIN_LITERALS - * is false, i think this should be pretty safe. Whether or not it's useful, that's up - * to you. Default (compiled) setting of false. - */ - public static final boolean ALLOW_SQUARE_BRACKETS_IN_ATEXT = false; - - /** - * This contant allows ")" or "(" to appear in quoted versions of - * the localpart (they are never allowed in unquoted versions) - *
- * The default (2822) behavior is to allow this, i.e. boolean true. - *
- * You can disallow it, but better to leave it true. I left this hanging around (from an - * earlier incarnation of the code) as a random option you can switch off. No, it's not - * necssarily useful. Long story. - *
- * If false, it will prevent such addresses from being valid, even though they are: - * "bob(hi)smith"@test.com - *
- * Deafult (compiled) setting of true. - */ - public static final boolean ALLOW_PARENS_IN_LOCALPART = true; - - /** - * Checks to see if the specified string is a valid - * email address according to the RFC 2822 specification, which is remarkably - * squirrely. See doc for this class: 2822 not fully implemented, but probably close - * enough for almost any needs. Note that things like spaces in addresses ("bob @hi.com") - * are valid according to 2822! Read the docs for this class before using this method! - *
- * If being used on a 2822 header, this method applies to Sender, Resent-Sender, - * only, - * although you can also use it on the Return-Path if you know it to be non-empty - * (see doc for isValidReturnPath()!). Folded header lines should work OK, but I haven't - * tested that. - *
- * @param email the email address string to test for validity (null and "" OK, - * will return false for those) - * @return true if the given email text is valid according to RFC 2822, false otherwise. - */ - public static boolean isValidMailbox(String email) - { - return (email != null) && MAILBOX_PATTERN.matcher(email).matches(); - } - - /** - * Tells us if the email represents a valid return path header string. - *
- * NOTE: legit forms like <(comment here)> will return true. - *
- * You can check isValidReturnPath(), and - * if it is true, and if getInternetAddress() returns null, you know you have a DSN, - * whether it be an empty return path or one with only CFWS inside the brackets (which is - * legit, as demonstated above). Note that - * you can also simply call getReturnPathAddress() to have that operation done for you. - *
Note that <""> is not a valid return-path. - */ - public static boolean isValidReturnPath(String email) - { - return(email != null) && RETURN_PATH_PATTERN.matcher(email).matches(); - } - - /** - * WARNING: You may want to use getReturnPathAddress() instead if you're - * looking for a clean version of the return path without CFWS, etc. See that - * documentation first! - *
- * Pull whatever's inside the angle brackets out, without alteration or cleaning. - * This is more secure than a simple substring() since paths like: - *
<(my > path) > - *
...are legal return-paths and may throw a simpler parser off. However - * this method will return all CFWS (comments, whitespace) that may be between - * the brackets as well. So the example above will return: - *
(my > path)_
(where the _ is the trailing space from the original
- * string)
- */
- public static String getReturnPathBracketContents(String email)
- {
- if (email == null) return(null);
- Matcher m = RETURN_PATH_PATTERN.matcher(email);
-
- if (m.matches())
- return(m.group(1));
- else return(null);
- }
-
- /**
- * Pull out the cleaned-up return path address. May return an empty string.
- * Will require two parsings due to an inefficiency.
- *
- * @return null if there are any syntax issues or other weirdness, otherwise
- * the valid, trimmed return path email address without CFWS, surrounding angle brackets,
- * with quotes stripped where possible, etc. (may return an empty string).
- */
- public static String getReturnPathAddress(String email)
- {
- if (email == null) return(null);
-
- // inefficient, but there is no parallel grammar tree to extract the return path
- // accurately:
-
- if (isValidReturnPath(email))
- {
- InternetAddress ia = getInternetAddress(email);
- if (ia == null) return("");
- else return(ia.getAddress());
- }
- else return(null);
- }
-
- /**
- * Tells us if a header line is valid, i.e. checks for a 2822 mailbox-list (which
- * could only have one address in it, or might have more.) Applicable to From or
- * Resent-From headers only.
- *
- * This method seems quick enough so far, but I'm not totally - * convinced it couldn't be slow given a complicated near-miss string. You may just - * want to call extractHeaderAddresses() instead, unless you must confirm that the - * format is perfect. I think that in 99.9999% of real-world cases this method will - * work fine. - *
- * @see #isValidAddressList(String) - */ - public static boolean isValidMailboxList(String header_txt) - { - return(MAILBOX_LIST_PATTERN.matcher(header_txt).matches()); - } - - /** - * Tells us if a header line is valid, i.e. a 2822 address-list (which - * could only have one address in it, or might have more.) Applicable to To, Cc, Bcc, - * Reply-To, Resent-To, Resent-Cc, and Resent-Bcc headers only. - *
- * This method seems quick enough so far, but I'm not totally - * convinced it couldn't be slow given a complicated near-miss string. You may just - * want to call extractHeaderAddresses() instead, unless you must confirm that the - * format is perfect. I think that in 99.9999% of real-world cases this method will - * work fine and quickly enough. Let me know what your testing reveals. - *
- * @see #isValidMailboxList(String) - */ - public static boolean isValidAddressList(String header_txt) - { - // creating the actual ADDRESS_LIST_PATTERN string proved too large for java, but - // forutnately we can use this alternative FSM to check. Since the address pattern - // is greedy, it will match all CFWS up to the comma which we can then require easily. - - boolean valid = false; - Matcher m = ADDRESS_PATTERN.matcher(header_txt); - int max = header_txt.length(); - - while (m.lookingAt()) - { - if (m.end() == max) - { - valid = true; - break; - } - else - { - valid = false; - if (header_txt.charAt(m.end()) == ',') - { - m.region(m.end() + 1, max); - continue; - } - else break; - } - } - - return(valid); - // return(ADDRESS_LIST_PATTERN.matcher(header_txt).matches()); - } - - /** - * Given a 2822-valid single address string, give us an InternetAddress object holding - * that address, otherwise returns null. The email address that comes back from the - * resulting InternetAddress object's getAddress() call will have comments and unnecessary - * quotation marks or whitespace removed. - *
- * If your String is an email header, you should probably use - * extractHeaderAddresses instead, since most headers can have multiple addresses in them. - * (see that method for more info.) This method will indeed fail if you use it on a header - * line with more than one address. - *
- * Exception: You CAN and should use this for the Sender header, and probably you want - * to use it for the X-Original-To as well. - *
- * Another exception: You can use this for the Return-Path, but if you want to know that - * a Return-Path is valid and you want to extract - * it, you will have to call both this method and isValidReturnPath; this operation can - * be done for you by simply calling getReturnPathAddress() instead of this method. In - * terms of this method's application to the return-path, note that - * the common valid Return-Path value <> will return null. So will the illegitimate - * "" or legitimate - * empty-string, but other illegitimate Return-Paths like - *
"hi" <bob@smith.com> - *
will return an address, so the moral is that - * you may want to check isValidReturnPath() first, if you care. This method is useful if - * you trust the return path and want to extract a clean address from it without CFWS - * (getReturnPathBracketContents() will return any CFWS), - * or if you want to determine if a validated return path actually contains an address in - * it and isn't just empty or full of CFWS. Except for empty return paths (those lacking an - * address) the Return-Path specification is a subset - * of valid 2822 addresses, so this method will work on all non-empty return-paths, - * failing only on the empty ones. - *
- * In general for this method, note: although this method does not use InternetAddress to
- * parse/extract the
- * information, it does ensure that InternetAddress can use the results (i.e. that
- * there are no encoding issues), but note that an InternetAddress object can hold
- * (and use) values for the address which it could not have parsed itself.
- * Thus, it's possible that for InternetAddress addr, which came as the result of
- * this method, the following may throw an exception or may silently fail:
- * InternetAddress addr2 = InternetAddress.parse(addr.toString());
- *
- * The InternetAddress objects returned by this method will not do any decoding of RFC-2047 - * encoded personal names. See the documentation for this overall class (above) for more. - *
- * Again, all other uses of that addr object should work OK. It is recommended that if - * you are using this class that you never create an InternetAddress object using - * InternetAddress's own constructors or parsing methods; rather, retrieve them through - * this class. Perhaps the addr.clone() would work OK, though. - *
- * The personal name will include any and all phrase token(s) to the left of the address, - * if they exist, and the string will be trim()'ed, but note that InternetAddress, when - * generating the getPersonal() result or the toString() result, if - * it encounters any quotes or backslashes in the personal name String, will put the entire - * thing in a big quoted-escaped chunk. - *
- * This will do some smart unescaping to prevent that from happening unnecessarily; - * specifically, if there are unecessary quotes around a personal name, it will remove - * them. E.g. - *
- * "Bob" <bob@hi.com>
- *
becomes:
- *
Bob <bob@hi.com>
- *
- * (apologies to bob@hi.com for everything i've done to him) - */ - public static InternetAddress getInternetAddress(String email) - { - if (email == null) return(null); - - Matcher m = MAILBOX_PATTERN.matcher(email); - - if (m.matches()) return(pullFromGroups(m)); - else return(null); - } - - /** - * See getInternetAddress; does the same thing but returns the constituent parts - * of the address in a three-element array (or null if the address is invalid). - *
- * This may be useful because even with cleaned-up address extracted with this class - * the parsing to achieve this is not trivial. - *
- * To actually use these values in an email, you should construct an InternetAddress - * object (or - * equivalent) which can handle the various quoting, adding of the angle brackets - * around the address, etc., necessary for presenting the whole address. - *
- * To construct the email address, you can safely use:
- *
result[1] + "@" + result[2]
- *
- * @return a three-element array containing the personal name String, local part String, - * and the domain part String of the address, in that order, without the @; will return - * null if the address is invalid; if it is valid this will not - * return null but the personal name (at index 0) may be null - */ - public static String[] getAddressParts(String email) - { - if (email == null) return (null); - - Matcher m = MAILBOX_PATTERN.matcher(email); - - if (m.matches()) return(getMatcherParts(m)); else return(null); - } - - /** - * See getInternetAddress; does the same thing but returns the personal name that would - * have been returned from getInternetAddress() in String - * form. - *
- * The Strings returned by this method will not reflect any decoding of RFC-2047 - * encoded personal names. See the documentation for this overall class (above) for more. - */ - public static String getPersonalName(String email) - { - if (email == null) return (null); - - Matcher m = MAILBOX_PATTERN.matcher(email); - - if (m.matches()) return(getMatcherParts(m)[0]); else return(null); - } - - /** - * See getInternetAddress; does the same thing but returns the local part that would - * have been returned from getInternetAddress() in String - * form (essentially, the part to the left of the @). This may be useful because - * a simple search/split on a "@" is not a safe way to do this, given - * escaped quoted strings, etc. - */ - public static String getLocalPart(String email) - { - if (email == null) return (null); - - Matcher m = MAILBOX_PATTERN.matcher(email); - - if (m.matches()) return(getMatcherParts(m)[1]); else return(null); - } - - /** - * See getInternetAddress; does the same thing but returns the domain part in string - * form (essentially, the part to the right of the @). This may be useful because - * a simple search/split on a "@" is not a safe way to do this, given - * escaped quoted strings, etc. - */ - public static String getDomain(String email) - { - if (email == null) return (null); - - Matcher m = MAILBOX_PATTERN.matcher(email); - - if (m.matches()) return(getMatcherParts(m)[2]); else return(null); - } - - /** - * Given the value of a header, like the From:, extract valid 2822 addresses from it - * and place them in an array. Returns an empty array if none found, will not return - * null. Note that you should pass in everything except, e.g. "From: ", in other - * words, - * the header value without the header name and ": " at the start.. The addresses - * that come back from the - * resulting InternetAddress objects' getAddress calls will have comments and unnecessary - * quotation marks or whitespace removed. 
If a bad address is encountered, parsing stops, - * and the good - * addresses found up until then (if any) are returned. This is kind of strict - * and could be improved, but that's the way it is for now. If you need to know - * if the header is totally valid (not just up to a certain address) then you can use - * isValidMailboxList() or isValidAddressList() or isValidMailbox(), depending on - * the header: - *
- * This method can handle group addresses, but it does not preseve the group name or - * the structure of any groups; rather it flattens them all into the same array. - * You can call this method on the From or any other header that uses the mailbox-list form - * (which doesn't use groups), or you can call it on the To, Cc, Bcc, or Reply-To or any - * other header which uses the address-list format which might have groups in there. - * This method doesn't enforce any group structure syntax either. If you care to test - * for 2822 validity of a list of addresses (including group format), use the appropriate - * method. This will dependably extract addresses from a valid list. If the list is - * invalid, it may extract them anyway, or it may fail somewhere along the line. - *
- * You should not use this method on the Return-Path header; instead use - * getInternetAddress() or getReturnPathAddress() (see that doc for info about - * Return-Path). However, you could use this on the Sender header if you didn't care - * to check it for validity, since single mailboxes are valid subsets of valid - * mailbox-lists and address-lists. - *
- * @param header_txt is text from whatever header (not including the header name and - * ": ". I don't - * think the String needs to be unfolded, but i haven't tested that. - *
- * see getInternetAddress() for more info: this extracts the same way - *
- * @return zero-length array if errors or none found, otherwise an array of length > 0
- * with the addresses as InternetAddresses with the personal name and emails set correctly
- * (i.e. doesn't rely on InternetAddress parsing for extraction, but does require that
- * the address be usable by InternetAddress, although re-parsing with InternetAddress may
- * cause exceptions, see getInternetAddress()); will not return null.
- */
- public static InternetAddress[] extractHeaderAddresses(String header_txt)
- {
- // you may go insane from this code
-
- if (header_txt == null || header_txt.equals("")) return(new InternetAddress[0]);
-
-// optimize: separate method or boolean to indicate if group should be worried about at all
-
- Matcher m = MAILBOX_PATTERN.matcher(header_txt);
- Matcher gp = GROUP_PREFIX_PATTERN.matcher(header_txt);
-
- ArrayList
- * You could roll your own method that does what you care about.
- *
- * This should work on the matcher for MAILBOX_LIST_PATTERN or MAILBOX_PATTERN, but
- * only those. With some tweaking it could easily be adapted to some others.
- *
- * May return null on encoding errors.
- *
- * Also cleans up the address: tries to strip bounding quotes off of the local
- * part without damaging its parsability (by this class); if it can, do that; all other
- * cases, don't.
- *
- * e.g. "bob"@example.com becomes bob@example.com
- */
- private static InternetAddress pullFromGroups(Matcher m) // builds an InternetAddress from an already-matched matcher; may return null
- {
- InternetAddress current_ia = null;
- String[] parts = getMatcherParts(m); // { personal name, local part, domain }
-
- if (parts[1] == null || parts[2] == null) return(null); // no address can be built without both local part and domain
-
- // If for some reason you want to require that the result be re-parsable by
- // InternetAddress, you
- // could uncomment the strict-constructor code below, but note that not all the utility
- // functions use pullFromGroups; some call getMatcherParts directly.
- try
- {
- //current_ia = new InternetAddress(parts[0] + " <" + parts[1] + "@" +
- // parts[2]+ ">", true);
- // so it parses it OK, but since javamail doesn't extract too well
- // we make sure that the constituent parts
- // are correct by setting them individually
-
- current_ia = new InternetAddress();
- current_ia.setPersonal(parts[0]); // parts[0] may be null (no personal name found)
- current_ia.setAddress(parts[1] + "@" + parts[2]);
- }
- //catch (AddressException ae)
- // {
- //System.out.println("ex: " + ae);
- // current_ia = null;
- // }
- catch (UnsupportedEncodingException uee)
- {
- current_ia = null; // encoding errors are reported to the caller as a null result
- }
-
- return(current_ia);
- }
-
- /**
- * See pullFromGroups
- *
- * Extracts the personal name, local part and domain from a matcher on which a match
- * has already succeeded. Which capture groups are read depends on the
- * ALLOW_QUOTED_IDENTIFIERS and ALLOW_DOMAIN_LITERALS flags, because the group numbers
- * used below differ per combination (see the group-ID lists in the grammar comments).
- *
- * When EXTRACT_CFWS_PERSONAL_NAMES is set, a missing personal name is filled from the
- * first comment found in the relevant capture group, with its bounding parentheses
- * removed.
- *
- * Before returning, the local part and domain are trimmed, the personal name is trimmed
- * and passed through cleanupPersonalString, and bounding double quotes are stripped from
- * the local part only when the unquoted address still matches ADDR_SPEC_PATTERN.
- *
- * @param m a matcher on which a match has already succeeded
- * @return will not return null; a three-element array holding the personal name, the
- * local part and the domain, in that order; individual elements may be null
- */
- private static String[] getMatcherParts(Matcher m)
- {
- String current_localpart = null;
- String current_domainpart = null;
- String local_part_da = null; // presumably the local part in dot-atom form ("da")
- String local_part_qs = null; // presumably the local part in quoted-string form ("qs")
- String domain_part_da = null; // presumably the domain in dot-atom form ("da")
- String domain_part_dl = null; // presumably the domain in domain-literal form ("dl")
- String personal_string = null;
-
- // see the group-ID lists in the grammar comments
-
- if (ALLOW_QUOTED_IDENTIFIERS)
- {
- if (ALLOW_DOMAIN_LITERALS)
- {
- // yes quoted identifiers, yes domain literals
-
- if (m.group(1) != null)
- {
- // name-addr form
- local_part_da = m.group(5);
- if (local_part_da == null) local_part_qs = m.group(6);
-
- domain_part_da = m.group(7);
- if (domain_part_da == null) domain_part_dl = m.group(8);
-
- current_localpart =
- (local_part_da == null ? local_part_qs : local_part_da);
-
- current_domainpart =
- (domain_part_da == null ? domain_part_dl : domain_part_da);
-
- personal_string = m.group(2);
- if (personal_string == null && EXTRACT_CFWS_PERSONAL_NAMES)
- {
- personal_string = m.group(9);
- personal_string = removeAnyBounding('(', ')',
- getFirstComment(personal_string));
- }
- }
- else if (m.group(10) != null)
- {
- // addr-spec form
-
- local_part_da = m.group(12);
- if (local_part_da == null) local_part_qs = m.group(13);
-
- domain_part_da = m.group(14);
- if (domain_part_da == null) domain_part_dl = m.group(15);
-
- current_localpart =
- (local_part_da == null ? local_part_qs : local_part_da);
-
- current_domainpart =
- (domain_part_da == null ? domain_part_dl : domain_part_da);
-
- if (EXTRACT_CFWS_PERSONAL_NAMES)
- {
- personal_string = m.group(16);
- personal_string = removeAnyBounding('(', ')',
- getFirstComment(personal_string));
- }
- }
- }
- else
- {
- // yes quoted identifiers, no domain literals
-
- if (m.group(1) != null)
- {
- // name-addr form
-
- local_part_da = m.group(5);
- if (local_part_da == null) local_part_qs = m.group(6);
-
- current_localpart =
- (local_part_da == null ? local_part_qs : local_part_da);
-
- current_domainpart = m.group(7);
-
- personal_string = m.group(2);
- if (personal_string == null && EXTRACT_CFWS_PERSONAL_NAMES)
- {
- personal_string = m.group(8);
- personal_string = removeAnyBounding('(', ')',
- getFirstComment(personal_string));
- }
- }
- else if (m.group(9) != null)
- {
- // addr-spec form
-
- local_part_da = m.group(11);
- if (local_part_da == null) local_part_qs = m.group(12);
-
- current_localpart =
- (local_part_da == null ? local_part_qs : local_part_da);
-
- current_domainpart = m.group(13);
-
- if (EXTRACT_CFWS_PERSONAL_NAMES)
- {
- personal_string = m.group(14);
- personal_string = removeAnyBounding('(', ')',
- getFirstComment(personal_string));
- }
- }
- }
- }
- else
- {
- // no quoted identifiers, yes|no domain literals
-
- local_part_da = m.group(3);
- if (local_part_da == null) local_part_qs = m.group(4);
-
- domain_part_da = m.group(5);
- if (domain_part_da == null && ALLOW_DOMAIN_LITERALS)
- domain_part_dl = m.group(6);
-
- current_localpart = (local_part_da == null ? local_part_qs : local_part_da);
-
- current_domainpart = (domain_part_da == null ? domain_part_dl : domain_part_da);
-
- if (EXTRACT_CFWS_PERSONAL_NAMES)
- {
- personal_string = m.group((ALLOW_DOMAIN_LITERALS ? 1 : 0) + 6); // group 7 with domain literals, group 6 without
- personal_string = removeAnyBounding('(', ')',
- getFirstComment(personal_string));
- }
- }
-
- if (current_localpart != null) current_localpart = current_localpart.trim();
- if (current_domainpart != null) current_domainpart = current_domainpart.trim();
- if (personal_string != null)
- {
- // trim even though calling cleanupPersonalString, which also trims, because the
- // latter may return the same thing back without trimming
- personal_string = personal_string.trim();
- personal_string = cleanupPersonalString(personal_string);
- }
-
- // remove any unnecessary bounding quotes from the localpart:
-
- String test_addr = removeAnyBounding('"', '"', current_localpart) +
- "@" + current_domainpart;
-
- if (ADDR_SPEC_PATTERN.matcher(test_addr).matches()) current_localpart =
- removeAnyBounding('"', '"', current_localpart);
-
- return(new String[] { personal_string, current_localpart, current_domainpart });
- }
-
- /**
- * Given a string, extract the first matched comment token as defined in 2822, trimmed;
- * return null on all errors or non-findings
- *
- * This is probably not super-useful. Included just in case.
- *
- * Note for future improvement: if COMMENT_PATTERN could handle nested
- * comments, then this should be able to as well, but if this method were to be used to
- * find the CFWS personal name (see boolean option) then such a nested comment would
- * probably not be the one you were looking for?
- *
- * @param text the text to search; may be null
- * @return the first region of text matched by COMMENT_PATTERN, trimmed, or null when
- * text is null or contains no match
- */
- public static String getFirstComment(String text)
- {
- if (text == null) return(null); // important: getMatcherParts passes capture groups that may be null
-
- Matcher m = COMMENT_PATTERN.matcher(text);
-
- if (! m.find()) return(null);
-
- return(m.group().trim()); // trim important (presumably the match can include surrounding whitespace)
- }
-
- /**
- * Given a string, if the string is a quoted string (without CFWS
- * around it, although it will be trimmed) then remove the bounding
- * quotations and then unescape it. Useful when passing
- * simple named address personal names into InternetAddress since InternetAddress always
- * quotes the entire phrase token into one mass; in this simple (and common) case, we
- * can strip off the quotes and de-escape, and passing to javamail will result in a cleaner
- * quote-free result (if there are no embedded escaped characters) or the proper
- * one-level-quoting
- * result (if there are embedded escaped characters). If the string is anything else,
- * this just returns it unadulterated.
- *
- * @param text the personal name to clean up; may be null
- * @return null when text is null; the trimmed input when it does not match
- * QUOTED_STRING_WO_CFWS_PATTERN as a whole; otherwise the content between the bounding
- * quotes with the two escape replacements below applied, trimmed
- */
- private static String cleanupPersonalString(String text)
- {
- if (text == null) return(null);
- text = text.trim();
-
- Matcher m = QUOTED_STRING_WO_CFWS_PATTERN.matcher(text);
-
- if (! m.matches()) return(text); // not a bare quoted string: hand back the trimmed input
-
- text = removeAnyBounding('"', '"', m.group());
-
- text = ESCAPED_BSLASH_PATTERN.matcher(text).replaceAll("\\\\"); // each match becomes one literal backslash
- text = ESCAPED_QUOTE_PATTERN.matcher(text).replaceAll("\""); // each match becomes one double quote
-
- return(text.trim());
- }
-
- /**
- * If the string starts and ends with s and e, remove them, otherwise return
- * the string as it was passed in.
- *
- * @param s the character expected at the start of the string
- * @param e the character expected at the end of the string
- * @param str the string to strip; may be null
- * @return str without its first and last character when it starts with s and ends
- * with e; otherwise str itself, which includes null and strings shorter than two
- * characters
- */
- private static String removeAnyBounding(char s, char e, String str)
- {
- if (str == null || str.length() < 2) return(str); // too short to hold both bounding characters
-
- if (str.startsWith(String.valueOf(s)) && str.endsWith(String.valueOf(e)))
- return(str.substring(1, str.length() - 1));
- else return(str);
- }
-
-/* The current regex string for mailbox token, just for fun:
-(((?:(?:(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?[a-zA-Z0-9\!\#-\'\*\+\-\/\=\?\^-\`\{-\~\.]+(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?)|(?:(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?\"(?:(?:(?:[ \t]*\r\n)?[ \t]+)?(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!\#-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F])))*(?:(?:[ \t]*\r\n)?[ \t]+)?\"(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?))(?:(?:(?:[ \t]*\r\n)?[ \t]+)(?:(?:(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ 
\t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?[a-zA-Z0-9\!\#-\'\*\+\-\/\=\?\^-\`\{-\~\.]+(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?)|(?:(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?\"(?:(?:(?:[ \t]*\r\n)?[ \t]+)?(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!\#-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F])))*(?:(?:[ \t]*\r\n)?[ \t]+)?\"(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?)))*)??((?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?<((?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ 
\t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?([a-zA-Z0-9\!\#-\'\*\+\-\/\=\?\^-\`\{-\~\.]+(?:\.[a-zA-Z0-9\!\#-\'\*\+\-\/\=\?\^-\`\{-\~\.]+)*)(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?|(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?(\"(?:(?:(?:[ \t]*\r\n)?[ \t]+)?(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!\#-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F])))*(?:(?:[ \t]*\r\n)?[ \t]+)?\")(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?)@(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ 
\t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\.[a-zA-Z]{2,6})(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?>((?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?))|(((?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?([a-zA-Z0-9\!\#-\'\*\+\-\/\=\?\^-\`\{-\~\.]+(?:\.[a-zA-Z0-9\!\#-\'\*\+\-\/\=\?\^-\`\{-\~\.]+)*)(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[\t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?|(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ 
\t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?(\"(?:(?:(?:[ \t]*\r\n)?[ \t]+)?(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!\#-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F])))*(?:(?:[ \t]*\r\n)?[ \t]+)?\")(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?)@(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\.[a-zA-Z]{2,6})((?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))*(?:(?:(?:(?:[ \t]*\r\n)?[ \t]+)?\((?:(?:(?:[ \t]*\r\n)?[ \t]+)?[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\!-\'\*-\[\]-\~]|(?:\\[\x01-\x09\x0B\x0C\x0E-\x7F]))*(?:(?:[ \t]*\r\n)?[ \t]+)?\))|(?:(?:[ \t]*\r\n)?[ \t]+)))?)
-
-*/
-
-}
\ No newline at end of file
diff --git a/src/main/java/org/codemonkey/simplejavamail/util/EmailAddressValidationCriteria.java b/src/main/java/org/codemonkey/simplejavamail/util/EmailAddressValidationCriteria.java
deleted file mode 100644
index 5018824fb..000000000
--- a/src/main/java/org/codemonkey/simplejavamail/util/EmailAddressValidationCriteria.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package org.codemonkey.simplejavamail.util;
-
-/**
- * Defines a set of restriction flags for email address validation. To remain completely true to RFC 2822, all flags should be set to
- *
- * someone@[192.168.1.100] or
- * The RFC says these are valid email addresses, but most people don't like allowing them. If you don't want to allow them,
- * and only want to allow valid domain names (RFC 1035, x.y.z.com, etc),
- * change this constant to false.
- *
- * Its default value is true to remain RFC 2822 compliant, but you should set it depending on what you need for your
- * application.
- * "John Smith" <john.smith@somewhere.com>
- *
- * The RFC says this is a valid mailbox. If you don't want to allow this, because for example, you only want users to enter
- * in a raw address (john.smith@somewhere.com - no quotes or angle brackets), then change this constant to
- * false.
- *
- * Its default value is true to remain RFC 2822 compliant, but you should set it depending on what you need for your
- * application.
- * From the original author:
- * Code sanitized by Benny Bottema (kept validation 100% intact).
- *
- * @author Les Hazlewood, Benny Bottema
- * @see EmailAddressValidationCriteria
- */
-public final class EmailValidationUtil {
-
- /**
- * Private constructor; this is a utility class with static methods only, not designed for extension.
- */
- private EmailValidationUtil() {
- // intentionally empty: exists only to prevent instantiation
- }
-
- /**
- * Validates an e-mail with default validation flags that remains someone@[192.168.1.100] or The RFC says these are valid email addresses, but most people don't like allowing them. If you don't want to allow them, and only want to allow valid
+ * domain names (RFC 1035, x.y.z.com, etc), and specifically only those with at least two levels
+ * ("example.com"), then don't include this criteria.
+ */
+ ALLOW_DOMAIN_LITERALS,
+ /**
+ * This criteria states that as per RFC 2822, quoted identifiers are allowed (using quotes and angle brackets around the raw address), e.g.:
+ * The RFC says this is a valid mailbox. If you don't want to allow this, because for example, you only want users to enter in a raw address
+ * (john.smith@somewhere.com - no quotes or angle brackets), then don't include this criteria.
+ */
+ ALLOW_QUOTED_IDENTIFIERS,
+ /**
+ * This criteria allows "." to appear in atext (note: only atext which appears in the 2822 "name-addr" part of the address, not the
+ * other instances)
+ * Kayaks.org <kayaks@kayaks.org> Bob K. Smith<bobksmith@bob.net>
+ * "Kayaks.org" <kayaks@kayaks.org> "Bob K. Smith"
+ * <bobksmith@bob.net>
+ * [Kayaks] <kayaks@kayaks.org> ...is not valid. It should be: "[Kayaks]" <kayaks@kayaks.org>
+ * e.g.: "bob smith" <bob@example.com> (Bobby)
+ * Note that <""> is not a valid return-path.
+ */
+ public static boolean isValidReturnPath(String email, EnumSet <(my > path) > ...are legal return-paths and may throw a simpler parser off. However this method will return all CFWS
+ * (comments, whitespace) that may be between the brackets as well. So the example above will return: (my > path)_ "hi" <bob@smith.com> will return an
+ * address, so the moral is that you may want to check isValidReturnPath() first, if you care. This method is useful if you trust the return path and want
+ * to extract a clean address from it without CFWS (getReturnPathBracketContents() will return any CFWS), or if you want to determine if a validated return
+ * path actually contains an address in it and isn't just empty or full of CFWS. Except for empty return paths (those lacking an address) the Return-Path
+ * specification is a subset of valid 2822 addresses, so this method will work on all non-empty return-paths, failing only on the empty ones.
+ * (respectively) If parsing headers, however, you'll probably be calling
+ * extractHeaderAddresses().
+ * true
.
- *
- * @author Benny Bottema
- * @see #EmailAddressValidationCriteria(boolean, boolean)
- */
- public class EmailAddressValidationCriteria { // immutable holder for the two validation restriction flags
-
- private final boolean allowDomainLiterals;
- private final boolean allowQuotedIdentifiers;
-
- /**
- * Criteria which is most RFC 2822 compliant and allows all compliant address forms, including the more exotic ones.
- * Both flags are set to true.
- *
- * @see #EmailAddressValidationCriteria(boolean, boolean)
- */
- public static final EmailAddressValidationCriteria RFC_COMPLIANT = new EmailAddressValidationCriteria(true, true);
-
- /**
- * Creates a criteria object from the two restriction flags.
- *
- * @param allowDomainLiterals whether domain literals are allowed, e.g.
- *            john.doe@[23:33:A2:22:16:1F] or me@[my computer]; pass false to
- *            allow only regular domain names
- * @param allowQuotedIdentifiers whether the quoted-name / angle-bracket mailbox form is allowed, e.g.
- *            "John Smith" <john.smith@somewhere.com>; pass false to allow only
- *            a raw address
- */
- public EmailAddressValidationCriteria(boolean allowDomainLiterals, boolean allowQuotedIdentifiers) {
- this.allowDomainLiterals = allowDomainLiterals;
- this.allowQuotedIdentifiers = allowQuotedIdentifiers;
- }
-
- public final boolean isAllowDomainLiterals() { // returns the allowDomainLiterals flag given to the constructor
- return allowDomainLiterals;
- }
-
- public final boolean isAllowQuotedIdentifiers() { // returns the allowQuotedIdentifiers flag given to the constructor
- return allowQuotedIdentifiers;
- }
- }
\ No newline at end of file
diff --git a/src/main/java/org/codemonkey/simplejavamail/util/EmailValidationUtil.java b/src/main/java/org/codemonkey/simplejavamail/util/EmailValidationUtil.java
deleted file mode 100644
index cc368b543..000000000
--- a/src/main/java/org/codemonkey/simplejavamail/util/EmailValidationUtil.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright 2008 Les Hazlewood Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
- * file except in compliance with the License. You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- * either express or implied. See the License for the specific language governing permissions and limitations under the
- * License.
- */
-package org.codemonkey.simplejavamail.util;
-
-import java.util.regex.Pattern;
-
-/**
- * Validates an email address according to RFC 2822, using regular expressions.
- *
- * If you use this code, please keep the author information intact and reference my site at leshazlewood.com. Thanks!
- * true
to RFC 2822. This means allowing both domain
- * literals and quoted identifiers.
- *
- * @param email A complete email address.
- * @return Whether the e-mail address is compliant with RFC 2822.
- * @see EmailAddressValidationCriteria#RFC_COMPLIANT
- */
- public static boolean isValid(final String email) {
- return isValid(email, EmailAddressValidationCriteria.RFC_COMPLIANT); // delegates with both restriction flags enabled
- }
-
- /**
- * Validates an e-mail with given validation flags.
- *
- * The pattern is rebuilt through buildValidEmailPattern on every call, and the entire
- * input must match it (Matcher.matches), not just a part of it.
- *
- * NOTE(review): neither argument is null-checked here; a null value will presumably
- * fail with a NullPointerException - confirm whether callers guard against this.
- *
- * @param email A complete email address.
- * @param emailAddressValidationCriteria A set of flags that restrict or relax RFC 2822 compliance.
- * @return Whether the e-mail address is compliant with RFC 2822, configured using the passed in {@link EmailAddressValidationCriteria}.
- * @see EmailAddressValidationCriteria#RFC_COMPLIANT
- */
- public static boolean isValid(final String email, final EmailAddressValidationCriteria emailAddressValidationCriteria) {
- return buildValidEmailPattern(emailAddressValidationCriteria).matcher(email).matches();
- }
-
- protected static Pattern buildValidEmailPattern(EmailAddressValidationCriteria parameterObject) { // assembles the address regex from grammar fragments and compiles it
- // RFC 2822 2.2.2 Structured Header Field Bodies
- final String wsp = "[ \\t]"; // space or tab
- final String fwsp = wsp + "*"; // zero or more spaces/tabs; line folding (CRLF) is not modelled here
- // RFC 2822 3.2.1 Primitive tokens
- final String dquote = "\\\""; // an escaped double quote character
- // ASCII Control characters excluding white space:
- final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F"; // bare ranges, meant to be embedded inside a [...] class
- // all ASCII characters except CR and LF:
- final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";
- // RFC 2822 3.2.2 Quoted characters:
- // single backslash followed by a text char
- final String quotedPair = "(\\\\" + asciiText + ")";
- // RFC 2822 3.2.4 Atom:
- final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
- final String atom = fwsp + atext + "+" + fwsp;
- final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*"; // atext runs separated by single dots
- final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;
- // RFC 2822 3.2.5 Quoted strings:
- // noWsCtl and the rest of ASCII except the doublequote and backslash characters:
- final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
- final String qcontent = "(" + qtext + "|" + quotedPair + ")";
- final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;
- // RFC 2822 3.2.6 Miscellaneous tokens
- final String word = "((" + atom + ")|(" + quotedString + "))";
- final String phrase = word + "+"; // one or more words.
- // RFC 1035 tokens for domain names:
- final String letter = "[a-zA-Z]";
- final String letDig = "[a-zA-Z0-9]";
- final String letDigHyp = "[a-zA-Z0-9-]";
- final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?"; // 1 to 63 characters, no leading or trailing hyphen
- final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}"; // at least two levels; last label is 2 to 6 letters
- // RFC 2822 3.4 Address specification
- // domain text - non white space controls and the rest of ASCII chars not including [, ], or \:
- final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
- final String dcontent = dtext + "|" + quotedPair; // NOTE(review): not parenthesized, so the "|" splits whatever group this is embedded in
- final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]"; // NOTE(review): as written, the "+" binds to quotedPair only - confirm intended
- final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";
- final String domain = parameterObject.isAllowDomainLiterals() ? rfc2822Domain : rfc1035DomainName;
- final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
- final String addrSpec = localPart + "@" + domain;
- final String angleAddr = "<" + addrSpec + ">";
- final String nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
- final String mailbox = nameAddr + "|" + addrSpec;
- // compile the pattern and return it to the caller; nothing is cached in this method.
- // The full mailbox grammar is used only when quoted identifiers are allowed, otherwise the bare addr-spec:
- final String patternString = parameterObject.isAllowQuotedIdentifiers() ? mailbox : addrSpec;
- return Pattern.compile(patternString);
- }
-}
\ No newline at end of file
diff --git a/src/main/java/org/hazlewood/connor/bottema/emailaddress/Dragons.java b/src/main/java/org/hazlewood/connor/bottema/emailaddress/Dragons.java
new file mode 100644
index 000000000..1c59799bd
--- /dev/null
+++ b/src/main/java/org/hazlewood/connor/bottema/emailaddress/Dragons.java
@@ -0,0 +1,358 @@
+package org.hazlewood.connor.bottema.emailaddress;
+
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * MY DRAGONS WILL EAT YOUR DRAGONS
+ * true
.
+ *
+ * @author Benny Bottema
+ */
+public enum EmailAddressCriteria {
+ /**
+ * This criteria changes the behavior of the domain parsing. If included, the parser will allow 2822 domains, which include single-level domains (e.g.
+ * bob@localhost) as well as domain literals, e.g.:
john.doe@[23:33:A2:22:16:1F] or
me@[my
+ * computer]ALLOW_DOMAIN_LITERALS
criteria is not included, I think this should be pretty safe. Whether or not it's useful, that's up to
+ * you.
+ */
+ ALLOW_SQUARE_BRACKETS_IN_A_TEXT,
+ /**
+ * This criteria allows as per RFC 2822 ")" or "(" to appear in quoted versions of the localpart (they are never allowed in unquoted
+ * versions)
+ *
+ * You can disallow it, but better to include this criteria. I left this hanging around (from an earlier incarnation of the code) as a random option you
+ * can
+ * switch off. No, it's not necessarily useful. Long story.
+ *
+ * If this criteria is not included, it will prevent such addresses from being valid, even though they are: "bob(hi)smith"@test.com
+ */
+ ALLOW_PARENS_IN_LOCALPART;
+
+ /**
+ * The default setting is not strictly 2822 compliant. For example, it does not include the {@link #ALLOW_DOMAIN_LITERALS} criteria, which results in
+ * exclusions on single domains.
+ *
+ * Included in the defaults are:
+ */
+ public static final EnumSet
Regarding the parameter extractCfwsPersonalNames
:
+ *
+ * This criteria controls the behavior of getInternetAddress and extractHeaderAddresses. If included, it allows the
+ * not-totally-kosher-but-happens-in-the-real-world practice of:
+ *
+ * <bob@example.com> (Bob Smith)
+ *
+ * In this case, "Bob Smith" is not technically the personal name, just a comment. If this is included, the methods will convert this into: Bob Smith
+ * <bob@example.com>
+ *
+ * This also happens somewhat more often and appropriately with
+ *
+ * mailer-daemon@blah.com (Mail Delivery System)
+ *
+ * If a personal name appears to the left and CFWS appears to the right of an address, the methods will favor the personal name to the left. If the methods need
+ * to use the CFWS following the address, they will take the first comment token they find.
will yield personal name "bob smith"
<bob@example.com> (Bobby)
will yield personal name "Bobby"
+ *
bob@example.com (Bobby)
will yield personal name "Bobby"
bob@example.com (Bob) (Smith)
will yield personal name
+ * "Bob"
+ */
+public final class EmailAddressParser {
+ /**
+ * Private constructor; this is a utility class with static methods only, not designed for extension.
+ */
+ private EmailAddressParser() {
+ // Intentionally empty: the constructor exists only to prevent instantiation of this static utility class.
+ }
+
+ /**
+ * Tells us if the email represents a valid return path header string.
+ *
+ * NOTE: legit forms like <(comment here)> will return true.
+ *
+ * You can check isValidReturnPath(), and if it is true, and if getInternetAddress() returns null, you know you have a DSN, whether it be an empty return
+ * path or one with only CFWS inside the brackets (which is legit, as demonstrated above). Note that you can also simply call getReturnPathAddress() to have
+ * that operation done for you.
(where the _ is the
+ * trailing space from the original string)
+ */
+ public static String getReturnPathBracketContents(String email, EnumSet
InternetAddress addr2 = InternetAddress.parse(addr.toString());
+ *
+ * The InternetAddress objects returned by this method will not do any decoding of RFC-2047 encoded personal names. See the documentation for this overall
+ * class (above) for more.
+ *
+ * Again, all other uses of that addr object should work OK. It is recommended that if you are using this class that you never create an InternetAddress
+ * object using InternetAddress's own constructors or parsing methods; rather, retrieve them through this class. Perhaps the addr.clone() would work OK,
+ * though.
+ *
+ * The personal name will include any and all phrase token(s) to the left of the address, if they exist, and the string will be trim()'ed, but note that
+ * InternetAddress, when generating the getPersonal() result or the toString() result, if it encounters any quotes or backslashes in the personal name
+ * String, will put the entire thing in a big quoted-escaped chunk.
+ *
+ * This will do some smart unescaping to prevent that from happening unnecessarily; specifically, if there are unnecessary quotes around a personal name, it
+ * will remove them. E.g.
+ *
+ * "Bob" <bob@hi.com>
becomes:
Bob <bob@hi.com>
+ *
+ * (apologies to bob@hi.com for everything i've done to him)
+ *
+ * @param extractCfwsPersonalNames See {@link EmailAddressParser}
+ */
+ public static InternetAddress getInternetAddress(String email, EnumSet
result[1] + "@" + result[2]
+ *
+ *
+ * @param extractCfwsPersonalNames See {@link EmailAddressParser}
+ * @return a three-element array containing the personal name String, local part String, and the domain part String of the address, in that order, without
+ * the @; will return null if the address is invalid; if it is valid this will not return null but the personal name (at index 0) may be null
+ */
+ public static String[] getAddressParts(String email, EnumSet
History:
+ *
+ * Started with code by Les Hazlewood: leshazlewood.com.
+ *
+ * Modified/added (Casey Connor): removed some functions, added support for CFWS token, corrected FWSP token, added some boolean flags, added getInternetAddress
+ * and extractHeaderAddresses and other methods, some optimization.
+ *
+ * Modified/added (Benny Bottema): modularized the code and separated configuration, validation and extraction functions.
+ *
+ * Where Mr. Hazlewood's version was more for ensuring certain forms that were passed in during registrations, etc, this handles more types of verifying as well as
+ * a few forms of extracting the data in predictable, cleaned-up chunks.
+ *
+ * Note: CFWS means the "comment folded whitespace" token from 2822, in other words, whitespace and comment text that is enclosed in ()'s.
+ *
+ * Limitations: doesn't support nested CFWS (comments within (other) comments), doesn't support mailbox groups except when flat-extracting addresses from
+ * headers or when doing verification, doesn't support any of the obs-* tokens. Also: the getInternetAddress and extractHeaderAddresses methods return
+ * InternetAddress objects; if the personal name has any quotes or \'s in it at all, the InternetAddress object will always escape the name entirely and put it
+ * in quotes, so multiple-token personal names with those characters somewhere in them will always be munged into one big escaped string. This is not really a
+ * big deal at all, but I mention it anyway. (And you could get around it by a simple modification to those methods to not use InternetAddress objects.) See the
+ * docs of those methods for more info.
+ *
+ * Note: Unlike InternetAddress, this class will preserve any RFC-2047-encoding of international characters. Thus doing my_internetaddress.getPersonal() will
+ * return the 2047-encoded string, ready for use in an RFC-822-compliant message, whereas the common InternetAddress constructor (when used outside the context
+ * of EmailAddressValidator) would return the decoded version of the text, if any was needed. If you need the decoded form, you can do something like this
+ * (where ia is the InternetAddress object returned from an EmailAddressValidator method):
+ *
+ * ia.setPersonal(javax.mail.internet.MimeUtility.decodeText(ia.getPersonal()));
+ *
+ * ...subsequent calls to ia.getPersonal() will then return the decoded text.
+ *
+ * Note: This class does not do any header-length-checking. There are no such limitations on the email address grammar in 2822, though email headers in general
+ * do have length restrictions. So if the return path is 40000 unfolded characters long, but otherwise valid under 2822, this class will pass it.
+ *
+ * Examples of passing (2822-valid) addresses, believe it or not:
+ *
+ * bob @example.com
"bob" @ example.com
bob (comment) (other comment) @example.com (personal name)
+ *
"<bob \" (here) " < (hi there) "bob(the man)smith" (hi) @ (there) example.com (hello) > (again)
+ *
+ * (none of which are permitted by javamail's InternetAddress parsing, incidentally)
+ *
+ * By using getInternetAddress(), you can retrieve an InternetAddress object that, when toString()'ed, would reveal that the parser had converted the above
+ * into:
+ *
+ * <bob@example.com>
<bob@example.com>
"personal name" <bob@example.com>
"<bob
+ * \" (here)" <"bob(the man)smith"@example.com> true
to RFC 2822.
+ *
+ * @param email A string representing an email address.
+ * @return Whether the e-mail address is compliant with RFC 2822.
+ * @see EmailAddressCriteria#RFC_COMPLIANT
+ */
+ public static boolean isValid(final String email) {
+ return isValid(email, EmailAddressCriteria.RFC_COMPLIANT); // no criteria supplied: delegate to the two-argument overload with the RFC_COMPLIANT criteria
+ }
+
+ /**
+ * Using the given validation criteria, checks to see if the specified string is a valid email address according to the RFC 2822 specification, which is
+ * remarkably squirrely. See doc for this class: 2822 not fully implemented, but probably close enough for almost any needs. Note that things like
+ * spaces
+ * in addresses ("bob @hi.com") are valid according to 2822! Read the docs for this class before using this method!
+ *
+ * If being used on a 2822 header, this method applies to Sender, Resent-Sender, only, although you can also use it on the Return-Path if you
+ * know it
+ * to be non-empty (see doc for isValidReturnPath()!). Folded header lines should work OK, but I haven't tested that.
+ *
+ * @param email A complete email address.
+ * @param criteria A set of criteria flags that restrict or relax RFC 2822 compliance.
+ * @return Whether the e-mail address is compliant with RFC 2822, configured using the passed in {@link EmailAddressCriteria}.
+ * @see EmailAddressCriteria
+ */
+ public static boolean isValid(final String email, final EnumSet