From 80b86564df8368a4e4b6d0730f681f5dc79776b4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?=
Date: Sat, 29 Oct 2016 00:29:07 +0200
Subject: [PATCH] Parse freemail addresses

---
 .../net/pterodactylus/sone/text/FreemailPart.java  |  37 ++++
 .../pterodactylus/sone/text/SoneTextParser.java    | 188 ++++++++++++++-------
 .../pterodactylus/sone/text/FreemailPartTest.java  |  37 ++++
 .../sone/text/SoneTextParserTest.java              |  40 +++++
 4 files changed, 240 insertions(+), 62 deletions(-)
 create mode 100644 src/main/java/net/pterodactylus/sone/text/FreemailPart.java
 create mode 100644 src/test/java/net/pterodactylus/sone/text/FreemailPartTest.java

diff --git a/src/main/java/net/pterodactylus/sone/text/FreemailPart.java b/src/main/java/net/pterodactylus/sone/text/FreemailPart.java
new file mode 100644
index 0000000..69e6bfe
--- /dev/null
+++ b/src/main/java/net/pterodactylus/sone/text/FreemailPart.java
@@ -0,0 +1,37 @@
+package net.pterodactylus.sone.text;
+
+/**
+ * {@link Part} implementation that holds a freemail address.
+ *
+ * @author David ‘Bombe’ Roden
+ */
+public class FreemailPart implements Part {
+
+	private final String emailLocalPart;
+	private final String freemailId;
+	private final String identityId;
+
+	public FreemailPart(String emailLocalPart, String freemailId, String identityId) {
+		this.emailLocalPart = emailLocalPart;
+		this.freemailId = freemailId;
+		this.identityId = identityId;
+	}
+
+	@Override
+	public String getText() {
+		return String.format("%s@%s.freemail", emailLocalPart, freemailId);
+	}
+
+	public String getEmailLocalPart() {
+		return emailLocalPart;
+	}
+
+	public String getFreemailId() {
+		return freemailId;
+	}
+
+	public String getIdentityId() {
+		return identityId;
+	}
+
+}
diff --git a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java
index bd389e1..88efd16 100644
--- a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java
+++ b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java
@@ -17,6 +17,8 @@
 
 package net.pterodactylus.sone.text;
 
+import static com.google.common.base.Optional.absent;
+import static com.google.common.base.Optional.of;
 import static java.util.logging.Logger.getLogger;
 
 import java.io.BufferedReader;
@@ -39,8 +41,10 @@ import net.pterodactylus.sone.database.PostProvider;
 import net.pterodactylus.sone.database.SoneProvider;
 
 import com.google.common.base.Optional;
+import org.bitpedia.util.Base32;
 
 import freenet.keys.FreenetURI;
+import freenet.support.Base64;
 
 /**
  * {@link Parser} implementation that can recognize Freenet URIs.
@@ -55,6 +59,38 @@ public class SoneTextParser implements Parser {
 	/** Pattern to detect whitespace. */
 	private static final Pattern whitespacePattern = Pattern.compile("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]");
 
+	private static class NextLink {
+
+		private final int position;
+		private final String link;
+		private final String remainder;
+		private final LinkType linkType;
+
+		private NextLink(int position, String link, String remainder, LinkType linkType) {
+			this.position = position;
+			this.link = link;
+			this.remainder = remainder;
+			this.linkType = linkType;
+		}
+
+		public int getPosition() {
+			return position;
+		}
+
+		public String getLink() {
+			return link;
+		}
+
+		public String getRemainder() {
+			return remainder;
+		}
+
+		public LinkType getLinkType() {
+			return linkType;
+		}
+
+	}
+
 	/**
 	 * Enumeration for all recognized link types.
 	 *
@@ -69,7 +105,39 @@ public class SoneTextParser implements Parser {
 		HTTP("http://", false),
 		HTTPS("https://", false),
 		SONE("sone://", false),
-		POST("post://", false);
+		POST("post://", false),
+
+		FREEMAIL("", true) {
+			@Override
+			public Optional<NextLink> findNext(String line) {
+				int nextFreemailSuffix = line.indexOf(".freemail");
+				if (nextFreemailSuffix < 54) {
+					/* 52 chars for the id, 1 on @, at least 1 for the local part. */
+					return absent();
+				}
+				if (line.charAt(nextFreemailSuffix - 53) != '@') {
+					return absent();
+				}
+				if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches("^[a-z2-7]*$")) {
+					return absent();
+				}
+				int startOfLocalPart = nextFreemailSuffix - 54;
+				if (!isAllowedInLocalPart(line.charAt(startOfLocalPart))) {
+					return absent();
+				}
+				while ((startOfLocalPart > 0) && isAllowedInLocalPart(line.charAt(startOfLocalPart - 1))) {
+					startOfLocalPart--;
+				}
+				return of(new NextLink(startOfLocalPart, line.substring(startOfLocalPart, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9), this));
+			}
+
+			private boolean isAllowedInLocalPart(char character) {
+				return ((character >= 'A') && (character <= 'Z'))
+						|| ((character >= 'a') && (character <= 'z'))
+						|| ((character >= '0') && (character <= '9'))
+						|| (character == '.') || (character == '-') || (character == '_');
+			}
+		};
 
 		private final String scheme;
 		private final boolean freenetLink;
@@ -92,6 +160,38 @@ public class SoneTextParser implements Parser {
 			return freenetLink;
 		}
 
+		public Optional<NextLink> findNext(String line) {
+			int nextLinkPosition = line.indexOf(getScheme());
+			if (nextLinkPosition == -1) {
+				return absent();
+			}
+			int endOfLink = findEndOfLink(line.substring(nextLinkPosition));
+			return of(new NextLink(nextLinkPosition, line.substring(nextLinkPosition, nextLinkPosition + endOfLink), line.substring(nextLinkPosition + endOfLink), this));
+		}
+
+		private static int findEndOfLink(String line) {
+			Matcher matcher = whitespacePattern.matcher(line);
+			int endOfLink = matcher.find() ? matcher.start() : line.length();
+			while ((endOfLink > 0) && isPunctuation(line.charAt(endOfLink - 1))) {
+				endOfLink--;
+			}
+			int openParens = 0;
+			for (int i = 0; i < endOfLink; i++) {
+				switch (line.charAt(i)) {
+					case '(':
+						openParens++;
+						break;
+					case ')':
+						openParens--;
+						if (openParens < 0) {
+							return i;
+						}
+					default:
+				}
+			}
+			return endOfLink;
+		}
+
 	}
 
 	/** The Sone provider. */
@@ -147,7 +247,7 @@ public class SoneTextParser implements Parser {
 				 */
 				boolean lineComplete = true;
 				while (line.length() > 0) {
-					Optional<NextLink> nextLink = NextLink.findNextLink(line);
+					Optional<NextLink> nextLink = findNextLink(line);
 					if (!nextLink.isPresent()) {
 						if (lineComplete && !lastLineEmpty) {
 							parts.add(new PlainTextPart("\n" + line));
@@ -175,8 +275,7 @@ public class SoneTextParser implements Parser {
 					}
 					lineComplete = false;
 
-					int endOfLink = findEndOfLink(line);
-					String link = line.substring(0, endOfLink);
+					String link = nextLink.get().getLink();
 					logger.log(Level.FINER, String.format("Found link: %s", link));
 
 					/* if there is no text after the scheme, it’s not a link! */
@@ -203,9 +302,11 @@ public class SoneTextParser implements Parser {
 						case HTTPS:
 							renderHttpLink(parts, link, linkType);
 							break;
+						case FREEMAIL:
+							renderFreemailLink(parts, link);
 					}
 
-					line = line.substring(endOfLink);
+					line = nextLink.get().getRemainder();
 				}
 				lastLineEmpty = false;
 			}
@@ -223,6 +324,20 @@ public class SoneTextParser implements Parser {
 		return parts;
 	}
 
+	public static Optional<NextLink> findNextLink(String line) {
+		int earliestLinkPosition = Integer.MAX_VALUE;
+		NextLink earliestNextLink = null;
+		for (LinkType possibleLinkType : LinkType.values()) {
+			Optional<NextLink> nextLink = possibleLinkType.findNext(line);
+			/* track the lowest position so the earliest link wins, not the last link type. */
+			if (nextLink.isPresent() && (nextLink.get().getPosition() < earliestLinkPosition)) {
+				earliestLinkPosition = nextLink.get().getPosition();
+				earliestNextLink = nextLink.get();
+			}
+		}
+		return Optional.fromNullable(earliestNextLink);
+	}
+
 	private void renderSoneLink(PartContainer parts, String line) {
 		if (line.length() >= (7 + 43)) {
 			String soneId = line.substring(7, 50);
@@ -298,67 +413,16 @@ public class SoneTextParser implements Parser {
 		parts.add(new LinkPart(link, name));
 	}
 
-	private int findEndOfLink(String line) {
-		Matcher matcher = whitespacePattern.matcher(line);
-		int endOfLink = matcher.find() ? matcher.start() : line.length();
-		while ((endOfLink > 0) && isPunctuation(line.charAt(endOfLink - 1))) {
-			endOfLink--;
-		}
-		int openParens = 0;
-		for (int i = 0; i < endOfLink; i++) {
-			switch (line.charAt(i)) {
-				case '(':
-					openParens++;
-					break;
-				case ')':
-					openParens--;
-					if (openParens < 0) {
-						return i;
-					}
-				default:
-			}
-		}
-		return endOfLink;
+	private void renderFreemailLink(PartContainer parts, String line) {
+		int separator = line.indexOf('@');
+		String freemailId = line.substring(separator + 1, separator + 53);
+		String identityId = Base64.encode(Base32.decode(freemailId));
+		String emailLocalPart = line.substring(0, separator);
+		parts.add(new FreemailPart(emailLocalPart, freemailId, identityId));
 	}
 
 	private static boolean isPunctuation(char character) {
 		return (character == '.') || (character == ',') || (character == '!') || (character == '?');
 	}
 
-	private static class NextLink {
-
-		private final int position;
-		private final LinkType linkType;
-
-		private NextLink(int position, LinkType linkType) {
-			this.position = position;
-			this.linkType = linkType;
-		}
-
-		public int getPosition() {
-			return position;
-		}
-
-		public LinkType getLinkType() {
-			return linkType;
-		}
-
-		public static Optional<NextLink> findNextLink(String line) {
-			int earliestLinkPosition = Integer.MAX_VALUE;
-			LinkType linkType = null;
-			for (LinkType possibleLinkType : LinkType.values()) {
-				int nextLinkPosition = line.indexOf(possibleLinkType.getScheme());
-				if (nextLinkPosition > -1) {
-					if (nextLinkPosition < earliestLinkPosition) {
-						earliestLinkPosition = nextLinkPosition;
-						linkType = possibleLinkType;
-					}
-				}
-			}
-			return earliestLinkPosition < Integer.MAX_VALUE ?
-					Optional.of(new NextLink(earliestLinkPosition, linkType)) : Optional.absent();
-		}
-
-	}
-
 }
diff --git a/src/test/java/net/pterodactylus/sone/text/FreemailPartTest.java b/src/test/java/net/pterodactylus/sone/text/FreemailPartTest.java
new file mode 100644
index 0000000..49972df
--- /dev/null
+++ b/src/test/java/net/pterodactylus/sone/text/FreemailPartTest.java
@@ -0,0 +1,37 @@
+package net.pterodactylus.sone.text;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+
+import org.junit.Test;
+
+/**
+ * Unit test for {@link FreemailPart}.
+ *
+ * @author David ‘Bombe’ Roden
+ */
+public class FreemailPartTest {
+
+	private final FreemailPart part = new FreemailPart("local", "freemail-id", "identity-id");
+
+	@Test
+	public void freemailPartRetainsEmailLocalPart() {
+		assertThat(part.getEmailLocalPart(), is("local"));
+	}
+
+	@Test
+	public void freemailPartRetainsFreemailId() {
+		assertThat(part.getFreemailId(), is("freemail-id"));
+	}
+
+	@Test
+	public void freemailPartRetainsIdentityId() {
+		assertThat(part.getIdentityId(), is("identity-id"));
+	}
+
+	@Test
+	public void freemailPartReturnsCorrectText() {
+		assertThat(part.getText(), is("local@freemail-id.freemail"));
+	}
+
+}
diff --git a/src/test/java/net/pterodactylus/sone/text/SoneTextParserTest.java b/src/test/java/net/pterodactylus/sone/text/SoneTextParserTest.java
index 59987c3..8956ba4 100644
--- a/src/test/java/net/pterodactylus/sone/text/SoneTextParserTest.java
+++ b/src/test/java/net/pterodactylus/sone/text/SoneTextParserTest.java
@@ -17,6 +17,7 @@
 
 package net.pterodactylus.sone.text;
 
+import static java.lang.String.format;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.isIn;
@@ -313,6 +314,42 @@ public class SoneTextParserTest {
 		assertThat("Part Text", convertText(parts, PlainTextPart.class, LinkPart.class), is("A link: [http://example.sone/abc|example.sone/abc|example.sone/abc]?"));
 	}
 
+	@Test
+	public void correctFreemailAddressIsLinkedToCorrectly() {
+		Iterable<Part> parts = soneTextParser.parse("Mail me at sone@t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail!", null);
+		assertThat("Part Text", convertText(parts), is("Mail me at [Freemail|sone|t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra|nwa8lHa271k2QvJ8aa0Ov7IHAV-DFOCFgmDt3X6BpCI]!"));
+	}
+
+	@Test
+	public void freemailAddressWithInvalidFreemailIdIsParsedAsText() {
+		Iterable<Part> parts = soneTextParser.parse("Mail me at sone@t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqr8.freemail!", null);
+		assertThat("Part Text", convertText(parts), is("Mail me at sone@t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqr8.freemail!"));
+	}
+
+	@Test
+	public void freemailAddressWithInvalidSizedFreemailIdIsParsedAsText() {
+		Iterable<Part> parts = soneTextParser.parse("Mail me at sone@4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail!", null);
+		assertThat("Part Text", convertText(parts), is("Mail me at sone@4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail!"));
+	}
+
+	@Test
+	public void freemailAddressWithoutLocalPartIsParsedAsText() {
+		Iterable<Part> parts = soneTextParser.parse(" @t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail!", null);
+		assertThat("Part Text", convertText(parts), is(" @t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail!"));
+	}
+
+	@Test
+	public void correctFreemailAddressIsParsedCorrectly() {
+		Iterable<Part> parts = soneTextParser.parse("sone@t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail", null);
+		assertThat("Part Text", convertText(parts), is("[Freemail|sone|t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra|nwa8lHa271k2QvJ8aa0Ov7IHAV-DFOCFgmDt3X6BpCI]"));
+	}
+
+	@Test
+	public void localPartOfFreemailAddressCanContainLettersDigitsMinusDotUnderscore() {
+		Iterable<Part> parts = soneTextParser.parse("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._@t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra.freemail", null);
+		assertThat("Part Text", convertText(parts), is("[Freemail|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._|t4dlzfdww3xvsnsc6j6gtliox6zaoak7ymkobbmcmdw527ubuqra|nwa8lHa271k2QvJ8aa0Ov7IHAV-DFOCFgmDt3X6BpCI]"));
+	}
+
 	/**
 	 * Converts all given {@link Part}s into a string, validating that the
 	 * part’s classes match only the expected classes.
@@ -336,6 +373,9 @@ public class SoneTextParserTest {
 			} else if (part instanceof FreenetLinkPart) {
 				FreenetLinkPart freenetLinkPart = (FreenetLinkPart) part;
 				text.append('[').append(freenetLinkPart.getLink()).append('|').append(freenetLinkPart.isTrusted() ? "trusted|" : "").append(freenetLinkPart.getTitle()).append('|').append(freenetLinkPart.getText()).append(']');
+			} else if (part instanceof FreemailPart) {
+				FreemailPart freemailPart = (FreemailPart) part;
+				text.append(format("[Freemail|%s|%s|%s]", freemailPart.getEmailLocalPart(), freemailPart.getFreemailId(), freemailPart.getIdentityId()));
 			} else if (part instanceof LinkPart) {
 				LinkPart linkPart = (LinkPart) part;
 				text.append('[').append(linkPart.getLink()).append('|').append(linkPart.getTitle()).append('|').append(linkPart.getText()).append(']');
-- 
2.7.4