X-Git-Url: https://git.pterodactylus.net/?p=Sone.git;a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fnet%2Fpterodactylus%2Fsone%2Ftext%2FSoneTextParser.java;h=39d26dbc6bcde786ea48a0d702cb595005033e10;hp=6e84c70e64ed9c427ec578dc69acf31efd6175a3;hb=a21cf1224cebf99f3210883efbf4afb9bd8da87c;hpb=7bf3f34153d68c6c74245e24d0957aa11e93b056 diff --git a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java index 6e84c70..39d26db 100644 --- a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java +++ b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java @@ -1,5 +1,5 @@ /* - * Sone - SoneTextParser.java - Copyright © 2010–2013 David Roden + * Sone - SoneTextParser.java - Copyright © 2010–2016 David Roden * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,24 +17,36 @@ package net.pterodactylus.sone.text; +import static com.google.common.base.Optional.absent; +import static com.google.common.base.Optional.of; +import static java.util.logging.Logger.getLogger; + import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; +import java.io.StringReader; import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.google.common.base.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; -import net.pterodactylus.sone.core.PostProvider; -import net.pterodactylus.sone.core.SoneProvider; import net.pterodactylus.sone.data.Post; import net.pterodactylus.sone.data.Sone; -import net.pterodactylus.util.io.Closer; -import net.pterodactylus.util.logging.Logging; +import net.pterodactylus.sone.data.impl.IdOnlySone; +import net.pterodactylus.sone.database.PostProvider; +import net.pterodactylus.sone.database.SoneProvider; + +import com.google.common.base.Optional; +import org.bitpedia.util.Base32; + import freenet.keys.FreenetURI; +import freenet.support.Base64; /** * {@link Parser} implementation that can recognize Freenet URIs. @@ -44,53 +56,97 @@ import freenet.keys.FreenetURI; public class SoneTextParser implements Parser { /** The logger. */ - private static final Logger logger = Logging.getLogger(SoneTextParser.class); + private static final Logger logger = getLogger(SoneTextParser.class.getName()); /** Pattern to detect whitespace. */ private static final Pattern whitespacePattern = Pattern.compile("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]"); - /** - * Enumeration for all recognized link types. - * - * @author David ‘Bombe’ Roden - */ - private enum LinkType { + private static class NextLink { + + private final int position; + private final String link; + private final String remainder; + private final LinkType linkType; + + private NextLink(int position, String link, String remainder, LinkType linkType) { + this.position = position; + this.link = link; + this.remainder = remainder; + this.linkType = linkType; + } - /** Link is a KSK. */ - KSK("KSK@"), + public int getPosition() { + return position; + } - /** Link is a CHK. */ - CHK("CHK@"), + public String getLink() { + return link; + } - /** Link is an SSK. */ - SSK("SSK@"), + public String getRemainder() { + return remainder; + } - /** Link is a USK. */ - USK("USK@"), + public LinkType getLinkType() { + return linkType; + } - /** Link is HTTP. */ - HTTP("http://"), + } - /** Link is HTTPS. */ - HTTPS("https://"), + /** + * Enumeration for all recognized link types. + * + * @author David ‘Bombe’ Roden + */ + private enum LinkType { - /** Link is a Sone. */ - SONE("sone://"), + KSK("KSK@", true), + CHK("CHK@", true), + SSK("SSK@", true), + USK("USK@", true), + HTTP("http://", false), + HTTPS("https://", false), + SONE("sone://", false), + POST("post://", false), + + FREEMAIL("", true) { + @Override + public Optional findNext(String line) { + int nextFreemailSuffix = line.indexOf(".freemail"); + if (nextFreemailSuffix < 54) { + /* 52 chars for the id, 1 on @, at least 1 for the local part. */ + return absent(); + } + if (line.charAt(nextFreemailSuffix - 53) != '@') { + return absent(); + } + if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches("^[a-z2-7]*$")) { + return absent(); + } + int startOfLocalPart = nextFreemailSuffix - 54; + if (!isAllowedInLocalPart(line.charAt(startOfLocalPart))) { + return absent(); + } + while ((startOfLocalPart > 0) && isAllowedInLocalPart(line.charAt(startOfLocalPart - 1))) { + startOfLocalPart--; + } + return of(new NextLink(startOfLocalPart, line.substring(startOfLocalPart, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9), this)); + } - /** Link is a post. */ - POST("post://"); + private boolean isAllowedInLocalPart(char character) { + return ((character >= 'A') && (character <= 'Z')) + || ((character >= 'a') && (character <= 'z')) + || ((character >= '0') && (character <= '9')) + || (character == '.') || (character == '-') || (character == '_'); + } + }; - /** The scheme identifying this link type. */ private final String scheme; + private final boolean freenetLink; - /** - * Creates a new link type identified by the given scheme. - * - * @param scheme - * The scheme of the link type - */ - private LinkType(String scheme) { + LinkType(String scheme, boolean freenetLink) { this.scheme = scheme; + this.freenetLink = freenetLink; } /** @@ -102,6 +158,42 @@ public class SoneTextParser implements Parser { return scheme; } + public boolean isFreenetLink() { + return freenetLink; + } + + public Optional findNext(String line) { + int nextLinkPosition = line.indexOf(getScheme()); + if (nextLinkPosition == -1) { + return absent(); + } + int endOfLink = findEndOfLink(line.substring(nextLinkPosition)); + return of(new NextLink(nextLinkPosition, line.substring(nextLinkPosition, nextLinkPosition + endOfLink), line.substring(nextLinkPosition + endOfLink), this)); + } + + private static int findEndOfLink(String line) { + Matcher matcher = whitespacePattern.matcher(line); + int endOfLink = matcher.find() ? matcher.start() : line.length(); + while (isPunctuation(line.charAt(endOfLink - 1))) { + endOfLink--; + } + int openParens = 0; + for (int i = 0; i < endOfLink; i++) { + switch (line.charAt(i)) { + case '(': + openParens++; + break; + case ')': + openParens--; + if (openParens < 0) { + return i; + } + default: + } + } + return endOfLink; + } + } /** The Sone provider. */ @@ -130,11 +222,12 @@ public class SoneTextParser implements Parser { /** * {@inheritDoc} */ + @Nonnull @Override - public Iterable parse(SoneTextParserContext context, Reader source) throws IOException { - PartContainer parts = new PartContainer(); - BufferedReader bufferedReader = (source instanceof BufferedReader) ? (BufferedReader) source : new BufferedReader(source); - try { + public Iterable parse(@Nonnull String source, @Nullable SoneTextParserContext context) { + List parts = new ArrayList<>(); + try (Reader sourceReader = new StringReader(source); + BufferedReader bufferedReader = new BufferedReader(sourceReader)) { String line; boolean lastLineEmpty = true; int emptyLines = 0; @@ -156,15 +249,8 @@ public class SoneTextParser implements Parser { */ boolean lineComplete = true; while (line.length() > 0) { - int nextKsk = line.indexOf("KSK@"); - int nextChk = line.indexOf("CHK@"); - int nextSsk = line.indexOf("SSK@"); - int nextUsk = line.indexOf("USK@"); - int nextHttp = line.indexOf("http://"); - int nextHttps = line.indexOf("https://"); - int nextSone = line.indexOf("sone://"); - int nextPost = line.indexOf("post://"); - if ((nextKsk == -1) && (nextChk == -1) && (nextSsk == -1) && (nextUsk == -1) && (nextHttp == -1) && (nextHttps == -1) && (nextSone == -1) && (nextPost == -1)) { + Optional nextLink = findNextLink(line); + if (!nextLink.isPresent()) { if (lineComplete && !lastLineEmpty) { parts.add(new PlainTextPart("\n" + line)); } else { @@ -172,43 +258,11 @@ public class SoneTextParser implements Parser { } break; } - int next = Integer.MAX_VALUE; - LinkType linkType = null; - if ((nextKsk > -1) && (nextKsk < next)) { - next = nextKsk; - linkType = LinkType.KSK; - } - if ((nextChk > -1) && (nextChk < next)) { - next = nextChk; - linkType = LinkType.CHK; - } - if ((nextSsk > -1) && (nextSsk < next)) { - next = nextSsk; - linkType = LinkType.SSK; - } - if ((nextUsk > -1) && (nextUsk < next)) { - next = nextUsk; - linkType = LinkType.USK; - } - if ((nextHttp > -1) && (nextHttp < next)) { - next = nextHttp; - linkType = LinkType.HTTP; - } - if ((nextHttps > -1) && (nextHttps < next)) { - next = nextHttps; - linkType = LinkType.HTTPS; - } - if ((nextSone > -1) && (nextSone < next)) { - next = nextSone; - linkType = LinkType.SONE; - } - if ((nextPost > -1) && (nextPost < next)) { - next = nextPost; - linkType = LinkType.POST; - } + LinkType linkType = nextLink.get().getLinkType(); + int next = nextLink.get().getPosition(); /* cut off “freenet:” from before keys. */ - if (((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) && (next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { + if (linkType.isFreenetLink() && (next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { next -= 8; line = line.substring(0, next) + line.substring(next + 8); } @@ -220,16 +274,11 @@ public class SoneTextParser implements Parser { if (next > 0) { parts.add(new PlainTextPart(line.substring(0, next))); line = line.substring(next); - next = 0; } lineComplete = false; - Matcher matcher = whitespacePattern.matcher(line); - int nextSpace = matcher.find(0) ? matcher.start() : line.length(); - String link = line.substring(0, nextSpace); - String name = link; + String link = nextLink.get().getLink(); logger.log(Level.FINER, String.format("Found link: %s", link)); - logger.log(Level.FINEST, String.format("CHK: %d, SSK: %d, USK: %d", nextChk, nextSsk, nextUsk)); /* if there is no text after the scheme, it’s not a link! */ if (link.equals(linkType.getScheme())) { @@ -238,106 +287,145 @@ public class SoneTextParser implements Parser { continue; } - if (linkType == LinkType.SONE) { - if (line.length() >= (7 + 43)) { - String soneId = line.substring(7, 50); - Sone sone = soneProvider.getSone(soneId, false); - if (sone == null) { - /* - * don’t use create=true above, we don’t want - * the empty shell. - */ - sone = new Sone(soneId, false); - } - parts.add(new SonePart(sone)); - line = line.substring(50); - } else { - parts.add(new PlainTextPart(line)); - line = ""; - } - continue; - } - if (linkType == LinkType.POST) { - if (line.length() >= (7 + 36)) { - String postId = line.substring(7, 43); - Optional post = postProvider.getPost(postId); - if (post.isPresent()) { - parts.add(new PostPart(post.get())); - } else { - parts.add(new PlainTextPart(line.substring(0, 43))); - } - line = line.substring(43); - } else { - parts.add(new PlainTextPart(line)); - line = ""; - } - continue; + switch (linkType) { + case SONE: + renderSoneLink(parts, link); + break; + case POST: + renderPostLink(parts, link); + break; + case KSK: + case CHK: + case SSK: + case USK: + renderFreenetLink(parts, link, linkType, context); + break; + case HTTP: + case HTTPS: + renderHttpLink(parts, link, linkType); + break; + case FREEMAIL: + renderFreemailLink(parts, link); } - if ((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) { - FreenetURI uri; - if (name.indexOf('?') > -1) { - name = name.substring(0, name.indexOf('?')); - } - if (name.endsWith("/")) { - name = name.substring(0, name.length() - 1); - } - try { - uri = new FreenetURI(name); - name = uri.lastMetaString(); - if (name == null) { - name = uri.getDocName(); - } - if (name == null) { - name = link.substring(0, Math.min(9, link.length())); - } - boolean fromPostingSone = ((linkType == LinkType.SSK) || (linkType == LinkType.USK)) && (context != null) && (context.getPostingSone() != null) && link.substring(4, Math.min(link.length(), 47)).equals(context.getPostingSone().getId()); - parts.add(new FreenetLinkPart(link, name, fromPostingSone)); - } catch (MalformedURLException mue1) { - /* not a valid link, insert as plain text. */ - parts.add(new PlainTextPart(link)); - } catch (NullPointerException npe1) { - /* FreenetURI sometimes throws these, too. */ - parts.add(new PlainTextPart(link)); - } catch (ArrayIndexOutOfBoundsException aioobe1) { - /* oh, and these, too. */ - parts.add(new PlainTextPart(link)); - } - } else if ((linkType == LinkType.HTTP) || (linkType == LinkType.HTTPS)) { - name = link.substring(linkType == LinkType.HTTP ? 7 : 8); - int firstSlash = name.indexOf('/'); - int lastSlash = name.lastIndexOf('/'); - if ((lastSlash - firstSlash) > 3) { - name = name.substring(0, firstSlash + 1) + "…" + name.substring(lastSlash); - } - if (name.endsWith("/")) { - name = name.substring(0, name.length() - 1); - } - if (((name.indexOf('/') > -1) && (name.indexOf('.') < name.lastIndexOf('.', name.indexOf('/'))) || ((name.indexOf('/') == -1) && (name.indexOf('.') < name.lastIndexOf('.')))) && name.startsWith("www.")) { - name = name.substring(4); - } - if (name.indexOf('?') > -1) { - name = name.substring(0, name.indexOf('?')); - } - parts.add(new LinkPart(link, name)); - } - line = line.substring(nextSpace); + line = nextLink.get().getRemainder(); } lastLineEmpty = false; } - } finally { - if (bufferedReader != source) { - Closer.close(bufferedReader); - } + } catch (IOException ioe1) { + // a buffered reader around a string reader should never throw. + throw new RuntimeException(ioe1); } for (int partIndex = parts.size() - 1; partIndex >= 0; --partIndex) { - Part part = parts.getPart(partIndex); - if (!(part instanceof PlainTextPart) || !"\n".equals(((PlainTextPart) part).getText())) { + Part part = parts.get(partIndex); + if (!(part instanceof PlainTextPart) || !"\n".equals(part.getText())) { break; } - parts.removePart(partIndex); + parts.remove(partIndex); } return parts; } + public static Optional findNextLink(String line) { + int earliestLinkPosition = Integer.MAX_VALUE; + NextLink earliestNextLink = null; + for (LinkType possibleLinkType : LinkType.values()) { + Optional nextLink = possibleLinkType.findNext(line); + if (nextLink.isPresent()) { + if (nextLink.get().getPosition() < earliestLinkPosition) { + earliestNextLink = nextLink.get(); + earliestLinkPosition = earliestNextLink.getPosition(); + } + } + } + return Optional.fromNullable(earliestNextLink); + } + + private void renderSoneLink(List parts, String line) { + if (line.length() >= (7 + 43)) { + String soneId = line.substring(7, 50); + Optional sone = soneProvider.getSone(soneId); + parts.add(new SonePart(sone.or(new IdOnlySone(soneId)))); + } else { + parts.add(new PlainTextPart(line)); + } + } + + private void renderPostLink(List parts, String line) { + if (line.length() >= (7 + 36)) { + String postId = line.substring(7, 43); + Optional post = postProvider.getPost(postId); + if (post.isPresent()) { + parts.add(new PostPart(post.get())); + } else { + parts.add(new PlainTextPart(line.substring(0, 43))); + } + } else { + parts.add(new PlainTextPart(line)); + } + } + + private void renderFreenetLink(List parts, String link, LinkType linkType, @Nullable SoneTextParserContext context) { + String name = link; + String linkWithoutParameters = link; + if (name.indexOf('?') > -1) { + linkWithoutParameters = name = name.substring(0, name.indexOf('?')); + } + if (name.endsWith("/")) { + name = name.substring(0, name.length() - 1); + } + try { + FreenetURI uri = new FreenetURI(name); + name = uri.lastMetaString(); + if (name == null) { + name = uri.getDocName(); + } + if (name == null) { + name = link.substring(0, Math.min(9, link.length())); + } + boolean fromPostingSone = ((linkType == LinkType.SSK) || (linkType == LinkType.USK)) && (context != null) && (context.getPostingSone() != null) && link.substring(4, Math.min(link.length(), 47)).equals(context.getPostingSone().getId()); + parts.add(new FreenetLinkPart(link, name, linkWithoutParameters, fromPostingSone)); + } catch (MalformedURLException mue1) { + /* not a valid link, insert as plain text. */ + parts.add(new PlainTextPart(link)); + } catch (NullPointerException npe1) { + /* FreenetURI sometimes throws these, too. */ + parts.add(new PlainTextPart(link)); + } catch (ArrayIndexOutOfBoundsException aioobe1) { + /* oh, and these, too. */ + parts.add(new PlainTextPart(link)); + } + } + + private void renderHttpLink(List parts, String link, LinkType linkType) { + String name = link.substring(linkType == LinkType.HTTP ? 7 : 8); + int firstSlash = name.indexOf('/'); + int lastSlash = name.lastIndexOf('/'); + if ((lastSlash - firstSlash) > 3) { + name = name.substring(0, firstSlash + 1) + "…" + name.substring(lastSlash); + } + if (name.endsWith("/")) { + name = name.substring(0, name.length() - 1); + } + if (((name.indexOf('/') > -1) && (name.indexOf('.') < name.lastIndexOf('.', name.indexOf('/'))) || ((name.indexOf('/') == -1) && (name.indexOf('.') < name.lastIndexOf('.')))) && name.startsWith("www.")) { + name = name.substring(4); + } + if (name.indexOf('?') > -1) { + name = name.substring(0, name.indexOf('?')); + } + parts.add(new LinkPart(link, name)); + } + + private void renderFreemailLink(List parts, String line) { + int separator = line.indexOf('@'); + String freemailId = line.substring(separator + 1, separator + 53); + String identityId = Base64.encode(Base32.decode(freemailId)); + String emailLocalPart = line.substring(0, separator); + parts.add(new FreemailPart(emailLocalPart, freemailId, identityId)); + } + + private static boolean isPunctuation(char character) { + return (character == '.') || (character == ',') || (character == '!') || (character == '?'); + } + }