X-Git-Url: https://git.pterodactylus.net/?p=Sone.git;a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fnet%2Fpterodactylus%2Fsone%2Ftext%2FSoneTextParser.java;h=b85f100bc8ebd67cb138707197ac075e03ba33af;hp=94c3db48818cfab8d3cebe3186df07480302db9d;hb=3a7092e48f27cba6286946442783f539ad73a911;hpb=282916447c6f35a5c461c58011c3fa7d1343cecc diff --git a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java index 94c3db4..b85f100 100644 --- a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java +++ b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java @@ -1,5 +1,5 @@ /* - * Sone - FreenetLinkParser.java - Copyright © 2010 David Roden + * Sone - SoneTextParser.java - Copyright © 2010–2013 David Roden * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +17,8 @@ package net.pterodactylus.sone.text; +import static java.util.logging.Logger.getLogger; + import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; @@ -26,11 +28,15 @@ import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import net.pterodactylus.sone.core.PostProvider; -import net.pterodactylus.sone.core.SoneProvider; import net.pterodactylus.sone.data.Post; import net.pterodactylus.sone.data.Sone; -import net.pterodactylus.util.logging.Logging; +import net.pterodactylus.sone.data.impl.IdOnlySone; +import net.pterodactylus.sone.database.PostProvider; +import net.pterodactylus.sone.database.SoneProvider; +import net.pterodactylus.util.io.Closer; + +import com.google.common.base.Optional; + import freenet.keys.FreenetURI; /** @@ -41,7 +47,7 @@ import freenet.keys.FreenetURI; public class SoneTextParser implements Parser { /** The logger. */ - private static final Logger logger = Logging.getLogger(SoneTextParser.class); + private static final Logger logger = getLogger(SoneTextParser.class.getName()); /** Pattern to detect whitespace. */ private static final Pattern whitespacePattern = Pattern.compile("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]"); @@ -54,28 +60,50 @@ public class SoneTextParser implements Parser { private enum LinkType { /** Link is a KSK. */ - KSK, + KSK("KSK@"), /** Link is a CHK. */ - CHK, + CHK("CHK@"), /** Link is an SSK. */ - SSK, + SSK("SSK@"), /** Link is a USK. */ - USK, + USK("USK@"), /** Link is HTTP. */ - HTTP, + HTTP("http://"), /** Link is HTTPS. */ - HTTPS, + HTTPS("https://"), /** Link is a Sone. */ - SONE, + SONE("sone://"), /** Link is a post. */ - POST, + POST("post://"); + + /** The scheme identifying this link type. */ + private final String scheme; + + /** + * Creates a new link type identified by the given scheme. + * + * @param scheme + * The scheme of the link type + */ + private LinkType(String scheme) { + this.scheme = scheme; + } + + /** + * Returns the scheme of this link type. + * + * @return The scheme of this link type + */ + public String getScheme() { + return scheme; + } } @@ -109,130 +137,145 @@ public class SoneTextParser implements Parser { public Iterable parse(SoneTextParserContext context, Reader source) throws IOException { PartContainer parts = new PartContainer(); BufferedReader bufferedReader = (source instanceof BufferedReader) ? (BufferedReader) source : new BufferedReader(source); - String line; - boolean lastLineEmpty = true; - int emptyLines = 0; - while ((line = bufferedReader.readLine()) != null) { - if (line.trim().length() == 0) { - if (lastLineEmpty) { - continue; - } - parts.add(new PlainTextPart("\n")); - ++emptyLines; - lastLineEmpty = emptyLines == 2; - continue; - } - emptyLines = 0; - boolean lineComplete = true; - while (line.length() > 0) { - int nextKsk = line.indexOf("KSK@"); - int nextChk = line.indexOf("CHK@"); - int nextSsk = line.indexOf("SSK@"); - int nextUsk = line.indexOf("USK@"); - int nextHttp = line.indexOf("http://"); - int nextHttps = line.indexOf("https://"); - int nextSone = line.indexOf("sone://"); - int nextPost = line.indexOf("post://"); - if ((nextKsk == -1) && (nextChk == -1) && (nextSsk == -1) && (nextUsk == -1) && (nextHttp == -1) && (nextHttps == -1) && (nextSone == -1) && (nextPost == -1)) { - if (lineComplete && !lastLineEmpty) { - parts.add(new PlainTextPart("\n" + line)); - } else { - parts.add(new PlainTextPart(line)); + try { + String line; + boolean lastLineEmpty = true; + int emptyLines = 0; + while ((line = bufferedReader.readLine()) != null) { + if (line.trim().length() == 0) { + if (lastLineEmpty) { + continue; } - break; - } - int next = Integer.MAX_VALUE; - LinkType linkType = null; - if ((nextKsk > -1) && (nextKsk < next)) { - next = nextKsk; - linkType = LinkType.KSK; - } - if ((nextChk > -1) && (nextChk < next)) { - next = nextChk; - linkType = LinkType.CHK; - } - if ((nextSsk > -1) && (nextSsk < next)) { - next = nextSsk; - linkType = LinkType.SSK; - } - if ((nextUsk > -1) && (nextUsk < next)) { - next = nextUsk; - linkType = LinkType.USK; - } - if ((nextHttp > -1) && (nextHttp < next)) { - next = nextHttp; - linkType = LinkType.HTTP; - } - if ((nextHttps > -1) && (nextHttps < next)) { - next = nextHttps; - linkType = LinkType.HTTPS; - } - if ((nextSone > -1) && (nextSone < next)) { - next = nextSone; - linkType = LinkType.SONE; - } - if ((nextPost > -1) && (nextPost < next)) { - next = nextPost; - linkType = LinkType.POST; + parts.add(new PlainTextPart("\n")); + ++emptyLines; + lastLineEmpty = emptyLines == 2; + continue; } - if (linkType == LinkType.SONE) { - if (next > 0) { - parts.add(new PlainTextPart(line.substring(0, next))); - } - if (line.length() >= (next + 7 + 43)) { - String soneId = line.substring(next + 7, next + 50); - Sone sone = soneProvider.getSone(soneId, false); - if (sone != null) { - parts.add(new SonePart(sone)); + emptyLines = 0; + /* + * lineComplete tracks whether the block you are parsing is the + * first block of the line. this is important because sometimes + * you have to add an additional line break. + */ + boolean lineComplete = true; + while (line.length() > 0) { + int nextKsk = line.indexOf("KSK@"); + int nextChk = line.indexOf("CHK@"); + int nextSsk = line.indexOf("SSK@"); + int nextUsk = line.indexOf("USK@"); + int nextHttp = line.indexOf("http://"); + int nextHttps = line.indexOf("https://"); + int nextSone = line.indexOf("sone://"); + int nextPost = line.indexOf("post://"); + if ((nextKsk == -1) && (nextChk == -1) && (nextSsk == -1) && (nextUsk == -1) && (nextHttp == -1) && (nextHttps == -1) && (nextSone == -1) && (nextPost == -1)) { + if (lineComplete && !lastLineEmpty) { + parts.add(new PlainTextPart("\n" + line)); } else { - parts.add(new PlainTextPart(line.substring(next, next + 50))); + parts.add(new PlainTextPart(line)); } - line = line.substring(next + 50); - } else { - parts.add(new PlainTextPart(line.substring(next))); - line = ""; + break; + } + int next = Integer.MAX_VALUE; + LinkType linkType = null; + if ((nextKsk > -1) && (nextKsk < next)) { + next = nextKsk; + linkType = LinkType.KSK; + } + if ((nextChk > -1) && (nextChk < next)) { + next = nextChk; + linkType = LinkType.CHK; + } + if ((nextSsk > -1) && (nextSsk < next)) { + next = nextSsk; + linkType = LinkType.SSK; + } + if ((nextUsk > -1) && (nextUsk < next)) { + next = nextUsk; + linkType = LinkType.USK; + } + if ((nextHttp > -1) && (nextHttp < next)) { + next = nextHttp; + linkType = LinkType.HTTP; + } + if ((nextHttps > -1) && (nextHttps < next)) { + next = nextHttps; + linkType = LinkType.HTTPS; + } + if ((nextSone > -1) && (nextSone < next)) { + next = nextSone; + linkType = LinkType.SONE; + } + if ((nextPost > -1) && (nextPost < next)) { + next = nextPost; + linkType = LinkType.POST; + } + + /* cut off “freenet:” from before keys. */ + if (((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) && (next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { + next -= 8; + line = line.substring(0, next) + line.substring(next + 8); + } + + /* if there is text before the next item, write it out. */ + if (lineComplete && !lastLineEmpty) { + parts.add(new PlainTextPart("\n")); } - continue; - } - if (linkType == LinkType.POST) { if (next > 0) { parts.add(new PlainTextPart(line.substring(0, next))); + line = line.substring(next); + next = 0; + } + lineComplete = false; + + Matcher matcher = whitespacePattern.matcher(line); + int nextSpace = matcher.find(0) ? matcher.start() : line.length(); + String link = line.substring(0, nextSpace); + String name = link; + logger.log(Level.FINER, String.format("Found link: %s", link)); + logger.log(Level.FINEST, String.format("CHK: %d, SSK: %d, USK: %d", nextChk, nextSsk, nextUsk)); + + /* if there is no text after the scheme, it’s not a link! */ + if (link.equals(linkType.getScheme())) { + parts.add(new PlainTextPart(linkType.getScheme())); + line = line.substring(linkType.getScheme().length()); + continue; } - if (line.length() >= (next + 7 + 36)) { - String postId = line.substring(next + 7, next + 43); - Post post = postProvider.getPost(postId, false); - if ((post != null) && (post.getSone() != null)) { - String postText = post.getText(); - postText = postText.substring(0, Math.min(postText.length(), 20)) + "…"; - parts.add(new PostPart(post)); + + if (linkType == LinkType.SONE) { + if (line.length() >= (7 + 43)) { + String soneId = line.substring(7, 50); + Optional sone = soneProvider.getSone(soneId); + if (!sone.isPresent()) { + /* + * don’t use create=true above, we don’t want + * the empty shell. + */ + sone = Optional.of(new IdOnlySone(soneId)); + } + parts.add(new SonePart(sone.get())); + line = line.substring(50); } else { - parts.add(new PlainTextPart(line.substring(next, next + 43))); + parts.add(new PlainTextPart(line)); + line = ""; } - line = line.substring(next + 43); - } else { - parts.add(new PlainTextPart(line.substring(next))); - line = ""; + continue; } - continue; - } - if ((next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { - next -= 8; - line = line.substring(0, next) + line.substring(next + 8); - } - Matcher matcher = whitespacePattern.matcher(line); - int nextSpace = matcher.find(next) ? matcher.start() : line.length(); - if (nextSpace > (next + 4)) { - if (!lastLineEmpty && lineComplete) { - parts.add(new PlainTextPart("\n" + line.substring(0, next))); - } else { - if (next > 0) { - parts.add(new PlainTextPart(line.substring(0, next))); + if (linkType == LinkType.POST) { + if (line.length() >= (7 + 36)) { + String postId = line.substring(7, 43); + Optional post = postProvider.getPost(postId); + if (post.isPresent()) { + parts.add(new PostPart(post.get())); + } else { + parts.add(new PlainTextPart(line.substring(0, 43))); + } + line = line.substring(43); + } else { + parts.add(new PlainTextPart(line)); + line = ""; } + continue; } - String link = line.substring(next, nextSpace); - String name = link; - logger.log(Level.FINER, "Found link: %s", link); - logger.log(Level.FINEST, "Next: %d, CHK: %d, SSK: %d, USK: %d", new Object[] { next, nextChk, nextSsk, nextUsk }); if ((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) { FreenetURI uri; @@ -282,17 +325,13 @@ public class SoneTextParser implements Parser { parts.add(new LinkPart(link, name)); } line = line.substring(nextSpace); - } else { - if (!lastLineEmpty && lineComplete) { - parts.add(new PlainTextPart("\n" + line.substring(0, next + 4))); - } else { - parts.add(new PlainTextPart(line.substring(0, next + 4))); - } - line = line.substring(next + 4); } - lineComplete = false; + lastLineEmpty = false; + } + } finally { + if (bufferedReader != source) { + Closer.close(bufferedReader); } - lastLineEmpty = false; } for (int partIndex = parts.size() - 1; partIndex >= 0; --partIndex) { Part part = parts.getPart(partIndex);