X-Git-Url: https://git.pterodactylus.net/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fnet%2Fpterodactylus%2Fsone%2Ftext%2FSoneTextParser.java;h=6752c12b45c7a9209386accf9b89a60756774713;hb=2241b13275a0dec86461aba67db92825424b9f1b;hp=7319b2b71baef195b5a1caa346dbc6f21659c5ec;hpb=2c66e80b5bc96df18f02757201edd83fd8af6d94;p=Sone.git diff --git a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java index 7319b2b..6752c12 100644 --- a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java +++ b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java @@ -1,5 +1,5 @@ /* - * Sone - SoneTextParser.java - Copyright © 2010–2013 David Roden + * Sone - SoneTextParser.java - Copyright © 2010–2016 David Roden * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,13 +21,16 @@ import static java.util.logging.Logger.getLogger; import java.io.BufferedReader; import java.io.IOException; -import java.io.Reader; +import java.io.StringReader; import java.net.MalformedURLException; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + import net.pterodactylus.sone.data.Post; import net.pterodactylus.sone.data.Sone; import net.pterodactylus.sone.data.impl.IdOnlySone; @@ -117,10 +120,11 @@ public class SoneTextParser implements Parser { /** * {@inheritDoc} */ + @Nonnull @Override - public Iterable parse(SoneTextParserContext context, Reader source) throws IOException { + public Iterable parse(@Nonnull String source, @Nullable SoneTextParserContext context) { PartContainer parts = new PartContainer(); - BufferedReader bufferedReader = (source instanceof BufferedReader) ? (BufferedReader) source : new BufferedReader(source); + BufferedReader bufferedReader = new BufferedReader(new StringReader(source)); try { String line; boolean lastLineEmpty = true; @@ -143,15 +147,8 @@ public class SoneTextParser implements Parser { */ boolean lineComplete = true; while (line.length() > 0) { - int nextKsk = line.indexOf("KSK@"); - int nextChk = line.indexOf("CHK@"); - int nextSsk = line.indexOf("SSK@"); - int nextUsk = line.indexOf("USK@"); - int nextHttp = line.indexOf("http://"); - int nextHttps = line.indexOf("https://"); - int nextSone = line.indexOf("sone://"); - int nextPost = line.indexOf("post://"); - if ((nextKsk == -1) && (nextChk == -1) && (nextSsk == -1) && (nextUsk == -1) && (nextHttp == -1) && (nextHttps == -1) && (nextSone == -1) && (nextPost == -1)) { + Optional nextLink = NextLink.findNextLink(line); + if (!nextLink.isPresent()) { if (lineComplete && !lastLineEmpty) { parts.add(new PlainTextPart("\n" + line)); } else { @@ -159,40 +156,8 @@ public class SoneTextParser implements Parser { } break; } - int next = Integer.MAX_VALUE; - LinkType linkType = null; - if ((nextKsk > -1) && (nextKsk < next)) { - next = nextKsk; - linkType = LinkType.KSK; - } - if ((nextChk > -1) && (nextChk < next)) { - next = nextChk; - linkType = LinkType.CHK; - } - if ((nextSsk > -1) && (nextSsk < next)) { - next = nextSsk; - linkType = LinkType.SSK; - } - if ((nextUsk > -1) && (nextUsk < next)) { - next = nextUsk; - linkType = LinkType.USK; - } - if ((nextHttp > -1) && (nextHttp < next)) { - next = nextHttp; - linkType = LinkType.HTTP; - } - if ((nextHttps > -1) && (nextHttps < next)) { - next = nextHttps; - linkType = LinkType.HTTPS; - } - if ((nextSone > -1) && (nextSone < next)) { - next = nextSone; - linkType = LinkType.SONE; - } - if ((nextPost > -1) && (nextPost < next)) { - next = nextPost; - linkType = LinkType.POST; - } + LinkType linkType = nextLink.get().getLinkType(); + int next = nextLink.get().getPosition(); /* cut off “freenet:” from before keys. */ if (linkType.isFreenetLink() && (next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { @@ -207,16 +172,13 @@ public class SoneTextParser implements Parser { if (next > 0) { parts.add(new PlainTextPart(line.substring(0, next))); line = line.substring(next); - next = 0; } lineComplete = false; - Matcher matcher = whitespacePattern.matcher(line); - int nextSpace = matcher.find(0) ? matcher.start() : line.length(); - String link = line.substring(0, nextSpace); + int endOfLink = findEndOfLink(line); + String link = line.substring(0, endOfLink); String name = link; logger.log(Level.FINER, String.format("Found link: %s", link)); - logger.log(Level.FINEST, String.format("CHK: %d, SSK: %d, USK: %d", nextChk, nextSsk, nextUsk)); /* if there is no text after the scheme, it’s not a link! */ if (link.equals(linkType.getScheme())) { @@ -261,7 +223,7 @@ public class SoneTextParser implements Parser { continue; } - if ((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) { + if (linkType.isFreenetLink()) { FreenetURI uri; if (name.indexOf('?') > -1) { name = name.substring(0, name.indexOf('?')); @@ -308,18 +270,19 @@ public class SoneTextParser implements Parser { } parts.add(new LinkPart(link, name)); } - line = line.substring(nextSpace); + line = line.substring(endOfLink); } lastLineEmpty = false; } + } catch (IOException ioe1) { + // a buffered reader around a string reader should never throw. + throw new RuntimeException(ioe1); } finally { - if (bufferedReader != source) { - Closer.close(bufferedReader); - } + Closer.close(bufferedReader); } for (int partIndex = parts.size() - 1; partIndex >= 0; --partIndex) { Part part = parts.getPart(partIndex); - if (!(part instanceof PlainTextPart) || !"\n".equals(((PlainTextPart) part).getText())) { + if (!(part instanceof PlainTextPart) || !"\n".equals(part.getText())) { break; } parts.removePart(partIndex); @@ -327,4 +290,74 @@ public class SoneTextParser implements Parser { return parts; } + private int findEndOfLink(String line) { + Matcher matcher = whitespacePattern.matcher(line); + if (!matcher.find(0)) { + return line.length(); + } + int nextWhitespace = matcher.start(); + int lastPunctuation = nextWhitespace; + while (isPunctuation(line.charAt(lastPunctuation - 1))) { + lastPunctuation -= 1; + } + if (lastPunctuation < nextWhitespace) { + return lastPunctuation; + } + int openParens = 0; + for (int i = 0; i < nextWhitespace; i++) { + switch (line.charAt(i)) { + case '(': + openParens++; + break; + case ')': + openParens--; + if (openParens < 0) { + return i; + } + default: + } + } + return nextWhitespace; + } + + private boolean isPunctuation(char character) { + return character == '.'; + } + + private static class NextLink { + + private final int position; + private final LinkType linkType; + + private NextLink(int position, LinkType linkType) { + this.position = position; + this.linkType = linkType; + } + + public int getPosition() { + return position; + } + + public LinkType getLinkType() { + return linkType; + } + + public static Optional findNextLink(String line) { + int earliestLinkPosition = Integer.MAX_VALUE; + LinkType linkType = null; + for (LinkType possibleLinkType : LinkType.values()) { + int nextLinkPosition = line.indexOf(possibleLinkType.getScheme()); + if (nextLinkPosition > -1) { + if (nextLinkPosition < earliestLinkPosition) { + earliestLinkPosition = nextLinkPosition; + linkType = possibleLinkType; + } + } + } + return earliestLinkPosition < Integer.MAX_VALUE ? + Optional.of(new NextLink(earliestLinkPosition, linkType)) : Optional.absent(); + } + + } + }