Ignore commas at the end of links, too
[Sone.git] / src / main / java / net / pterodactylus / sone / text / SoneTextParser.java
index f628279..dbfa0f3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Sone - SoneTextParser.java - Copyright © 2010–2013 David Roden
+ * Sone - SoneTextParser.java - Copyright © 2010–2016 David Roden
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,13 +21,16 @@ import static java.util.logging.Logger.getLogger;
 
 import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.Reader;
+import java.io.StringReader;
 import java.net.MalformedURLException;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
 import net.pterodactylus.sone.data.Post;
 import net.pterodactylus.sone.data.Sone;
 import net.pterodactylus.sone.data.impl.IdOnlySone;
@@ -117,10 +120,11 @@ public class SoneTextParser implements Parser<SoneTextParserContext> {
        /**
         * {@inheritDoc}
         */
+       @Nonnull
        @Override
-       public Iterable<Part> parse(SoneTextParserContext context, Reader source) throws IOException {
+       public Iterable<Part> parse(@Nonnull String source, @Nullable SoneTextParserContext context) {
                PartContainer parts = new PartContainer();
-               BufferedReader bufferedReader = (source instanceof BufferedReader) ? (BufferedReader) source : new BufferedReader(source);
+               BufferedReader bufferedReader = new BufferedReader(new StringReader(source));
                try {
                        String line;
                        boolean lastLineEmpty = true;
@@ -168,13 +172,11 @@ public class SoneTextParser implements Parser<SoneTextParserContext> {
                                        if (next > 0) {
                                                parts.add(new PlainTextPart(line.substring(0, next)));
                                                line = line.substring(next);
-                                               next = 0;
                                        }
                                        lineComplete = false;
 
-                                       Matcher matcher = whitespacePattern.matcher(line);
-                                       int nextSpace = matcher.find(0) ? matcher.start() : line.length();
-                                       String link = line.substring(0, nextSpace);
+                                       int endOfLink = findEndOfLink(line);
+                                       String link = line.substring(0, endOfLink);
                                        String name = link;
                                        logger.log(Level.FINER, String.format("Found link: %s", link));
 
@@ -221,7 +223,7 @@ public class SoneTextParser implements Parser<SoneTextParserContext> {
                                                continue;
                                        }
 
-                                       if ((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) {
+                                       if (linkType.isFreenetLink()) {
                                                FreenetURI uri;
                                                if (name.indexOf('?') > -1) {
                                                        name = name.substring(0, name.indexOf('?'));
@@ -268,18 +270,19 @@ public class SoneTextParser implements Parser<SoneTextParserContext> {
                                                }
                                                parts.add(new LinkPart(link, name));
                                        }
-                                       line = line.substring(nextSpace);
+                                       line = line.substring(endOfLink);
                                }
                                lastLineEmpty = false;
                        }
+               } catch (IOException ioe1) {
+                       // a buffered reader around a string reader should never throw.
+                       throw new RuntimeException(ioe1);
                } finally {
-                       if (bufferedReader != source) {
-                               Closer.close(bufferedReader);
-                       }
+                       Closer.close(bufferedReader);
                }
                for (int partIndex = parts.size() - 1; partIndex >= 0; --partIndex) {
                        Part part = parts.getPart(partIndex);
-                       if (!(part instanceof PlainTextPart) || !"\n".equals(((PlainTextPart) part).getText())) {
+                       if (!(part instanceof PlainTextPart) || !"\n".equals(part.getText())) {
                                break;
                        }
                        parts.removePart(partIndex);
@@ -287,6 +290,40 @@ public class SoneTextParser implements Parser<SoneTextParserContext> {
                return parts;
        }
 
+       /**
+        * Locates the end of the link that starts at the beginning of the given
+        * line.
+        * <p>
+        * The link initially extends up to the first match of
+        * {@code whitespacePattern}, or to the end of the line if there is no
+        * match. Trailing punctuation (as defined by
+        * {@link #isPunctuation(char)}) is then excluded, and if the remaining
+        * text contains a closing parenthesis without a matching opening
+        * parenthesis, the link ends right before that closing parenthesis.
+        *
+        * @param line
+        *            The line to search; the link is expected to start at index 0
+        * @return The index of the first character that does not belong to the
+        *         link anymore (between {@code 0} and {@code line.length()})
+        */
+       private int findEndOfLink(String line) {
+               Matcher matcher = whitespacePattern.matcher(line);
+               int endOfLink = matcher.find(0) ? matcher.start() : line.length();
+               /* strip trailing punctuation, but never step in front of the line. */
+               while ((endOfLink > 0) && isPunctuation(line.charAt(endOfLink - 1))) {
+                       endOfLink--;
+               }
+               /* a “)” without a preceding “(” belongs to the surrounding text. */
+               int openParens = 0;
+               for (int i = 0; i < endOfLink; i++) {
+                       switch (line.charAt(i)) {
+                               case '(':
+                                       openParens++;
+                                       break;
+                               case ')':
+                                       openParens--;
+                                       if (openParens < 0) {
+                                               return i;
+                                       }
+                                       break;
+                               default:
+                                       break;
+                       }
+               }
+               return endOfLink;
+       }
+
+       /**
+        * Checks whether the given character is one of the punctuation
+        * characters this parser recognizes: a period or a comma.
+        *
+        * @param character
+        *            The character to check
+        * @return {@code true} if the character is {@code '.'} or {@code ','},
+        *         {@code false} otherwise
+        */
+       private boolean isPunctuation(char character) {
+               switch (character) {
+                       case '.':
+                       case ',':
+                               return true;
+                       default:
+                               return false;
+               }
+       }
+
        private static class NextLink {
 
                private final int position;