X-Git-Url: https://git.pterodactylus.net/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fnet%2Fpterodactylus%2Fsone%2Ftext%2FSoneTextParser.java;h=efb36c4fef85cbc11ad1114472d1701242d92d46;hb=541f49177de5f63a97d94fe74ffb60badbf5d4c9;hp=b3593ec27254beedcb652720707d471006443389;hpb=82ac8f51a0ecdb93da35ebab63b25f161fceee21;p=Sone.git diff --git a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java index b3593ec..efb36c4 100644 --- a/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java +++ b/src/main/java/net/pterodactylus/sone/text/SoneTextParser.java @@ -1,5 +1,5 @@ /* - * Sone - FreenetLinkParser.java - Copyright © 2010–2012 David Roden + * Sone - SoneTextParser.java - Copyright © 2010–2013 David Roden * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,21 +17,30 @@ package net.pterodactylus.sone.text; +import static com.google.common.base.Optional.absent; +import static com.google.common.base.Optional.of; +import static com.google.common.collect.FluentIterable.from; + import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.net.MalformedURLException; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import net.pterodactylus.sone.core.PostProvider; -import net.pterodactylus.sone.core.SoneProvider; import net.pterodactylus.sone.data.Post; import net.pterodactylus.sone.data.Sone; +import net.pterodactylus.sone.data.impl.DefaultSone; +import net.pterodactylus.sone.database.Database; import net.pterodactylus.util.io.Closer; import net.pterodactylus.util.logging.Logging; + +import com.google.common.base.Optional; import freenet.keys.FreenetURI; /** @@ -55,40 +64,45 @@ public class SoneTextParser implements Parser { private enum LinkType { /** Link is a KSK. */ - KSK("KSK@"), + KSK("KSK@", true, false), /** Link is a CHK. */ - CHK("CHK@"), + CHK("CHK@", true, false), /** Link is an SSK. */ - SSK("SSK@"), + SSK("SSK@", true, false), /** Link is a USK. */ - USK("USK@"), + USK("USK@", true, false), /** Link is HTTP. */ - HTTP("http://"), + HTTP("http://", false, true), /** Link is HTTPS. */ - HTTPS("https://"), + HTTPS("https://", false, true), /** Link is a Sone. */ - SONE("sone://"), + SONE("sone://", false, false), /** Link is a post. */ - POST("post://"); + POST("post://", false, false); /** The scheme identifying this link type. */ private final String scheme; + private final boolean freenetLink; + private final boolean internetLink; /** * Creates a new link type identified by the given scheme. * * @param scheme * The scheme of the link type + * @param freenetLink */ - private LinkType(String scheme) { + private LinkType(String scheme, boolean freenetLink, boolean internetLink) { this.scheme = scheme; + this.freenetLink = freenetLink; + this.internetLink = internetLink; } /** @@ -100,34 +114,31 @@ public class SoneTextParser implements Parser { return scheme; } - } + public boolean isFreenetLink() { + return freenetLink; + } - /** The Sone provider. */ - private final SoneProvider soneProvider; + public boolean isInternetLink() { + return internetLink; + } + + } - /** The post provider. */ - private final PostProvider postProvider; + private final Database database; /** * Creates a new freenet link parser. * - * @param soneProvider - * The Sone provider - * @param postProvider - * The post provider + * @param database */ - public SoneTextParser(SoneProvider soneProvider, PostProvider postProvider) { - this.soneProvider = soneProvider; - this.postProvider = postProvider; + public SoneTextParser(Database database) { + this.database = database; } // // PART METHODS // - /** - * {@inheritDoc} - */ @Override public Iterable parse(SoneTextParserContext context, Reader source) throws IOException { PartContainer parts = new PartContainer(); @@ -154,15 +165,8 @@ public class SoneTextParser implements Parser { */ boolean lineComplete = true; while (line.length() > 0) { - int nextKsk = line.indexOf("KSK@"); - int nextChk = line.indexOf("CHK@"); - int nextSsk = line.indexOf("SSK@"); - int nextUsk = line.indexOf("USK@"); - int nextHttp = line.indexOf("http://"); - int nextHttps = line.indexOf("https://"); - int nextSone = line.indexOf("sone://"); - int nextPost = line.indexOf("post://"); - if ((nextKsk == -1) && (nextChk == -1) && (nextSsk == -1) && (nextUsk == -1) && (nextHttp == -1) && (nextHttps == -1) && (nextSone == -1) && (nextPost == -1)) { + Optional nextLink = findNextLink(line); + if (!nextLink.isPresent()) { if (lineComplete && !lastLineEmpty) { parts.add(new PlainTextPart("\n" + line)); } else { @@ -170,43 +174,12 @@ public class SoneTextParser implements Parser { } break; } - int next = Integer.MAX_VALUE; - LinkType linkType = null; - if ((nextKsk > -1) && (nextKsk < next)) { - next = nextKsk; - linkType = LinkType.KSK; - } - if ((nextChk > -1) && (nextChk < next)) { - next = nextChk; - linkType = LinkType.CHK; - } - if ((nextSsk > -1) && (nextSsk < next)) { - next = nextSsk; - linkType = LinkType.SSK; - } - if ((nextUsk > -1) && (nextUsk < next)) { - next = nextUsk; - linkType = LinkType.USK; - } - if ((nextHttp > -1) && (nextHttp < next)) { - next = nextHttp; - linkType = LinkType.HTTP; - } - if ((nextHttps > -1) && (nextHttps < next)) { - next = nextHttps; - linkType = LinkType.HTTPS; - } - if ((nextSone > -1) && (nextSone < next)) { - next = nextSone; - linkType = LinkType.SONE; - } - if ((nextPost > -1) && (nextPost < next)) { - next = nextPost; - linkType = LinkType.POST; - } + + int next = nextLink.get().getNextIndex(); + LinkType linkType = nextLink.get().getLinkType(); /* cut off “freenet:” from before keys. */ - if (((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) && (next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { + if (linkType.isFreenetLink() && (next >= 8) && (line.substring(next - 8, next).equals("freenet:"))) { next -= 8; line = line.substring(0, next) + line.substring(next + 8); } @@ -218,16 +191,13 @@ public class SoneTextParser implements Parser { if (next > 0) { parts.add(new PlainTextPart(line.substring(0, next))); line = line.substring(next); - next = 0; } lineComplete = false; - Matcher matcher = whitespacePattern.matcher(line); - int nextSpace = matcher.find(0) ? matcher.start() : line.length(); + int nextSpace = findNextWhitespace(line); String link = line.substring(0, nextSpace); String name = link; logger.log(Level.FINER, String.format("Found link: %s", link)); - logger.log(Level.FINEST, String.format("CHK: %d, SSK: %d, USK: %d", nextChk, nextSsk, nextUsk)); /* if there is no text after the scheme, it’s not a link! */ if (link.equals(linkType.getScheme())) { @@ -237,17 +207,17 @@ public class SoneTextParser implements Parser { } if (linkType == LinkType.SONE) { - if (line.length() >= (7 + 43)) { + if (lineIsLongEnoughToContainASoneLink(line)) { String soneId = line.substring(7, 50); - Sone sone = soneProvider.getSone(soneId, false); - if (sone == null) { + Optional sone = database.getSone(soneId); + if (!sone.isPresent()) { /* * don’t use create=true above, we don’t want * the empty shell. */ - sone = new Sone(soneId); + sone = Optional.of(new DefaultSone(database, soneId, false, null)); } - parts.add(new SonePart(sone)); + parts.add(new SonePart(sone.get())); line = line.substring(50); } else { parts.add(new PlainTextPart(line)); @@ -256,11 +226,11 @@ public class SoneTextParser implements Parser { continue; } if (linkType == LinkType.POST) { - if (line.length() >= (7 + 36)) { + if (lineIsLongEnoughToContainAPostLink(line)) { String postId = line.substring(7, 43); - Post post = postProvider.getPost(postId, false); - if ((post != null) && (post.getSone() != null)) { - parts.add(new PostPart(post)); + Optional post = database.getPost(postId); + if (post.isPresent()) { + parts.add(new PostPart(post.get())); } else { parts.add(new PlainTextPart(line.substring(0, 43))); } @@ -272,7 +242,7 @@ public class SoneTextParser implements Parser { continue; } - if ((linkType == LinkType.KSK) || (linkType == LinkType.CHK) || (linkType == LinkType.SSK) || (linkType == LinkType.USK)) { + if (linkType.isFreenetLink()) { FreenetURI uri; if (name.indexOf('?') > -1) { name = name.substring(0, name.indexOf('?')); @@ -289,7 +259,7 @@ public class SoneTextParser implements Parser { if (name == null) { name = link.substring(0, Math.min(9, link.length())); } - boolean fromPostingSone = ((linkType == LinkType.SSK) || (linkType == LinkType.USK)) && (context != null) && (context.getPostingSone() != null) && link.substring(4, Math.min(link.length(), 47)).equals(context.getPostingSone().getId()); + boolean fromPostingSone = ((linkType == LinkType.SSK) || (linkType == LinkType.USK)) && linkMatchesPostingSone(context, link); parts.add(new FreenetLinkPart(link, name, fromPostingSone)); } catch (MalformedURLException mue1) { /* not a valid link, insert as plain text. */ @@ -301,8 +271,8 @@ public class SoneTextParser implements Parser { /* oh, and these, too. */ parts.add(new PlainTextPart(link)); } - } else if ((linkType == LinkType.HTTP) || (linkType == LinkType.HTTPS)) { - name = link.substring(linkType == LinkType.HTTP ? 7 : 8); + } else if (linkType.isInternetLink()) { + name = link.substring(linkType.getScheme().length()); int firstSlash = name.indexOf('/'); int lastSlash = name.lastIndexOf('/'); if ((lastSlash - firstSlash) > 3) { @@ -328,14 +298,84 @@ public class SoneTextParser implements Parser { Closer.close(bufferedReader); } } + removeTrailingWhitespaceParts(parts); + return parts; + } + + private void removeTrailingWhitespaceParts(PartContainer parts) { for (int partIndex = parts.size() - 1; partIndex >= 0; --partIndex) { Part part = parts.getPart(partIndex); - if (!(part instanceof PlainTextPart) || !"\n".equals(((PlainTextPart) part).getText())) { + if (!(part instanceof PlainTextPart) || !"\n".equals(part.getText())) { break; } parts.removePart(partIndex); } - return parts; + } + + private boolean linkMatchesPostingSone(SoneTextParserContext context, String link) { + return (context != null) && (context.getPostingSone() != null) && link.substring(4, Math.min(link.length(), 47)).equals(context.getPostingSone().getId()); + } + + private boolean lineIsLongEnoughToContainAPostLink(String line) { + return line.length() >= (7 + 36); + } + + private boolean lineIsLongEnoughToContainASoneLink(String line) { + return line.length() >= (7 + 43); + } + + private int findNextWhitespace(String line) { + Matcher matcher = whitespacePattern.matcher(line); + return matcher.find(0) ? matcher.start() : line.length(); + } + + private Optional findNextLink(String line) { + EnumMap linkTypeIndexes = new EnumMap(LinkType.class); + for (LinkType linkType : LinkType.values()) { + int index = line.indexOf(linkType.getScheme()); + if (index != -1) { + linkTypeIndexes.put(linkType, index); + } + } + if (linkTypeIndexes.isEmpty()) { + return absent(); + } + Entry smallestEntry = from(linkTypeIndexes.entrySet()).toSortedList(locateSmallestIndex()).get(0); + return of(new NextLink(smallestEntry.getValue(), smallestEntry.getKey())); + } + + private Comparator> locateSmallestIndex() { + return new Comparator>() { + @Override + public int compare(Entry leftEntry, Entry rightEntry) { + return leftEntry.getValue() - rightEntry.getValue(); + } + }; + } + + /** + * Container for position and type of the next link in a line. + * + * @author David ‘Bombe’ Roden + */ + private static class NextLink { + + private final int nextIndex; + private final LinkType linkType; + + private NextLink(int nextIndex, LinkType linkType) { + this.nextIndex = nextIndex; + this.linkType = linkType; + } + + private int getNextIndex() { + return nextIndex; + } + + private LinkType getLinkType() { + return linkType; + } + } }