2 * Sone - SoneTextParser.java - Copyright © 2010–2013 David Roden
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 package net.pterodactylus.sone.text;
20 import static com.google.common.base.Optional.absent;
21 import static com.google.common.base.Optional.of;
22 import static com.google.common.collect.FluentIterable.from;
23 import static com.google.common.collect.Lists.newArrayList;
25 import java.io.BufferedReader;
26 import java.io.IOException;
27 import java.io.Reader;
28 import java.net.MalformedURLException;
29 import java.util.Comparator;
30 import java.util.EnumMap;
31 import java.util.List;
33 import java.util.Map.Entry;
34 import java.util.logging.Logger;
35 import java.util.regex.Matcher;
36 import java.util.regex.Pattern;
38 import net.pterodactylus.sone.data.Post;
39 import net.pterodactylus.sone.data.Sone;
40 import net.pterodactylus.sone.database.Database;
41 import net.pterodactylus.util.io.Closer;
42 import net.pterodactylus.util.logging.Logging;
44 import com.google.common.base.Optional;
45 import com.google.common.collect.ImmutableMap;
46 import freenet.keys.FreenetURI;
49 * {@link Parser} implementation that recognizes Freenet URIs, HTTP and HTTPS links, and links to Sones and posts.
51 * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
53 public class SoneTextParser implements Parser<SoneTextParserContext> {
56 private static final Logger logger = Logging.getLogger(SoneTextParser.class);
58 /** Pattern to detect whitespace. */
59 private static final Pattern whitespacePattern = Pattern.compile("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]");
62 * Enumeration for all recognized link types.
64 * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
66 private enum LinkType {
// Constant-specific override of isSigned(); the base implementation is declared at the end of the enum.
72 public boolean isSigned() {
// Second constant-specific override of isSigned().
78 public boolean isSigned() {
// Text that marks the start of a link of this type; findNextLink() searches lines for it via getScheme().
87 private final String scheme;
// Stores the scheme text for this link type.
89 private LinkType(String scheme) {
// Accessor for the scheme text of this link type.
93 public String getScheme() {
// Base implementation; FreenetLinkPartCreator consults isSigned() before comparing a link with the posting Sone.
97 public boolean isSigned() {
103 private final PartCreators partCreators = new PartCreators();
104 private final Database database;
/**
 * Creates a new text parser.
 *
 * @param database
 *            the database that is queried for the Sones and posts
 *            referenced by links in the parsed text
 */
public SoneTextParser(Database database) {
	this.database = database;
}
120 public Iterable<Part> parse(SoneTextParserContext context, Reader source) throws IOException {
121 PartContainer parts = new PartContainer();
122 BufferedReader bufferedReader = (source instanceof BufferedReader) ? (BufferedReader) source : new BufferedReader(source);
125 while ((line = bufferedReader.readLine()) != null) {
126 for (String pieceOfLine : splitLine(line)) {
127 parts.add(createPart(pieceOfLine, context));
131 if (bufferedReader != source) {
132 Closer.close(bufferedReader);
135 removeTrailingWhitespaceParts(parts);
136 return optimizeParts(parts);
// Copies the parts of the given container into a new container while cleaning them up:
// line-break parts ("\n") get special treatment, neighbouring plain-text parts are merged,
// and a "freenet:" suffix is cut from plain text that directly precedes a Freenet link.
139 private Iterable<Part> optimizeParts(PartContainer partContainer) {
140 PartContainer parts = new PartContainer();
141 boolean firstPart = true;
// The part most recently placed into the result, used to decide about merging.
142 Part lastPart = null;
144 for (Part part : partContainer) {
// NOTE(review): presumably skips line breaks at the very beginning of the text — confirm.
146 if ("\n".equals(part.getText())) {
// Line-break part: the number of consecutive line breaks is checked below.
151 if ("\n".equals(part.getText())) {
// NOTE(review): presumably limits runs of line breaks to two — confirm.
153 if (emptyLines > 2) {
// Two plain-text parts in a row: take the previously added part out again and
// add a single plain-text part holding both texts instead.
159 if ((lastPart != null) && lastPart.isPlainText() && part.isPlainText()) {
160 parts.removePart(parts.size() - 1);
161 PlainTextPart combinedPart = new PlainTextPart(lastPart.getText() + part.getText());
162 parts.add(combinedPart);
163 lastPart = combinedPart;
// A Freenet link directly after plain text ending in "freenet:": take the text part out,
// cut the "freenet:" suffix off, and put the remaining text back only if it is not empty.
164 } else if ((lastPart != null) && part.isFreenetLink() && lastPart.isPlainText() && lastPart.getText().endsWith("freenet:")) {
165 parts.removePart(parts.size() - 1);
166 String lastPartText = lastPart.getText();
167 lastPartText = lastPartText.substring(0, lastPartText.length() - "freenet:".length());
168 if (lastPartText.length() > 0) {
169 parts.add(new PlainTextPart(lastPartText));
181 private Part createPart(String line, SoneTextParserContext context) {
182 Optional<Part> linkPart = createLinkPart(line, context);
183 return linkPart.or(new PlainTextPart(line));
186 private Optional<Part> createLinkPart(String line, SoneTextParserContext context) {
187 Optional<NextLink> nextLink = findNextLink(line);
188 if (!nextLink.isPresent()) {
191 return partCreators.createPart(nextLink.get().getLinkType(), line, context);
194 private List<String> splitLine(String line) {
195 List<String> linePieces = newArrayList();
196 int currentIndex = 0;
197 while (currentIndex < line.length()) {
198 Optional<NextLink> nextLink = findNextLink(line.substring(currentIndex));
199 if (!nextLink.isPresent()) {
200 linePieces.add(line.substring(currentIndex));
203 int nextIndex = currentIndex + nextLink.get().getNextIndex();
204 if (nextIndex > currentIndex) {
205 linePieces.add(line.substring(currentIndex, nextIndex));
207 int nextWhitespace = nextIndex + findNextWhitespaceOrEndOfLine(line.substring(nextIndex));
208 linePieces.add(line.substring(nextIndex, nextWhitespace));
209 currentIndex = nextWhitespace;
211 linePieces.add("\n");
215 private void removeTrailingWhitespaceParts(PartContainer parts) {
216 for (int partIndex = parts.size() - 1; partIndex >= 0; --partIndex) {
217 Part part = parts.getPart(partIndex);
218 if (!(part instanceof PlainTextPart) || !"\n".equals(part.getText())) {
221 parts.removePart(partIndex);
225 private boolean linkMatchesPostingSone(SoneTextParserContext context, String link) {
226 return (context != null) && (context.getPostingSone() != null) && link.substring(4, Math.min(link.length(), 47)).equals(context.getPostingSone().getId());
229 private boolean lineIsLongEnoughToContainAPostLink(String line) {
230 return line.length() >= (7 + 36);
233 private static boolean lineIsLongEnoughToContainASoneLink(String line) {
234 return line.length() >= (7 + 43);
237 private int findNextWhitespaceOrEndOfLine(String line) {
238 Matcher matcher = whitespacePattern.matcher(line);
239 return matcher.find(0) ? matcher.start() : line.length();
242 private Optional<NextLink> findNextLink(String line) {
243 EnumMap<LinkType, Integer> linkTypeIndexes = new EnumMap<LinkType, Integer>(LinkType.class);
244 for (LinkType linkType : LinkType.values()) {
245 int index = line.indexOf(linkType.getScheme());
247 linkTypeIndexes.put(linkType, index);
250 if (linkTypeIndexes.isEmpty()) {
253 Entry<LinkType, Integer> smallestEntry = from(linkTypeIndexes.entrySet()).toSortedList(locateSmallestIndex()).get(0);
254 return of(new NextLink(smallestEntry.getValue(), smallestEntry.getKey()));
257 private Comparator<Entry<LinkType, Integer>> locateSmallestIndex() {
258 return new Comparator<Entry<LinkType, Integer>>() {
260 public int compare(Entry<LinkType, Integer> leftEntry, Entry<LinkType, Integer> rightEntry) {
261 return leftEntry.getValue() - rightEntry.getValue();
266 private class PartCreators {
268 private final Map<LinkType, PartCreator> partCreators = ImmutableMap.<LinkType, PartCreator>builder()
269 .put(LinkType.SONE, new SonePartCreator())
270 .put(LinkType.POST, new PostPartCreator())
271 .put(LinkType.KSK, new FreenetLinkPartCreator(LinkType.KSK))
272 .put(LinkType.CHK, new FreenetLinkPartCreator(LinkType.CHK))
273 .put(LinkType.SSK, new FreenetLinkPartCreator(LinkType.SSK))
274 .put(LinkType.USK, new FreenetLinkPartCreator(LinkType.USK))
275 .put(LinkType.HTTP, new InternetLinkPartCreator(LinkType.HTTP))
276 .put(LinkType.HTTPS, new InternetLinkPartCreator(LinkType.HTTPS))
279 public Optional<Part> createPart(LinkType linkType, String line, SoneTextParserContext context) {
280 if (line.equals(linkType.getScheme())) {
281 return of((Part) new PlainTextPart(line));
283 return partCreators.get(linkType).createPart(line, context);
288 private class SonePartCreator implements PartCreator {
291 public Optional<Part> createPart(String line, SoneTextParserContext context) {
292 if (!lineIsLongEnoughToContainASoneLink(line)) {
295 String soneId = line.substring(7, 50);
296 Optional<Sone> sone = database.getSone(soneId);
297 if (!sone.isPresent()) {
300 return Optional.<Part>of(new SonePart(sone.get()));
305 private class PostPartCreator implements PartCreator {
308 public Optional<Part> createPart(String line, SoneTextParserContext context) {
309 if (!lineIsLongEnoughToContainAPostLink(line)) {
312 String postId = line.substring(7, 43);
313 Optional<Post> post = database.getPost(postId);
314 if (!post.isPresent()) {
317 return Optional.<Part>of(new PostPart(post.get()));
// Part creator that turns Freenet links of one link type into FreenetLinkParts.
322 private class FreenetLinkPartCreator implements PartCreator {
// The link type this creator is responsible for; its isSigned() flag is consulted in createPart().
324 private final LinkType linkType;
326 protected FreenetLinkPartCreator(LinkType linkType) {
327 this.linkType = linkType;
// Derives a display name from the link and wraps link and name in a FreenetLinkPart.
331 public Optional<Part> createPart(String link, SoneTextParserContext context) {
// Everything from the first '?' on is dropped from the name.
333 if (name.indexOf('?') > -1) {
334 name = name.substring(0, name.indexOf('?'));
// A single trailing slash is dropped as well.
336 if (name.endsWith("/")) {
337 name = name.substring(0, name.length() - 1);
// Let FreenetURI parse what is left; the name is taken from the URI's last meta string.
340 FreenetURI uri = new FreenetURI(name);
341 name = uri.lastMetaString();
// NOTE(review): presumably the fallback when there is no meta string — confirm.
343 name = uri.getDocName();
// NOTE(review): presumably the last resort: the first (at most) nine characters of the link — confirm.
346 name = link.substring(0, Math.min(9, link.length()));
// Only signed link types are compared with the posting Sone.
348 boolean fromPostingSone = linkType.isSigned() && linkMatchesPostingSone(context, link);
349 return Optional.<Part>of(new FreenetLinkPart(link, name, fromPostingSone));
// Besides malformed URLs, NullPointerException and ArrayIndexOutOfBoundsException are caught here as well.
350 } catch (MalformedURLException mue1) {
352 } catch (NullPointerException npe1) {
354 } catch (ArrayIndexOutOfBoundsException aioobe1) {
362 private class InternetLinkPartCreator implements PartCreator {
364 private final LinkType linkType;
366 private InternetLinkPartCreator(LinkType linkType) {
367 this.linkType = linkType;
371 public Optional<Part> createPart(String link, SoneTextParserContext context) {
373 name = link.substring(linkType.getScheme().length());
374 int firstSlash = name.indexOf('/');
375 int lastSlash = name.lastIndexOf('/');
376 if ((lastSlash - firstSlash) > 3) {
377 name = name.substring(0, firstSlash + 1) + "…" + name.substring(lastSlash);
379 if (name.endsWith("/")) {
380 name = name.substring(0, name.length() - 1);
382 if (((name.indexOf('/') > -1) && (name.indexOf('.') < name.lastIndexOf('.', name.indexOf('/'))) || ((name.indexOf('/') == -1) && (name.indexOf('.') < name.lastIndexOf('.')))) && name.startsWith("www.")) {
383 name = name.substring(4);
385 if (name.indexOf('?') > -1) {
386 name = name.substring(0, name.indexOf('?'));
388 return Optional.<Part>of(new LinkPart(link, name));
393 private interface PartCreator {
395 Optional<Part> createPart(String line, SoneTextParserContext context);
400 * Container for position and type of the next link in a line.
402 * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
404 private static class NextLink {
406 private final int nextIndex;
407 private final LinkType linkType;
409 private NextLink(int nextIndex, LinkType linkType) {
410 this.nextIndex = nextIndex;
411 this.linkType = linkType;
414 private int getNextIndex() {
418 private LinkType getLinkType() {