/*
 * rhynodge - ComicSiteFilter.java - Copyright © 2013 David Roden
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
18 package net.pterodactylus.rhynodge.filters;
20 import static com.google.common.base.Preconditions.checkArgument;
23 import java.net.URISyntaxException;
24 import java.util.List;
26 import net.pterodactylus.rhynodge.Filter;
27 import net.pterodactylus.rhynodge.State;
28 import net.pterodactylus.rhynodge.states.ComicState;
29 import net.pterodactylus.rhynodge.states.ComicState.Comic;
30 import net.pterodactylus.rhynodge.states.ComicState.Strip;
31 import net.pterodactylus.rhynodge.states.FailedState;
32 import net.pterodactylus.rhynodge.states.HtmlState;
34 import com.google.common.base.Optional;
35 import org.jetbrains.annotations.NotNull;
36 import org.jsoup.nodes.Document;
/**
 * {@link Filter} implementation that can extract {@link ComicState}s from
 * {@link HtmlState}s.
 *
 * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
 */
44 public abstract class ComicSiteFilter implements Filter {
48 public State filter(@NotNull State state) {
49 checkArgument(state instanceof HtmlState, "state must be an HTML state");
51 /* initialize states: */
52 HtmlState htmlState = (HtmlState) state;
55 Optional<String> title = extractTitle(htmlState.document());
56 List<String> imageUrls = extractImageUrls(htmlState.document());
57 List<String> imageComments = extractImageComments(htmlState.document());
59 /* store comic, if found, into state. */
60 if (imageUrls.isEmpty()) {
61 return new FailedState();
64 ComicState comicState = new ComicState();
65 Comic comic = new Comic(title.or(""));
67 for (String imageUrl : imageUrls) {
68 String imageComment = (imageCounter < imageComments.size()) ? imageComments.get(imageCounter) : "";
70 URI stripUri = new URI(htmlState.uri()).resolve(imageUrl.replaceAll(" ", "%20"));
71 Strip strip = new Strip(stripUri.toString(), imageComment);
74 } catch (URISyntaxException use1) {
75 throw new IllegalStateException(String.format("Could not resolve image URL “%s” against base URL “%s”.", imageUrl, htmlState.uri()), use1);
78 comicState.add(comic);
88 * Extracts the title of the comic from the given document.
91 * The document to extract the title from
92 * @return The extracted title, or {@link Optional#absent()}} if no title could
95 protected abstract Optional<String> extractTitle(Document document);
98 * Extracts the image URLs from the given document.
101 * The document to extract the image URLs from
102 * @return The extracted image URLs, or an empty list if no URLs could be
105 protected abstract List<String> extractImageUrls(Document document);
108 * Extracts the image comments from the given document. The elements of this
109 * last and of the list returned by {@link #extractImageUrls(org.jsoup.nodes.Document)}
110 * are paired up and added as {@link Strip}s. If the list returned by this
111 * method has less elements, an empty string is used for the remaining images.
114 * The document to extract the image comments from
115 * @return The extracted image comments
117 protected abstract List<String> extractImageComments(Document document);