From: David ‘Bombe’ Roden Date: Sun, 24 Feb 2013 14:52:15 +0000 (+0100) Subject: Add filter for comic sites. X-Git-Tag: v2~324 X-Git-Url: https://git.pterodactylus.net/?a=commitdiff_plain;h=0279bd0b97ae8353d4cfd114214cafd7e6f8f3ae;p=rhynodge.git Add filter for comic sites. --- diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java new file mode 100644 index 0000000..5f6a11e --- /dev/null +++ b/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java @@ -0,0 +1,88 @@ +/* + * rhynodge - ComicFilter.java - Copyright © 2013 David Roden + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +package net.pterodactylus.rhynodge.filters; + +import static com.google.common.base.Preconditions.*; + +import java.util.List; + +import net.pterodactylus.rhynodge.Filter; +import net.pterodactylus.rhynodge.State; +import net.pterodactylus.rhynodge.states.ComicState; +import net.pterodactylus.rhynodge.states.ComicState.Comic; +import net.pterodactylus.rhynodge.states.HtmlState; + +import com.google.common.base.Optional; +import org.jsoup.nodes.Document; + +/** + * {@link Filter} implementation that can extract {@link ComicState}s from + * {@link HtmlState}s. + * + * @author David ‘Bombe’ Roden + */ +public abstract class ComicSiteFilter implements Filter { + + @Override + public State filter(State state) { + checkArgument(state instanceof HtmlState, "state must be an HTML state"); + + /* initialize states: */ + HtmlState htmlState = (HtmlState) state; + ComicState comicState = new ComicState(); + + /* extract comics. */ + Optional title = extractTitle(htmlState.document()); + List imageUrls = extractImageUrls(htmlState.document()); + + /* store comic, if found, into state. */ + if (title.isPresent() && !imageUrls.isEmpty()) { + Comic comic = new Comic(title.get()); + for (String imageUrl : imageUrls) { + comic.addImageUrl(imageUrl); + } + } + + return comicState; + } + + // + // PROTECTED METHODS + // + + /** + * Extracts the title of the comic from the given document. + * + * @param document + * The document to extract the title from + * @return The extracted title, or {@link Optional#absent()}} if no title could + * be found + */ + protected abstract Optional extractTitle(Document document); + + /** + * Extracts the image URLs from the given document. + * + * @param document + * The document to extract the image URLs from + * @return The extracted image URLs, or an empty list if no URLs could be + * found + */ + protected abstract List extractImageUrls(Document document); + +}