2 * rhynodge - XkcdComicFilter.java - Copyright © 2013 David Roden
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 package net.pterodactylus.rhynodge.filters.comics;
20 import java.util.List;
22 import net.pterodactylus.rhynodge.filters.ComicSiteFilter;
24 import com.google.common.base.Function;
25 import com.google.common.base.Optional;
26 import com.google.common.collect.FluentIterable;
27 import org.jsoup.nodes.Document;
28 import org.jsoup.nodes.Element;
29 import org.jsoup.select.Elements;
32 * {@link ComicSiteFilter} implementation that can parse XKCD comics.
34 * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
36 public class XkcdComicFilter extends ComicSiteFilter {
39 protected Optional<String> extractTitle(Document document) {
40 Elements titleElement = document.select("div#ctitle");
41 return titleElement.hasText() ? Optional.of(titleElement.text()) : Optional.<String>absent();
/**
 * Maps every String[] pair produced by extractImages(document) to a single
 * String using an anonymous Guava Function.
 *
 * NOTE(review): extractImages builds each pair as { src, title }, so this
 * presumably selects the first element (the image URL). The selecting
 * statement is not visible in this view; confirm.
 */
45 protected List<String> extractImageUrls(Document document) {
46 return extractImages(document).transform(new Function<String[], String>() {
// input is one { src, title } pair as built by extractImages.
49 public String apply(String[] input) {
/**
 * Maps every String[] pair produced by extractImages(document) to a single
 * String using an anonymous Guava Function.
 *
 * NOTE(review): extractImages builds each pair as { src, title }, so this
 * presumably selects the second element (the image's title attribute, used
 * as the comment). The selecting statement is not visible in this view;
 * confirm.
 */
56 protected List<String> extractImageComments(Document document) {
57 return extractImages(document).transform(new Function<String[], String>() {
// input is one { src, title } pair as built by extractImages.
60 public String apply(String[] input) {
71 * Extracts pairs of image URLs and image comments from the given document.
74 * The document to extract the images from
75 * @return An iterable containing all image URL and comment pairs
77 private FluentIterable<String[]> extractImages(Document document) {
78 return FluentIterable.from(document.select("div#comic img")).transform(new Function<Element, String[]>() {
81 public String[] apply(Element image) {
82 return new String[] { image.attr("src"), image.attr("title") };