From 344dffe063f142ebf11aa6b1f069d6974ec026aa Mon Sep 17 00:00:00 2001
From: =?utf8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?=
Date: Wed, 20 Nov 2013 08:33:57 +0100
Subject: [PATCH] Add parser for Abstruse Goose comics.

---
 .../filters/comics/AbstruseGooseComicFilter.java   |  69 ++++++++++++++
 .../rhynodge/watchers/AbstruseGooseWatcher.java    |  39 ++++++++
 .../comics/AbstruseGooseComicFilterTest.java       |  72 +++++++++++++++
 src/test/resources/comics/abstrusegoose.html       | 102 +++++++++++++++++++++
 4 files changed, 282 insertions(+)
 create mode 100644 src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java
 create mode 100644 src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java
 create mode 100644 src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java
 create mode 100644 src/test/resources/comics/abstrusegoose.html

diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java
new file mode 100644
index 0000000..9d98650
--- /dev/null
+++ b/src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java
@@ -0,0 +1,69 @@
+/*
+ * rhynodge - AbstruseGooseComicFilter.java - Copyright © 2013 David Roden
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +package net.pterodactylus.rhynodge.filters.comics; + +import static com.google.common.base.Optional.fromNullable; +import static com.google.common.collect.FluentIterable.from; + +import java.util.List; + +import net.pterodactylus.rhynodge.filters.ComicSiteFilter; + +import com.google.common.base.Function; +import com.google.common.base.Optional; +import com.google.common.collect.FluentIterable; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +/** + * {@link ComicSiteFilter} implementation that can parse Abstruse Goose comics. + * + * @author David ‘Bombe’ Roden + */ +public class AbstruseGooseComicFilter extends ComicSiteFilter { + + @Override + protected Optional extractTitle(Document document) { + return fromNullable(document.select("h1.storytitle a").text()); + } + + @Override + protected List extractImageUrls(Document document) { + return getComicImages(document).transform(new Function() { + @Override + public String apply(Element element) { + return element.attr("src"); + } + }).toList(); + } + + @Override + protected List extractImageComments(Document document) { + return getComicImages(document).transform(new Function() { + @Override + public String apply(Element element) { + return element.attr("title"); + } + }).toList(); + } + + private FluentIterable getComicImages(Document document) { + return from(document.select("section img")); + } + +} diff --git a/src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java b/src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java new file mode 100644 index 0000000..1355408 --- /dev/null +++ b/src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java @@ -0,0 +1,39 @@ +/* + * rhynodge - AbstruseGooseWatcher.java - Copyright © 2013 David Roden + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either 
version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+package net.pterodactylus.rhynodge.watchers;
+
+import java.util.Arrays;
+
+import net.pterodactylus.rhynodge.Watcher;
+import net.pterodactylus.rhynodge.filters.HtmlFilter;
+import net.pterodactylus.rhynodge.filters.comics.AbstruseGooseComicFilter;
+import net.pterodactylus.rhynodge.queries.HttpQuery;
+import net.pterodactylus.rhynodge.triggers.NewComicTrigger;
+
+/**
+ * {@link Watcher} implementation that watches for new Abstruse Goose comics.
+ * Fixed wiring: an {@link HttpQuery} on http://abstrusegoose.com/, the filters {@link HtmlFilter} then {@link AbstruseGooseComicFilter}, and a {@link NewComicTrigger}.
+ * @author David ‘Bombe’ Roden
+ */
+public class AbstruseGooseWatcher extends DefaultWatcher {
+
+	public AbstruseGooseWatcher() { // no arguments: query, filter list and trigger are all hard-wired below
+		super(new HttpQuery("http://abstrusegoose.com/"), Arrays.asList(new HtmlFilter(), new AbstruseGooseComicFilter()), new NewComicTrigger());
+	}
+
+}
diff --git a/src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java b/src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java
new file mode 100644
index 0000000..8ad16b0
--- /dev/null
+++ b/src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java
@@ -0,0 +1,72 @@
+/*
+ * rhynodge - AbstruseGooseComicFilterTest.java - Copyright © 2013 David Roden
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +package net.pterodactylus.rhynodge.filters.comics; + +import static com.google.common.base.Optional.of; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.is; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import com.google.common.base.Optional; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.junit.Test; + +/** + * Unit test for {@link AbstruseGooseComicFilter}. + * + * @author David ‘Bombe’ Roden + */ +public class AbstruseGooseComicFilterTest { + + private final AbstruseGooseComicFilter abstruseGooseComicFilter = new AbstruseGooseComicFilter(); + private final Document document; + + public AbstruseGooseComicFilterTest() throws IOException { + document = loadDocument("/comics/abstrusegoose.html", "http://abstrusegoose.com/"); + } + + private Document loadDocument(String resourceName, String baseUri) throws IOException { + InputStream inputStream = getClass().getResourceAsStream(resourceName); + Document document = Jsoup.parse(inputStream, "UTF-8", baseUri); + return document; + } + + @Test + public void extractsComicTitleCorrectly() { + Optional title = abstruseGooseComicFilter.extractTitle(document); + assertThat(title, is(of("Bizarro"))); + } + + @Test + public void extractComicImagesCorrectly() { + List images = abstruseGooseComicFilter.extractImageUrls(document); + assertThat(images, contains("http://abstrusegoose.com/strips/bizero.png")); + } + + @Test + public void extractImageCommentsCorrectly() { + List comments = 
abstruseGooseComicFilter.extractImageComments(document); + assertThat(comments, contains("In the additive group of the integers, bizarro zero is... well... zero.")); + } + +} diff --git a/src/test/resources/comics/abstrusegoose.html b/src/test/resources/comics/abstrusegoose.html new file mode 100644 index 0000000..7e06768 --- /dev/null +++ b/src/test/resources/comics/abstrusegoose.html @@ -0,0 +1,102 @@ + + + + + + Abstruse Goose | Bizarro + + + + + + + + + +
+ + + + + +
+ + + + +
+ + + +
+
+

«« First    « Previous    |   Random   |   Next »    Current »»

Bizarro


+ bizero
+

«« First    « Previous    |   Random   |   Next »    Current »»

+ + + + \ No newline at end of file -- 2.7.4