From 07f030b452fa2146b8e0c7c338465736e08cc250 Mon Sep 17 00:00:00 2001 From: =?utf8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?= Date: Tue, 10 Feb 2015 20:37:08 +0100 Subject: [PATCH] =?utf8?q?Add=20parser=20for=20Chief=20O=E2=80=99Brien=20a?= =?utf8?q?t=20Work.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- .../comics/ChiefOBrienAtWorkComicFilter.java | 35 ++ .../comics/ChiefOBrienAtWorkComicFilterTest.java | 40 +++ .../rhynodge/filters/comics/ComicLoader.java | 22 ++ .../filters/comics/chief-obrien-at-work.html | 371 +++++++++++++++++++++ 4 files changed, 468 insertions(+) create mode 100644 src/main/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilter.java create mode 100644 src/test/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilterTest.java create mode 100644 src/test/java/net/pterodactylus/rhynodge/filters/comics/ComicLoader.java create mode 100644 src/test/resources/net/pterodactylus/rhynodge/filters/comics/chief-obrien-at-work.html diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilter.java new file mode 100644 index 0000000..8ec8e21 --- /dev/null +++ b/src/main/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilter.java @@ -0,0 +1,35 @@ +package net.pterodactylus.rhynodge.filters.comics; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import net.pterodactylus.rhynodge.filters.ComicSiteFilter; + +import com.google.common.base.Optional; +import org.jsoup.nodes.Document; + +/** + * {@link ComicSiteFilter} implementation that can parse “Chief O’Brien at Work” + * comics. 
+ *
+ * @author David ‘Bombe’ Roden
+ */
+public class ChiefOBrienAtWorkComicFilter extends ComicSiteFilter {
+
+    /** CSS query used to locate the comic image element. */
+    private static final String IMAGE_QUERY = ".P .P-H .larger-width img";
+
+    /** CSS query used to locate the caption paragraph that is used as title. */
+    private static final String TITLE_QUERY = ".P-post .captions p";
+
+    /**
+     * Returns the {@code src} attribute of the first element matched by
+     * {@link #IMAGE_QUERY}.
+     *
+     * @param document
+     *            the parsed page
+     * @return a single-element list with the image URL, or an empty list if
+     *         the document contains no matching element
+     */
+    @Override
+    protected List<String> extractImageUrls(Document document) {
+        /* Element is fully qualified because this file does not import it. */
+        org.jsoup.nodes.Element image = document.select(IMAGE_QUERY).first();
+        if (image == null) {
+            /* first() yields null for an empty selection; get(0) would throw instead. */
+            return Collections.emptyList();
+        }
+        return Arrays.asList(image.attr("src"));
+    }
+
+    /**
+     * Returns the image comments, which this filter never extracts.
+     *
+     * @param document
+     *            the parsed page (unused)
+     * @return always an empty list
+     */
+    @Override
+    protected List<String> extractImageComments(Document document) {
+        return Collections.emptyList();
+    }
+
+    /**
+     * Returns the text of the first element matched by {@link #TITLE_QUERY}.
+     * The text is returned exactly as the parser reports it; it is not
+     * trimmed.
+     *
+     * @param document
+     *            the parsed page
+     * @return the caption text, or {@link Optional#absent()} if the document
+     *         contains no matching element
+     */
+    @Override
+    protected Optional<String> extractTitle(Document document) {
+        org.jsoup.nodes.Element caption = document.select(TITLE_QUERY).first();
+        if (caption == null) {
+            return Optional.absent();
+        }
+        return Optional.of(caption.text());
+    }
+
+}
diff --git a/src/test/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilterTest.java b/src/test/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilterTest.java
new file mode 100644
index 0000000..d511ced
--- /dev/null
+++ b/src/test/java/net/pterodactylus/rhynodge/filters/comics/ChiefOBrienAtWorkComicFilterTest.java
@@ -0,0 +1,40 @@
+package net.pterodactylus.rhynodge.filters.comics;
+
+import java.io.IOException;
+
+import com.google.common.base.Optional;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.jsoup.nodes.Document;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link ChiefOBrienAtWorkComicFilter}.
+ *
+ * @author David ‘Bombe’ Roden
+ */
+public class ChiefOBrienAtWorkComicFilterTest {
+
+    /** Name of the HTML fixture, resolved relative to this package. */
+    private static final String FIXTURE_NAME = "chief-obrien-at-work.html";
+
+    /** Base URI handed to the parser when loading the fixture. */
+    private static final String FIXTURE_BASE_URI = "http://chiefobrienatwork.com/";
+
+    /** Image URL the filter is expected to find in the fixture. */
+    private static final String EXPECTED_IMAGE_URL = "http://41.media.tumblr.com/db92f4218b8a100f216863ce980e19a9/tumblr_njaewe7vNU1tjd8fao1_1280.jpg";
+
+    /** Title the filter is expected to find; note the trailing non-breaking space. */
+    private static final String EXPECTED_TITLE = "EPISODE 60: Being Human\u00a0";
+
+    private final ChiefOBrienAtWorkComicFilter filter = new ChiefOBrienAtWorkComicFilter();
+    private final Document document;
+
+    /**
+     * Parses the fixture once per test instance.
+     *
+     * @throws IOException
+     *             if the fixture cannot be loaded
+     */
+    public ChiefOBrienAtWorkComicFilterTest() throws IOException {
+        document = ComicLoader.loadDocument(FIXTURE_NAME, FIXTURE_BASE_URI);
+    }
+
+    /** The filter yields exactly the one expected image URL. */
+    @Test
+    public void filterCanParseComics() {
+        MatcherAssert.assertThat(filter.extractImageUrls(document), Matchers.contains(EXPECTED_IMAGE_URL));
+    }
+
+    /** The filter yields no image comments. */
+    @Test
+    public void filterReturnsEmptyListForImageComments() {
+        MatcherAssert.assertThat(filter.extractImageComments(document), Matchers.empty());
+    }
+
+    /** The filter yields the expected title. */
+    @Test
+    public void filterCanParseComicTitles() {
+        MatcherAssert.assertThat(filter.extractTitle(document), Matchers.is(Optional.of(EXPECTED_TITLE)));
+    }
+
+}
diff --git a/src/test/java/net/pterodactylus/rhynodge/filters/comics/ComicLoader.java b/src/test/java/net/pterodactylus/rhynodge/filters/comics/ComicLoader.java
new file mode 100644
index 0000000..c6a4cec
--- /dev/null
+++ b/src/test/java/net/pterodactylus/rhynodge/filters/comics/ComicLoader.java
@@ -0,0 +1,22 @@
+package net.pterodactylus.rhynodge.filters.comics;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+/**
+ * Loads a resource from the classpath and parses it as HTML.
+ *
+ * @author David ‘Bombe’ Roden
+ */
+public class ComicLoader {
+
+    /**
+     * Loads the named resource, resolved relative to this class, and parses
+     * it as UTF-8 encoded HTML.
+     *
+     * @param resourceName
+     *            the name of the resource, as understood by
+     *            {@link Class#getResourceAsStream(String)}
+     * @param baseUri
+     *            the base URI handed to the parser
+     * @return the parsed document
+     * @throws IOException
+     *             if the resource does not exist or cannot be read
+     */
+    static Document loadDocument(String resourceName, String baseUri) throws IOException {
+        /* try-with-resources closes the stream even if parsing fails; a null resource is skipped on close. */
+        try (InputStream inputStream = ComicLoader.class.getResourceAsStream(resourceName)) {
+            if (inputStream == null) {
+                /* getResourceAsStream() reports a missing resource as null, not as an exception. */
+                throw new IOException("Resource not found: " + resourceName);
+            }
+            return Jsoup.parse(inputStream, "UTF-8", baseUri);
+        }
+    }
+
+}
diff --git a/src/test/resources/net/pterodactylus/rhynodge/filters/comics/chief-obrien-at-work.html b/src/test/resources/net/pterodactylus/rhynodge/filters/comics/chief-obrien-at-work.html
new file mode 100644
index 0000000..cb0485c
--- /dev/null
+++ b/src/test/resources/net/pterodactylus/rhynodge/filters/comics/chief-obrien-at-work.html
@@ -0,0 +1,371 @@
+ + + + + + + + + +CHIEF O'BRIEN AT WORK + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CHIEF O'BRIEN AT WORK
From the man who brought you Chief O’Brien after Star Trek brought you Chief O’Brien,
comes a comic for fans of space travel, dead-end jobs, and ennui.
+ + + + +
+ +
+ +
+ +
+ + + + + + + + + + + + + + + +
+ + + +
+ + + + + + + + + + + \ No newline at end of file -- 2.7.4