Add parser for Abstruse Goose comics.
author David ‘Bombe’ Roden <bombe@pterodactylus.net>
Wed, 20 Nov 2013 07:33:57 +0000 (08:33 +0100)
committer David ‘Bombe’ Roden <bombe@pterodactylus.net>
Wed, 20 Nov 2013 07:33:57 +0000 (08:33 +0100)
src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java [new file with mode: 0644]
src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java [new file with mode: 0644]
src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java [new file with mode: 0644]
src/test/resources/comics/abstrusegoose.html [new file with mode: 0644]

diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilter.java
new file mode 100644 (file)
index 0000000..9d98650
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * rhynodge - AbstruseGooseComicFilter.java - Copyright © 2013 David Roden
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.pterodactylus.rhynodge.filters.comics;
+
+import static com.google.common.base.Optional.fromNullable;
+import static com.google.common.collect.FluentIterable.from;
+
+import java.util.List;
+
+import net.pterodactylus.rhynodge.filters.ComicSiteFilter;
+
+import com.google.common.base.Function;
+import com.google.common.base.Optional;
+import com.google.common.collect.FluentIterable;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+/**
+ * {@link ComicSiteFilter} implementation that can parse Abstruse Goose comics.
+ *
+ * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
+ */
+public class AbstruseGooseComicFilter extends ComicSiteFilter {
+
+       @Override
+       protected Optional<String> extractTitle(Document document) {
+               return fromNullable(document.select("h1.storytitle a").text());
+       }
+
+       @Override
+       protected List<String> extractImageUrls(Document document) {
+               return getComicImages(document).transform(new Function<Element, String>() {
+                       @Override
+                       public String apply(Element element) {
+                               return element.attr("src");
+                       }
+               }).toList();
+       }
+
+       @Override
+       protected List<String> extractImageComments(Document document) {
+               return getComicImages(document).transform(new Function<Element, String>() {
+                       @Override
+                       public String apply(Element element) {
+                               return element.attr("title");
+                       }
+               }).toList();
+       }
+
+       private FluentIterable<Element> getComicImages(Document document) {
+               return from(document.select("section img"));
+       }
+
+}
diff --git a/src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java b/src/main/java/net/pterodactylus/rhynodge/watchers/AbstruseGooseWatcher.java
new file mode 100644 (file)
index 0000000..1355408
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * rhynodge - AbstruseGooseWatcher.java - Copyright © 2013 David Roden
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.pterodactylus.rhynodge.watchers;
+
+import java.util.Arrays;
+
+import net.pterodactylus.rhynodge.Watcher;
+import net.pterodactylus.rhynodge.filters.HtmlFilter;
+import net.pterodactylus.rhynodge.filters.comics.AbstruseGooseComicFilter;
+import net.pterodactylus.rhynodge.queries.HttpQuery;
+import net.pterodactylus.rhynodge.triggers.NewComicTrigger;
+
+/**
+ * {@link Watcher} implementation that watches for new Abstruse Goose comics.
+ *
+ * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
+ */
+public class AbstruseGooseWatcher extends DefaultWatcher {
+
+       public AbstruseGooseWatcher() {
+               super(new HttpQuery("http://abstrusegoose.com/"), Arrays.asList(new HtmlFilter(), new AbstruseGooseComicFilter()), new NewComicTrigger());
+       }
+
+}
diff --git a/src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java b/src/test/java/net/pterodactylus/rhynodge/filters/comics/AbstruseGooseComicFilterTest.java
new file mode 100644 (file)
index 0000000..8ad16b0
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * rhynodge - AbstruseGooseComicFilterTest.java - Copyright © 2013 David Roden
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.pterodactylus.rhynodge.filters.comics;
+
+import static com.google.common.base.Optional.of;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.is;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+import com.google.common.base.Optional;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link AbstruseGooseComicFilter}.
+ *
+ * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
+ */
+public class AbstruseGooseComicFilterTest {
+
+       private final AbstruseGooseComicFilter abstruseGooseComicFilter = new AbstruseGooseComicFilter();
+       private final Document document;
+
+       public AbstruseGooseComicFilterTest() throws IOException {
+               document = loadDocument("/comics/abstrusegoose.html", "http://abstrusegoose.com/");
+       }
+
+       private Document loadDocument(String resourceName, String baseUri) throws IOException {
+               InputStream inputStream = getClass().getResourceAsStream(resourceName);
+               Document document = Jsoup.parse(inputStream, "UTF-8", baseUri);
+               return document;
+       }
+
+       @Test
+       public void extractsComicTitleCorrectly() {
+               Optional<String> title = abstruseGooseComicFilter.extractTitle(document);
+               assertThat(title, is(of("Bizarro")));
+       }
+
+       @Test
+       public void extractComicImagesCorrectly() {
+               List<String> images = abstruseGooseComicFilter.extractImageUrls(document);
+               assertThat(images, contains("http://abstrusegoose.com/strips/bizero.png"));
+       }
+
+       @Test
+       public void extractImageCommentsCorrectly() {
+               List<String> comments = abstruseGooseComicFilter.extractImageComments(document);
+               assertThat(comments, contains("In the additive group of the integers, bizarro zero is... well... zero."));
+       }
+
+}
diff --git a/src/test/resources/comics/abstrusegoose.html b/src/test/resources/comics/abstrusegoose.html
new file mode 100644 (file)
index 0000000..7e06768
--- /dev/null
@@ -0,0 +1,102 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="Description" content="Strip cartoon about math, science, and geek culture." />
+    <title>Abstruse Goose | Bizarro</title>
+    <link rel="stylesheet" href="styles/main.css" />
+    <link rel="alternate" type="application/rss+xml" title="RSS 2.0" href="feed.xml" />
+    <link rel="alternate" type="application/atom+xml" title="Atom 1.0" href="atomfeed.xml" />
+    <link rel="shortcut icon" href="images/favicon.ico" />
+
+    <script>
+      document.createElement(footer);
+      document.createElement(header);
+      document.createElement(nav);
+      document.createElement(section);
+    </script>
+  </head>
+<body>\r
+\r
+  <header>\r
+  <table>\r
+    <tr>\r
+      <td>\r
+        <a href="http://abstrusegoose.com/"><img src="http://abstrusegoose.com/images/AGlogo.PNG"></a>\r
+      </td>\r
+      <td>\r
+        <script type="text/javascript"><!--\r
+        google_ad_client = "ca-pub-0580318607844761";\r
+        /* 468x60, created 6/26/08 */\r
+        google_ad_slot = "7038958302";\r
+        google_ad_width = 468;\r
+        google_ad_height = 60;\r
+        //-->\r
+        </script>\r
+        <script type="text/javascript"\r
+        src="http://pagead2.googlesyndication.com/pagead/show_ads.js">\r
+        </script>\r
+      </td>\r
+    </tr>\r
+  </table>\r
+\r
+  <div id="menu_top"></div>\r
+\r
+  </header>\r
+  <section>\r
+  <p><a href="http://abstrusegoose.com/1">&laquo;&laquo; First</a>&nbsp;&nbsp;&nbsp;&nbsp;<a href="http://abstrusegoose.com/544">&laquo; Previous</a>&nbsp;&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;<a href="http://abstrusegoose.com/pseudorandom.php" >Random</a>&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;Next &raquo;&nbsp;&nbsp;&nbsp;&nbsp;Current &raquo;&raquo;</p>  <h1 class="storytitle"><a href="http://abstrusegoose.com/545">Bizarro</a></h1><br>\r
+  <img src="http://abstrusegoose.com/strips/bizero.png" alt="bizero" width="744" height="567" title="In the additive group of the integers, bizarro zero is... well... zero."/>  <div id="blog_text"></div>\r
+  <p><a href="http://abstrusegoose.com/1">&laquo;&laquo; First</a>&nbsp;&nbsp;&nbsp;&nbsp;<a href="http://abstrusegoose.com/544">&laquo; Previous</a>&nbsp;&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;<a href="http://abstrusegoose.com/pseudorandom.php" >Random</a>&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;Next &raquo;&nbsp;&nbsp;&nbsp;&nbsp;Current &raquo;&raquo;</p>  </section>\r
+  <footer>\r
+    <nav>\r
+      <a href="http://abstrusegoose.com">HOME</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\r
+      <a href="http://abstrusegoose.com/archive">ARCHIVE</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\r
+      <a href="http://abstrusegoose.com/feedthegoose">FEED THE GOOSE</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\r
+      <a href="http://www.cafepress.com/abstrusegoose">STORE</a>\r
+    </nav>\r
+\r
+   <script type="text/javascript"><!--\r
+     google_ad_client = "ca-pub-0580318607844761";\r
+    /* 728x90, created 6/26/08 */\r
+    google_ad_slot = "7299138344";\r
+    google_ad_width = 728;\r
+    google_ad_height = 90;\r
+    //-->\r
+    </script>\r
+    <script type="text/javascript"\r
+    src="http://pagead2.googlesyndication.com/pagead/show_ads.js">\r
+    </script>\r
+\r
+    <div id="melikes">\r
+    <p><b>melikes</b></p>\r
+    <a href="http://brownsharpie.courtneygibbons.org">Brown Sharpie</a>&nbsp;&nbsp;\r
+    <a href="http://www.explosm.net/comics/new">Cy&H</a>&nbsp;&nbsp;\r
+    <a href="http://www.exocomics.com">EXTRAORDINARY</a>&nbsp;&nbsp;\r
+    <a href="http://pbfcomics.com">PBF</a>&nbsp;&nbsp;\r
+    <a href="http://popstrip.com">popstrip</a>&nbsp;&nbsp;\r
+    <a href="http://spikedmath.com">spiked math</a>&nbsp;&nbsp;\r
+    <a href="http://www.xkcd.com">xkcd</a>\r
+    </div>\r
+\r
+    <div class="creativecommons">\r
+\r
+    <a rel="license" href="http://creativecommons.org/licenses/by-nc/3.0/us/"><img alt="Creative Commons License" style="border-width:0" src="http://creativecommons.org/images/public/somerights20.png"/></a>&nbsp;<img src="images/designation.PNG">\r
+\r
+    <br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc/3.0/us/">Creative Commons Attribution-Noncommercial 3.0 United States License</a>.\r
+\r
+    </div>\r
+\r
+    <div class="credit">\r
+    <p><br />A webcomic......... that is all.</p>\r
+    </div>\r
+\r
+    <div class="privacy">\r
+    <p><a href="http://abstrusegoose.com/about">about</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\r
+    <a href="http://abstrusegoose.com/faq">faq</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\r
+    <a href="http://abstrusegoose.com/privacy">privacy</a></p>\r
+    </div>\r
+\r
+  </footer>\r
+</body>\r
+</html>\r
+<!-- cached with Cache Goose -->
\ No newline at end of file