Fix Sinfest filter and add a test
author: David ‘Bombe’ Roden <bombe@pterodactylus.net>
Fri, 20 Feb 2015 05:42:54 +0000 (06:42 +0100)
committer: David ‘Bombe’ Roden <bombe@pterodactylus.net>
Fri, 20 Feb 2015 05:42:54 +0000 (06:42 +0100)
src/main/java/net/pterodactylus/rhynodge/filters/comics/SinfestComicFilter.java
src/test/java/net/pterodactylus/rhynodge/filters/comics/SinfestComicFilterTest.java [new file with mode: 0644]
src/test/resources/net/pterodactylus/rhynodge/filters/comics/sinfest.html [new file with mode: 0644]

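diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/comics/SinfestComicFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/comics/SinfestComicFilter.java
index 76089e3..f94c355 100644 (file)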
@@ -19,6 +19,7 @@ package net.pterodactylus.rhynodge.filters.comics;
 
 import java.util.Collections;
 import java.util.List;
+import java.util.stream.Collectors;
 
 import net.pterodactylus.rhynodge.filters.ComicSiteFilter;
 
@@ -39,32 +40,18 @@ public class SinfestComicFilter extends ComicSiteFilter {
 
        @Override
        protected Optional<String> extractTitle(Document document) {
-               Elements imageCell = document.select("table#AutoNumber2 tr:eq(1) img");
+               Elements imageCell = selectImageAttributes(document);
                return imageCell.hasAttr("alt") ? Optional.of(imageCell.attr("alt")) : Optional.<String>absent();
        }
 
        @Override
        protected List<String> extractImageUrls(Document document) {
-               Elements imageCell = document.select("table#AutoNumber2 tr:eq(1) img");
-               return imageCell.hasAttr("src") ? FluentIterable.from(imageCell).transform(new Function<Element, Optional<String>>() {
-
-                       @Override
-                       public Optional<String> apply(Element elements) {
-                               return elements.hasAttr("src") ? Optional.of(elements.attr("src")) : Optional.<String>absent();
-                       }
-               }).filter(new Predicate<Optional<String>>() {
-
-                       @Override
-                       public boolean apply(Optional<String> input) {
-                               return input.isPresent();
-                       }
-               }).transform(new Function<Optional<String>, String>() {
+               Elements imageCells = selectImageAttributes(document);
+               return imageCells.stream().map(cell -> cell.attr("src")).collect(Collectors.toList());
+       }
 
-                       @Override
-                       public String apply(Optional<String> input) {
-                               return input.get();
-                       }
-               }).toList() : Collections.<String>emptyList();
+       private Elements selectImageAttributes(Document document) {
+               return document.select("tbody.style5 img");
        }
 
        @Override
diff --git a/src/test/java/net/pterodactylus/rhynodge/filters/comics/SinfestComicFilterTest.java b/src/test/java/net/pterodactylus/rhynodge/filters/comics/SinfestComicFilterTest.java
new file mode 100644 (file)
index 0000000..dc20d36
--- /dev/null
@@ -0,0 +1,82 @@
+package net.pterodactylus.rhynodge.filters.comics;
+
+import java.io.IOException;
+
+import net.pterodactylus.rhynodge.Filter;
+import net.pterodactylus.rhynodge.State;
+import net.pterodactylus.rhynodge.filters.ResourceLoader;
+import net.pterodactylus.rhynodge.states.ComicState;
+import net.pterodactylus.rhynodge.states.ComicState.Comic;
+import net.pterodactylus.rhynodge.states.ComicState.Strip;
+import net.pterodactylus.rhynodge.states.HtmlState;
+
+import org.hamcrest.Description;
+import org.hamcrest.Matcher;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.hamcrest.TypeSafeDiagnosingMatcher;
+import org.jsoup.nodes.Document;
+import org.junit.Test;
+
+
+/**
+ * Unit test for {@link SinfestComicFilter}.
+ * <p>
+ * The constructor loads the {@code sinfest.html} fixture through
+ * {@link ResourceLoader}, passing {@code SinfestComicFilter.class} as the
+ * lookup class (presumably the resource is resolved relative to that class's
+ * package; verify against {@code ResourceLoader}), and wraps the parsed
+ * document in an {@link HtmlState} for {@code http://www.sinfest.net/}.
+ * <ul>
+ * <li>{@link #canParseComicsFromHtml()} checks that filtering the HTML state
+ * yields a {@link ComicState}.</li>
+ * <li>{@link #imageUrlsAreParsedCorrectly()} checks that the resulting state
+ * contains exactly one comic, titled {@code "Search 9"}, with a single strip
+ * whose image URL is {@code http://www.sinfest.net/btphp/comics/2015-02-20.gif}
+ * and whose comment is empty.</li>
+ * </ul>
+ * <p>
+ * The private {@code matchesComic} helper builds a Hamcrest matcher that
+ * compares title, strip count (must be exactly one), image URL and comment, in
+ * that order, and describes only the first property that does not match.
+ * <p>
+ * NOTE(review): the fixture's {@code img} tag carries the relative URL
+ * {@code btphp/comics/2015-02-20.gif} while the test expects an absolute one,
+ * so the URL is presumably resolved against the page URL somewhere outside
+ * this test; confirm in {@code ComicSiteFilter}.
+ *
+ * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
+ */
+public class SinfestComicFilterTest {
+
+       private final Filter sinfestFilter = new SinfestComicFilter();
+       private final HtmlState htmlState;
+
+       public SinfestComicFilterTest() throws IOException {
+               Document document = ResourceLoader.loadDocument(SinfestComicFilter.class, "sinfest.html", "http://www.sinfest.net/");
+               htmlState = new HtmlState("http://www.sinfest.net/", document);
+       }
+
+       @Test
+       public void canParseComicsFromHtml() {
+               State state = sinfestFilter.filter(htmlState);
+               MatcherAssert.assertThat(state, Matchers.instanceOf(ComicState.class));
+       }
+
+       @Test
+       public void imageUrlsAreParsedCorrectly() {
+               ComicState comicState = (ComicState) sinfestFilter.filter(htmlState);
+               MatcherAssert.assertThat(comicState.comics(), Matchers.contains(matchesComic("Search 9", "http://www.sinfest.net/btphp/comics/2015-02-20.gif", "")));
+       }
+
+       private Matcher<Comic> matchesComic(String title, String url, String comment) {
+               return new TypeSafeDiagnosingMatcher<Comic>() {
+                       @Override
+                       protected boolean matchesSafely(Comic comic, Description mismatchDescription) {
+                               if (!comic.title().equals(title)) {
+                                       mismatchDescription.appendText("comic is named ").appendValue(comic.title());
+                                       return false;
+                               }
+                               if (comic.strips().size() != 1) {
+                                       mismatchDescription.appendText("comic has ").appendValue(comic.strips().size()).appendText(" strips");
+                                       return false;
+                               }
+                               Strip strip = comic.strips().get(0);
+                               if (!strip.imageUrl().equals(url)) {
+                                       mismatchDescription.appendText("image url is ").appendValue(strip.imageUrl());
+                                       return false;
+                               }
+                               if (!strip.comment().equals(comment)) {
+                                       mismatchDescription.appendText("comment is ").appendValue(strip.comment());
+                                       return false;
+                               }
+                               return true;
+                       }
+
+                       @Override
+                       public void describeTo(Description description) {
+                               description.appendText("comic named ").appendValue(title);
+                               description.appendText(" at ").appendValue(url);
+                               description.appendText(" with comment ").appendValue(comment);
+                       }
+               };
+       }
+
+}
diff --git a/src/test/resources/net/pterodactylus/rhynodge/filters/comics/sinfest.html b/src/test/resources/net/pterodactylus/rhynodge/filters/comics/sinfest.html
new file mode 100644 (file)
index 0000000..9de933f
--- /dev/null
@@ -0,0 +1,274 @@
+<html>
+<head>
+       <meta http-equiv="Content-Language" content="en-us">
+       <title>Sinfest</title>
+       <style type="text/css">
+       <!--
+       body, td { font: 13px Verdana, Geneva, Arial, Helvetica, sans-serif; }
+       table.cal th { background: #dddddd; }
+       table.cal tr.cal-row { background: #ffffff; }
+       table.cal tr.cal-week { background: #eeeeee; }
+       table.tb { color: #ffffff; background: #000000; }
+       table.tb td { color: #000000; background: #ffffff; }
+       table.tb tr.th td {
+               color: #000000;
+               background: #dddddd;
+               font-size: 12px;
+               font-weight: bold;
+       }
+       .border { border: 1px solid #000000; }
+       .h1 { font-size: 20px; font-weight: bold; }
+       .maintitle { font-size: 30px; font-weight: bold; }
+       .style2 {
+       background-image: url('../images/borderless_spectrum.gif');
+}
+.style3{
+       font-family: "Bookman Old Style";
+       font-size: medium;
+       color: #000000;
+}
+
+       .style4 {
+       font-family: "verdana";
+       font-size: medium;
+       color: #fefbb0;
+}
+       .style5 {
+       text-align: center;
+}
+.style6 {
+       font-family: "meiryo";
+       font-size: small;
+}
+
+       a {
+       color: #000000;
+}
+a:visited {
+       color: #333333;
+}
+a:active {
+       color: #3399FF;
+}
+a:hover {
+       color: #0000FF;
+}
+       -->
+       </style>
+</head>
+<body>
+<div style="position: absolute; float: left; left: 0; top: 0; width: 100%; height: 90px">
+
+<table style="width: 100%" cellspacing="0" cellpadding="0" bgcolor="black">
+ <tr><td valign="bottom" class="style6">
+       <center><script type="text/javascript" src="http://ap.lijit.com/www/delivery/fpi.js?z=221380&u=zenbomf&width=728&height=90"></script>
+       </tr>
+ </center>
+
+       </table>
+       
+<table style="width: 100%; height: 64px;" cellspacing="0" cellpadding="0" class="style2" >
+       <tr>
+               <td class="style5">             <img alt="" src="../images/bright_sinfest.gif" width="286" height="24"> 
+               <span class="style4">By Tatsuya Ishida</span></td>
+               
+               
+       </tr>
+</table>
+
+<table style="width: 100%" height="20" cellspacing="0" cellpadding="0" align="center"  bgcolor="white">
+                       <tr>
+                               <td  valign="top" class="style5"><a href="index.php"><span class="style6">HOME</span></a>
+                               <span class="style6">&nbsp;|&nbsp;</span>
+               <a href="archiveb.php"><span class="style6">ARCHIVE</span></a>
+                               <span class="style6">&nbsp;|&nbsp;</span>
+               <a href="phpbb2"><span class="style6">FORUM</span></a>
+                               <span class="style6">&nbsp;|&nbsp;</span>
+                                               <a href="books.php"><span class="style6">BOOKS</span></a>
+                               <span class="style6">&nbsp;|&nbsp;</span> 
+                               <a href="comicdb.php"><span class="style6">SEARCH</span></td>
+                               </tr>
+                               </table>                <center><table border="0" align="center" cellspacing="0" cellpadding="2">
+
+<tr><td colspan="2" class="style1"><table border="0">
+<tbody class="style5">
+<tr class="style5"><td class="style3"><nobr>February 20, 2015: </nobr>Search 9</td></tr>
+<tr><td colspan="2"><img src="btphp/comics/2015-02-20.gif" alt="Search 9"></td></tr>
+</table>
+<br></td></tr>
+<tr>
+       <td width="50%" align="center"> <a href="view.php?date=2000-01-17">
+       <img alt="" src="../images/first.gif" class="style3" ></td>
+       <td width="50%" align="center"> <a href="view.php?date=2015-02-19">
+       <img alt="" src="../images/prev.gif" class="style3"></td>
+</tr>
+</table>
+
+</center>
+<div class="style1">
+<br>
+
+
+<center>
+<table style="width: 480" cellpadding="8">
+       <tr>
+               <td><!-- Begin Project Wonderful ad code: -->
+<!-- IMPORTANT: All lines, including these comments, must be included. -->
+<!-- Removal or unauthorized alteration will result in your ads being automatically suspended! -->
+<!-- Ad box ID: 13807 -->
+<script language='JavaScript' type='text/javascript'>
+// <![CDATA[
+
+r = new String (Math.random()*1000);
+r = r.substr(0, 5);
+s = new String (String.fromCharCode(60));
+s += "script language='JavaScript' type='text/javascript'";
+s += "src = http://www.projectwonderful.com/gen.php";
+s += "?id=13807&amp;type=5";
+s += "&amp;r=" + r;
+if (document.referrer){
+s += "&amp;referer=" + escape(document.referrer);
+}
+s += "><\/scr";
+s += "ipt>";
+document.write(s);
+// ]]>
+ </script>
+<noscript>
+ <map name="admap13807" id="admap13807"><area href="http://www.projectwonderful.com/out_nojs.php?r=0&amp;c=0&amp;id=13807&amp;type=5" shape="rect" coords="0,0,728,90" title="" alt="" target="_blank" /></map>
+<table cellpadding="0" border="0" cellspacing="0" width="728" bgcolor="#ffffff"><tr><td><img src="http://www.projectwonderful.com/nojs.php?id=13807&amp;type=5" width="728" height="90" usemap="#admap13807" border="0" alt="" /></td></tr><tr><td bgcolor="#ffffff" colspan="1"><center><a style="font-size:10px;color:#0000ff;text-decoration:none;line-height:1.2;font-weight:bold;font-family:Tahoma, verdana,arial,helvetica,sans-serif;text-transform: none;letter-spacing:normal;text-shadow:none;white-space:normal;word-spacing:normal;" href="http://www.projectwonderful.com/advertisehere.php?id=13807&amp;type=5" target="_blank">Project Wonderful - Your ad here, right now, for as low as $0.00</a></center></td></tr><tr><td colspan=1 valign="top" width=728 bgcolor="#000000" style="height:3px;font-size:1px;padding:0px;max-height:3px;"></td></tr></table></noscript>
+<!-- End Project Wonderful ad code. --> 
+</td>
+       </tr>
+</table>
+</center>
+
+<center> 
+
+
+
+       <table style="width: 600" cellpadding="10">
+               <tr>
+                       <td style="width: 350px" valign="top">
+                       <table style="width: 375px" cellspacing="4">
+                               <tr>
+                                       <td style="width: 369px">
+                                       <img alt="" src="../images/resistance_star.gif" width="394" height="58"><br>
+                                       </td>
+                               </tr>
+                               <tr>
+                                       <td style="width: 369px">
+                                       
+                                       <table cellspacing="0" border="0" class="tb" align="center" cellpadding="5">
+       &nbsp;<head>\r
+<style type="text/css">\r
+.style1 {\r
+       text-align: center;\r
+}\r
+</style>\r
+</head>\r
+\r
+\r
+<body style="background-color: #FFFFFF">\r
+\r
+<tr><td>\r
+<div style="font-size:90%">June  1, 2014</div>\r
+<div style="font-weight:bold; width: 451px; height: 24px;" font face="latha">I know my kingdom awaits and they've forgiven my mistakes</div>\r
+<div style="font-size:80%">Posted by Tatsuya Ishida</div>\r
+<p>Hi. New design.<br>\r
+That is all.<br>\r
+-T.</p>\r
+</td></tr>\r
+       </table>
+       <div class="style5"><a href="news.php"><span class="style3">More Resistance</span></a></div>
+                                       </td>
+                               </tr>
+                       </table>
+                       </td>
+                       <td>
+                       <script type="text/javascript" src="http://ap.lijit.com/www/delivery/fpi.js?z=221385&u=zenbomf&width=160&height=600">
+                       </script></td>
+               </tr>
+       </table></center>
+</div>
+       
+
+<br>
+<head>
+<style type="text/css">
+.style10 {
+       color: #FBFD93;
+       font-family: sans-serif;
+       font-size: x-small;
+}
+.style12 {
+       background-color: #94976C;
+}
+.style17 {
+       color: #FBFD93;
+       font-family: VERDANA;
+}
+.style18 {
+       text-align: center;
+}
+</style>
+</head>
+
+
+ <center><div id="lijit_region_255795"></div>
+<script type="text/javascript" src="http://www.lijit.com/delivery/fp?u=zenbomf&z=255795"></script>
+ </center>
+
+<table style="width:100%" height="100" cellspacing="0" cellpadding="0" class="style1" bgcolor="94976C">
+       <tr> 
+               <td >
+ <center>
+ <div align="center"> <center><!-- BEGIN TECHNORATI MEDIA TAG -->
+<script type="text/javascript">
+document.write('<scri' + 'pt type="text/javascript" src="'
++ (document.location.protocol == 'https:' ? 'https://uat-secure' : 'http://ad-cdn')
++ '.technoratimedia.com/00/93/27/uat_32793.js?ad_size=728x90"></scri' + 'pt>');
+</script>
+<!-- END TECHNORATI MEDIA TAG --><br>
+
+<table class="style12" cellspacing="0" cellpadding="4" style="width: 755px" ><tr>
+                               <td style="width: 625px" class="style18" >
+                                       <div class="style10">
+                                       © COPYRIGHT 2000-2015
+                                        BY TATSUYA 
+                                       ISHIDA/MUSEWORKS. <a href="/contact.php">CONTACT</a><a href="/contact.php"> </a>
+                                       <br>
+                                       Powered by
+                                       
+                                       
+                                       <a href="http://www.enisoc.com/" style="color: #FBFD93">btPHP 1.3.2</a></div>
+                                       </div>
+<div align="center" class="style10">page generated in 0.02347 seconds</div></font>
+    </td> </tr> </table>       
+
+</td>
+</tr>  
+
+
+       
+       
+</table>
+
+</div>
+
+
+<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 START -->
+<script type='text/javascript' src='https://count.carrierzone.com/app/count_server/count.js'></script>
+<script type='text/javascript'><!--
+wm_custnum='7424cb7686d18f43';
+wm_page_name='index.php';
+wm_group_name='/services/webpages/s/i/sinfest.net/public';
+wm_campaign_key='campaign_id';
+wm_track_alt='';
+wiredminds.count();
+// -->
+</script>
+<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 END -->
+</body>
+</html>
\ No newline at end of file