import java.util.Collections;
import java.util.List;
+import java.util.stream.Collectors;
import net.pterodactylus.rhynodge.filters.ComicSiteFilter;
+/**
+ * Extracts the comic title from the {@code alt} attribute of the images
+ * returned by {@code selectImageAttributes(Document)}.
+ *
+ * @param document
+ *            the parsed comic page
+ * @return the {@code alt} text if any selected image carries that
+ *         attribute, otherwise an absent {@link Optional}
+ */
@Override
protected Optional<String> extractTitle(Document document) {
- Elements imageCell = document.select("table#AutoNumber2 tr:eq(1) img");
+ Elements imageCell = selectImageAttributes(document);
return imageCell.hasAttr("alt") ? Optional.of(imageCell.attr("alt")) : Optional.<String>absent();
}
+/**
+ * Collects the {@code src} attribute of every image returned by
+ * {@code selectImageAttributes(Document)}.
+ * <p>
+ * Images without a {@code src} attribute are skipped, so the result never
+ * contains empty entries for them. Values are returned exactly as written
+ * in the attribute; no URL resolution happens in this method.
+ *
+ * @param document
+ *            the parsed comic page
+ * @return the {@code src} values in document order; empty if no selected
+ *         image has a {@code src} attribute
+ */
@Override
protected List<String> extractImageUrls(Document document) {
- Elements imageCell = document.select("table#AutoNumber2 tr:eq(1) img");
- return imageCell.hasAttr("src") ? FluentIterable.from(imageCell).transform(new Function<Element, Optional<String>>() {
-
- @Override
- public Optional<String> apply(Element elements) {
- return elements.hasAttr("src") ? Optional.of(elements.attr("src")) : Optional.<String>absent();
- }
- }).filter(new Predicate<Optional<String>>() {
-
- @Override
- public boolean apply(Optional<String> input) {
- return input.isPresent();
- }
- }).transform(new Function<Optional<String>, String>() {
+ Elements imageCells = selectImageAttributes(document);
+ // Element.attr() yields "" for a missing attribute, so filter first to keep
+ // the previous behaviour of dropping images that have no "src" at all.
+ return imageCells.stream().filter(cell -> cell.hasAttr("src")).map(cell -> cell.attr("src")).collect(Collectors.toList());
+ }
- @Override
- public String apply(Optional<String> input) {
- return input.get();
- }
- }).toList() : Collections.<String>emptyList();
+ /**
+ * Selects the {@code img} elements located inside {@code tbody.style5}.
+ * Despite the method name, the result holds elements, not attributes.
+ *
+ * @param document
+ *            the parsed comic page
+ * @return the matching image elements, possibly empty
+ */
+ private Elements selectImageAttributes(Document document) {
+ return document.select("tbody.style5 img");
}
@Override
--- /dev/null
+package net.pterodactylus.rhynodge.filters.comics;
+
+import java.io.IOException;
+
+import net.pterodactylus.rhynodge.Filter;
+import net.pterodactylus.rhynodge.State;
+import net.pterodactylus.rhynodge.filters.ResourceLoader;
+import net.pterodactylus.rhynodge.states.ComicState;
+import net.pterodactylus.rhynodge.states.ComicState.Comic;
+import net.pterodactylus.rhynodge.states.ComicState.Strip;
+import net.pterodactylus.rhynodge.states.HtmlState;
+
+import org.hamcrest.Description;
+import org.hamcrest.Matcher;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.hamcrest.TypeSafeDiagnosingMatcher;
+import org.jsoup.nodes.Document;
+import org.junit.Test;
+
+
+/**
+ * Unit test for {@link SinfestComicFilter}.
+ *
+ * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
+ */
+public class SinfestComicFilterTest {
+
+	private final Filter sinfestFilter = new SinfestComicFilter();
+	private final HtmlState htmlState;
+
+	/**
+	 * Loads the {@code sinfest.html} document through {@link ResourceLoader}
+	 * and wraps it in the {@link HtmlState} that every test feeds to the
+	 * filter.
+	 *
+	 * @throws IOException
+	 *             propagated from loading the document
+	 */
+	public SinfestComicFilterTest() throws IOException {
+		Document sinfestPage = ResourceLoader.loadDocument(SinfestComicFilter.class, "sinfest.html", "http://www.sinfest.net/");
+		this.htmlState = new HtmlState("http://www.sinfest.net/", sinfestPage);
+	}
+
+	/** The filter must turn the HTML state into a {@link ComicState}. */
+	@Test
+	public void canParseComicsFromHtml() {
+		State filtered = sinfestFilter.filter(htmlState);
+		MatcherAssert.assertThat(filtered, Matchers.instanceOf(ComicState.class));
+	}
+
+	/** The filtered state must contain exactly the one expected comic. */
+	@Test
+	public void imageUrlsAreParsedCorrectly() {
+		Matcher<Comic> expectedComic = matchesComic("Search 9", "http://www.sinfest.net/btphp/comics/2015-02-20.gif", "");
+		ComicState filtered = (ComicState) sinfestFilter.filter(htmlState);
+		MatcherAssert.assertThat(filtered.comics(), Matchers.contains(expectedComic));
+	}
+
+	/**
+	 * Creates a matcher for a comic that has the given title and exactly one
+	 * strip with the given image URL and comment.
+	 *
+	 * @param title
+	 *            the expected comic title
+	 * @param url
+	 *            the expected image URL of the single strip
+	 * @param comment
+	 *            the expected comment of the single strip
+	 * @return a matcher that reports the first mismatch it finds
+	 */
+	private Matcher<Comic> matchesComic(String title, String url, String comment) {
+		return new TypeSafeDiagnosingMatcher<Comic>() {
+			@Override
+			public void describeTo(Description description) {
+				description.appendText("comic named ").appendValue(title)
+						.appendText(" at ").appendValue(url)
+						.appendText(" with comment ").appendValue(comment);
+			}
+
+			@Override
+			protected boolean matchesSafely(Comic comic, Description mismatchDescription) {
+				if (!comic.title().equals(title)) {
+					mismatchDescription.appendText("comic is named ").appendValue(comic.title());
+					return false;
+				}
+				int stripCount = comic.strips().size();
+				if (stripCount != 1) {
+					mismatchDescription.appendText("comic has ").appendValue(stripCount).appendText(" strips");
+					return false;
+				}
+				return matchesStrip(comic.strips().get(0), mismatchDescription);
+			}
+
+			/* Checks the image URL first, then the comment of the single strip. */
+			private boolean matchesStrip(Strip strip, Description mismatchDescription) {
+				if (!strip.imageUrl().equals(url)) {
+					mismatchDescription.appendText("image url is ").appendValue(strip.imageUrl());
+					return false;
+				}
+				if (!strip.comment().equals(comment)) {
+					mismatchDescription.appendText("comment is ").appendValue(strip.comment());
+					return false;
+				}
+				return true;
+			}
+		};
+	}
+
+}
--- /dev/null
+<html>
+<head>
+ <meta http-equiv="Content-Language" content="en-us">
+ <title>Sinfest</title>
+ <style type="text/css">
+ <!--
+ body, td { font: 13px Verdana, Geneva, Arial, Helvetica, sans-serif; }
+ table.cal th { background: #dddddd; }
+ table.cal tr.cal-row { background: #ffffff; }
+ table.cal tr.cal-week { background: #eeeeee; }
+ table.tb { color: #ffffff; background: #000000; }
+ table.tb td { color: #000000; background: #ffffff; }
+ table.tb tr.th td {
+ color: #000000;
+ background: #dddddd;
+ font-size: 12px;
+ font-weight: bold;
+ }
+ .border { border: 1px solid #000000; }
+ .h1 { font-size: 20px; font-weight: bold; }
+ .maintitle { font-size: 30px; font-weight: bold; }
+ .style2 {
+ background-image: url('../images/borderless_spectrum.gif');
+}
+.style3{
+ font-family: "Bookman Old Style";
+ font-size: medium;
+ color: #000000;
+}
+
+ .style4 {
+ font-family: "verdana";
+ font-size: medium;
+ color: #fefbb0;
+}
+ .style5 {
+ text-align: center;
+}
+.style6 {
+ font-family: "meiryo";
+ font-size: small;
+}
+
+ a {
+ color: #000000;
+}
+a:visited {
+ color: #333333;
+}
+a:active {
+ color: #3399FF;
+}
+a:hover {
+ color: #0000FF;
+}
+ -->
+ </style>
+</head>
+<body>
+<div style="position: absolute; float: left; left: 0; top: 0; width: 100%; height: 90px">
+
+<table style="width: 100%" cellspacing="0" cellpadding="0" bgcolor="black">
+ <tr><td valign="bottom" class="style6">
+ <center><script type="text/javascript" src="http://ap.lijit.com/www/delivery/fpi.js?z=221380&u=zenbomf&width=728&height=90"></script>
+ </tr>
+ </center>
+
+
+ </table>
+
+<table style="width: 100%; height: 64px;" cellspacing="0" cellpadding="0" class="style2" >
+ <tr>
+ <td class="style5"> <img alt="" src="../images/bright_sinfest.gif" width="286" height="24">
+ <span class="style4">By Tatsuya Ishida</span></td>
+
+
+ </tr>
+</table>
+
+<table style="width: 100%" height="20" cellspacing="0" cellpadding="0" align="center" bgcolor="white">
+ <tr>
+ <td valign="top" class="style5"><a href="index.php"><span class="style6">HOME</span></a>
+ <span class="style6"> | </span>
+ <a href="archiveb.php"><span class="style6">ARCHIVE</span></a>
+ <span class="style6"> | </span>
+ <a href="phpbb2"><span class="style6">FORUM</span></a>
+ <span class="style6"> | </span>
+ <a href="books.php"><span class="style6">BOOKS</span></a>
+ <span class="style6"> | </span>
+ <a href="comicdb.php"><span class="style6">SEARCH</span></td>
+ </tr>
+ </table> <center><table border="0" align="center" cellspacing="0" cellpadding="2">
+
+<tr><td colspan="2" class="style1"><table border="0">
+<tbody class="style5">
+<tr class="style5"><td class="style3"><nobr>February 20, 2015: </nobr>Search 9</td></tr>
+<tr><td colspan="2"><img src="btphp/comics/2015-02-20.gif" alt="Search 9"></td></tr>
+</table>
+<br></td></tr>
+<tr>
+ <td width="50%" align="center"> <a href="view.php?date=2000-01-17">
+ <img alt="" src="../images/first.gif" class="style3" ></td>
+ <td width="50%" align="center"> <a href="view.php?date=2015-02-19">
+ <img alt="" src="../images/prev.gif" class="style3"></td>
+</tr>
+</table>
+
+</center>
+<div class="style1">
+<br>
+
+
+<center>
+<table style="width: 480" cellpadding="8">
+ <tr>
+ <td><!-- Begin Project Wonderful ad code: -->
+<!-- IMPORTANT: All lines, including these comments, must be included. -->
+<!-- Removal or unauthorized alteration will result in your ads being automatically suspended! -->
+<!-- Ad box ID: 13807 -->
+<script language='JavaScript' type='text/javascript'>
+// <![CDATA[
+
+r = new String (Math.random()*1000);
+r = r.substr(0, 5);
+s = new String (String.fromCharCode(60));
+s += "script language='JavaScript' type='text/javascript'";
+s += "src = http://www.projectwonderful.com/gen.php";
+s += "?id=13807&type=5";
+s += "&r=" + r;
+if (document.referrer){
+s += "&referer=" + escape(document.referrer);
+}
+s += "><\/scr";
+s += "ipt>";
+document.write(s);
+// ]]>
+ </script>
+<noscript>
+ <map name="admap13807" id="admap13807"><area href="http://www.projectwonderful.com/out_nojs.php?r=0&c=0&id=13807&type=5" shape="rect" coords="0,0,728,90" title="" alt="" target="_blank" /></map>
+<table cellpadding="0" border="0" cellspacing="0" width="728" bgcolor="#ffffff"><tr><td><img src="http://www.projectwonderful.com/nojs.php?id=13807&type=5" width="728" height="90" usemap="#admap13807" border="0" alt="" /></td></tr><tr><td bgcolor="#ffffff" colspan="1"><center><a style="font-size:10px;color:#0000ff;text-decoration:none;line-height:1.2;font-weight:bold;font-family:Tahoma, verdana,arial,helvetica,sans-serif;text-transform: none;letter-spacing:normal;text-shadow:none;white-space:normal;word-spacing:normal;" href="http://www.projectwonderful.com/advertisehere.php?id=13807&type=5" target="_blank">Project Wonderful - Your ad here, right now, for as low as $0.00</a></center></td></tr><tr><td colspan=1 valign="top" width=728 bgcolor="#000000" style="height:3px;font-size:1px;padding:0px;max-height:3px;"></td></tr></table></noscript>
+<!-- End Project Wonderful ad code. -->
+</td>
+ </tr>
+</table>
+</center>
+
+<center>
+
+
+
+ <table style="width: 600" cellpadding="10">
+ <tr>
+ <td style="width: 350px" valign="top">
+ <table style="width: 375px" cellspacing="4">
+ <tr>
+ <td style="width: 369px">
+ <img alt="" src="../images/resistance_star.gif" width="394" height="58"><br>
+ </td>
+ </tr>
+ <tr>
+ <td style="width: 369px">
+
+ <table cellspacing="0" border="0" class="tb" align="center" cellpadding="5">
+ <head>\r
+<style type="text/css">\r
+.style1 {\r
+ text-align: center;\r
+}\r
+</style>\r
+</head>\r
+\r
+\r
+<body style="background-color: #FFFFFF">\r
+\r
+<tr><td>\r
+<div style="font-size:90%">June 1, 2014</div>\r
+<div style="font-weight:bold; width: 451px; height: 24px;" font face="latha">I know my kingdom awaits and they've forgiven my mistakes</div>\r
+<div style="font-size:80%">Posted by Tatsuya Ishida</div>\r
+<p>Hi. New design.<br>\r
+That is all.<br>\r
+-T.</p>\r
+</td></tr>\r
+ </table>
+ <div class="style5"><a href="news.php"><span class="style3">More Resistance</span></a></div>
+ </td>
+ </tr>
+ </table>
+ </td>
+ <td>
+ <script type="text/javascript" src="http://ap.lijit.com/www/delivery/fpi.js?z=221385&u=zenbomf&width=160&height=600">
+ </script></td>
+ </tr>
+ </table></center>
+</div>
+
+
+<br>
+<head>
+<style type="text/css">
+.style10 {
+ color: #FBFD93;
+ font-family: sans-serif;
+ font-size: x-small;
+}
+.style12 {
+ background-color: #94976C;
+}
+.style17 {
+ color: #FBFD93;
+ font-family: VERDANA;
+}
+.style18 {
+ text-align: center;
+}
+</style>
+</head>
+
+
+ <center><div id="lijit_region_255795"></div>
+<script type="text/javascript" src="http://www.lijit.com/delivery/fp?u=zenbomf&z=255795"></script>
+ </center>
+
+<table style="width:100%" height="100" cellspacing="0" cellpadding="0" class="style1" bgcolor="94976C">
+ <tr>
+ <td >
+ <center>
+ <div align="center"> <center><!-- BEGIN TECHNORATI MEDIA TAG -->
+<script type="text/javascript">
+document.write('<scri' + 'pt type="text/javascript" src="'
++ (document.location.protocol == 'https:' ? 'https://uat-secure' : 'http://ad-cdn')
++ '.technoratimedia.com/00/93/27/uat_32793.js?ad_size=728x90"></scri' + 'pt>');
+</script>
+<!-- END TECHNORATI MEDIA TAG --><br>
+
+<table class="style12" cellspacing="0" cellpadding="4" style="width: 755px" ><tr>
+ <td style="width: 625px" class="style18" >
+ <div class="style10">
+ © COPYRIGHT 2000-2015
+ BY TATSUYA
+ ISHIDA/MUSEWORKS. <a href="/contact.php">CONTACT</a><a href="/contact.php"> </a>
+ <br>
+ Powered by
+
+
+ <a href="http://www.enisoc.com/" style="color: #FBFD93">btPHP 1.3.2</a></div>
+ </div>
+<div align="center" class="style10">page generated in 0.02347 seconds</div></font>
+ </td> </tr> </table>
+
+</td>
+</tr>
+
+
+
+
+</table>
+
+</div>
+
+
+<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 START -->
+<script type='text/javascript' src='https://count.carrierzone.com/app/count_server/count.js'></script>
+<script type='text/javascript'><!--
+wm_custnum='7424cb7686d18f43';
+wm_page_name='index.php';
+wm_group_name='/services/webpages/s/i/sinfest.net/public';
+wm_campaign_key='campaign_id';
+wm_track_alt='';
+wiredminds.count();
+// -->
+</script>
+<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 END -->
+</body>
+</html>
\ No newline at end of file