From 787213fe5fb1cda97bea888cbefc53c265c3be4b Mon Sep 17 00:00:00 2001 From: =?utf8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?= Date: Wed, 26 Aug 2015 20:51:00 +0200 Subject: [PATCH] Fix SATW parser, add test --- .../comics/ScandinaviaAndTheWorldComicFilter.java | 14 +- .../filters/comics/scandinavia-and-the-world.html | 835 +++++++++++++++++++++ .../watchers/ScandinaviaAndTheWorldWatcher.java | 31 +- .../ScandinaviaAndTheWorldComicFilterTest.java | 94 +++ 4 files changed, 965 insertions(+), 9 deletions(-) create mode 100644 src/main/java/net/pterodactylus/rhynodge/filters/comics/scandinavia-and-the-world.html create mode 100644 src/test/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilterTest.java diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilter.java index 23590cb..2a32f6a 100644 --- a/src/main/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilter.java +++ b/src/main/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilter.java @@ -22,6 +22,7 @@ import com.google.common.base.Optional; import net.pterodactylus.rhynodge.filters.ComicSiteFilter; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import java.util.List; @@ -39,19 +40,18 @@ public class ScandinaviaAndTheWorldComicFilter extends ComicSiteFilter { // COMICSITEFILTER METHODS // + private Elements findImageElements(Document document) { + return document.select("img[itemprop=image]"); + } + @Override protected Optional extractTitle(Document document) { - return Optional.of(document.select(".comicmid img").attr("title")); + return Optional.of(findImageElements(document).attr("title")); } @Override protected List extractImageUrls(Document document) { - return from(document.select(".comicmid img")).transform(new Function() { - @Override 
- public String apply(Element element) { - return element.attr("src"); - } - }).toList(); + return from(findImageElements(document)).transform(element -> element.attr("src")).toList(); } @Override diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/comics/scandinavia-and-the-world.html b/src/main/java/net/pterodactylus/rhynodge/filters/comics/scandinavia-and-the-world.html new file mode 100644 index 0000000..b87beac --- /dev/null +++ b/src/main/java/net/pterodactylus/rhynodge/filters/comics/scandinavia-and-the-world.html @@ -0,0 +1,835 @@ + + + + + + + + + The whale in the room - Scandinavia and the World + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+
+
+ Scandinavia and the World +
+
+
+
+ advert
+ +
+ +
+
+ + + + + + +
+ + + + + +
+
The whale in the room satwcomic.com
+
+
+

The whale in the room

+
+ I was talking with a woman from the Netherlands and somehow we ended up talking about how our countries treat stranded marine life. Of course the Netherlands have regular beaches where whales can get stranded, but the first thing that popped into my head were whales flopping over the sides of Netherlands' dams.
+
+ + 25th August 2015 +
+ + + + + +
+
Tagged in Whale Netherlands Denmark
+
+
+
+ Please support SatW via Patreon
+
+ + + + Share Scandinavia and the World:
+
+
+
+ +
+ +
+
+
+ Latest comic in your News Feed:
+ +
+
+ +
+
+
+
+ +
+
+ advert
+ +
+
+
+
+ advert
+ +
+
+
+ +
+

We have an awesome newsletter with 2,971 subscribers!

+ Latest comic news
+ Merchandise news
+ Iceland's Demon Cat
+ and more!
+
+
+
+
+
+
+ +
+
+
 
+ 57 Comments:
 
+
+ + + + + sort by: + + + + + + + direction: + + + + + + + + +
+
+
+ photondancer
+
+ O
+ + + +
+
+ 9 hours ago #9400586   +      
6

0

 
+
+ I really love the fact that Denmark gave the whale his beer. He's such a softy :-) +

show replies
+ +
+
+
+
+ +
+ + + +
+ +
+
+ ryttyr
+
+ 19 M
+ + + +
+
+ 5 hours ago #9400660   +      
2

0

 
+
+ He he. I like Denmark's way of hydrating the whale.
+
+ BTW. Can we get an Iceland based comic soon? He is one of my favorite characters but he is not in the comic that often. + + +
+
+
+
+ +
+ +
+
+ bubba
+
+ 54 F
+ + + +
+
+ 20 hours ago #9400452   +      
2

0

 
+
+ Iceland would just have waited for the whale to die (or killed it) and ate the meat!
+
+ Not really - the little dear would have been too sick to eat. + + +
+
+
+
+ +
+ +
+
+ riani
+
+ F
+ + + +
+
+ 21 hours ago #9400444   +      
2

0

 
+
+ Of course Denmark gives the whale a beer. + + +
+
+
+
+ +
+ +
+
+ gctwnl
+
+ M
+ + + +
+
+ 21 hours ago #9400443   +      
2

0

 
+
+ That whale drinking beer. Brilliant!
+
+ Actually, much of The Netherlands is not below sea level but roughly 'at' sea level and it would only flood during a high tide.
+
+ Maybe a good one for another comic: one of the reasons The Netherlands is below sea level in many places is that we pumped the water out of the ground to make the swamps, that was there originally in many places, dry. As a result, the ground compacted and The Netherlands got lower and lower over the centuries. Then we had to build higher dikes to keep the sea out. Self-inflicted in part...
+
+ Another thing we have done is pumped gas out of the North of the country (used to be the biggest gas reserve in the world) and as a result the ground is compacting there as well. That also results in earthquakes strong enough to damage buildings there. +

show replies
+ +
+
+
+
+ +
+ + + +
+ + + +
+ + + +
+ +
+
+ Fundir
+
+ 21 M
+ + + +
+
+ 21 hours ago #9400423   +      
2

0

 
+
+ Yay the whale hvalborg! <3 (they got the name from a danish song) + + +
+
+
+
+ +
+ +
+
+ Dutchbag
+
+ 18 M
+ Moderator + + +
+
+ 21 hours ago #9400419   +      
2

0

 
+
+ >fuck my like
+
+ Humon pls + + +
+
+
+
+ +
+ +
+
+ kyrtuck
+
+ 24 M
+ + + +
+
+ 4 hours ago #9400669   +      
1

1

 
+
+ No Netherlands, I don't wanna fuck your life, its not sexy enough. + + +
+
+
+
+ +
+ +
+
+ senia5
+
+ 18 F
+ + + +
+
+ 8 hours ago #9400600   +      
1

0

 
+
+ I love how you called him Hvalborg. Nice touch. ;) + + +
+
+
+
+ +
+ +
+
+ Dorkymike
+
+ 32 M
+ + + +
+
+ 12 hours ago #9400569   +      
1

0

 
+
+ As long as they don't die on the beach and explode. That tends to be quite messy. + + +
+
+
+
+ +
+ +
+ Add comment: Please Sign in or create an accout to comment. +
+ +
+

View all 57 comments

+ +
+ +
+ + + +
+ + + + + + + diff --git a/src/main/java/net/pterodactylus/rhynodge/watchers/ScandinaviaAndTheWorldWatcher.java b/src/main/java/net/pterodactylus/rhynodge/watchers/ScandinaviaAndTheWorldWatcher.java index 6e3cbac..fdcae7d 100644 --- a/src/main/java/net/pterodactylus/rhynodge/watchers/ScandinaviaAndTheWorldWatcher.java +++ b/src/main/java/net/pterodactylus/rhynodge/watchers/ScandinaviaAndTheWorldWatcher.java @@ -17,12 +17,20 @@ package net.pterodactylus.rhynodge.watchers; +import java.util.List; + +import net.pterodactylus.rhynodge.Filter; +import net.pterodactylus.rhynodge.filters.ExtractUrlFilter; import net.pterodactylus.rhynodge.filters.HtmlFilter; +import net.pterodactylus.rhynodge.filters.HttpQueryFilter; import net.pterodactylus.rhynodge.filters.comics.ScandinaviaAndTheWorldComicFilter; import net.pterodactylus.rhynodge.queries.HttpQuery; import net.pterodactylus.rhynodge.triggers.NewComicTrigger; -import java.util.Arrays; +import com.google.common.base.Optional; +import com.google.common.collect.ImmutableList; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; /** * {@link net.pterodactylus.rhynodge.Watcher} implementation that watches for new Scandinavia and the World comics. @@ -32,7 +40,26 @@ import java.util.Arrays; public class ScandinaviaAndTheWorldWatcher extends DefaultWatcher { public ScandinaviaAndTheWorldWatcher() { - super(new HttpQuery("http://satwcomic.com/"), Arrays.asList(new HtmlFilter(), new ScandinaviaAndTheWorldComicFilter()), new NewComicTrigger()); + super(new HttpQuery("http://satwcomic.com/"), createFilters(), new NewComicTrigger()); + } + + private static List createFilters() { + ImmutableList.Builder filters = ImmutableList.builder(); + + filters.add(new HtmlFilter()); + filters.add(new ExtractUrlFilter() { + + @Override + protected Optional extractUrl(Document document) { + Elements linkTag = document.select("a.btn-success"); + return linkTag.hasAttr("href") ? 
Optional.of(linkTag.attr("href")) : Optional.absent(); + } + }); + filters.add(new HttpQueryFilter()); + filters.add(new HtmlFilter()); + filters.add(new ScandinaviaAndTheWorldComicFilter()); + + return filters.build(); } } diff --git a/src/test/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilterTest.java b/src/test/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilterTest.java new file mode 100644 index 0000000..be54a38 --- /dev/null +++ b/src/test/java/net/pterodactylus/rhynodge/filters/comics/ScandinaviaAndTheWorldComicFilterTest.java @@ -0,0 +1,94 @@ +package net.pterodactylus.rhynodge.filters.comics; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; + +import java.io.IOException; +import java.util.Objects; + +import net.pterodactylus.rhynodge.Filter; +import net.pterodactylus.rhynodge.filters.ResourceLoader; +import net.pterodactylus.rhynodge.states.ComicState; +import net.pterodactylus.rhynodge.states.ComicState.Comic; +import net.pterodactylus.rhynodge.states.ComicState.Strip; +import net.pterodactylus.rhynodge.states.HtmlState; + +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeDiagnosingMatcher; +import org.jsoup.nodes.Document; +import org.junit.Test; + +/** + * Unit test for {@link ScandinaviaAndTheWorldComicFilter}. 
+ * + * @author David Roden + */ +public class ScandinaviaAndTheWorldComicFilterTest { + + private final Filter satwFilter = new ScandinaviaAndTheWorldComicFilter(); + private final HtmlState htmlState; + + public ScandinaviaAndTheWorldComicFilterTest() throws IOException { + Document document = ResourceLoader.loadDocument(ScandinaviaAndTheWorldComicFilter.class, "scandinavia-and-the-world.html", + "http://satwcomic.com/"); + htmlState = new HtmlState("http://satwcomic.com/", document); + } + + @Test + public void comicIsParsedCorrectly() { + ComicState comicState = (ComicState) satwFilter.filter(htmlState); + assertThat(comicState.comics(), contains( + isComic("The whale in the room", contains( + isStrip("http://satwcomic.com/art/the-whale-in-the-room.png", "") + )) + )); + } + + private Matcher isComic(String title, Matcher> stripsMatcher) { + return new TypeSafeDiagnosingMatcher() { + @Override + protected boolean matchesSafely(Comic comic, Description mismatchDescription) { + if (!Objects.equals(comic.title(), title)) { + mismatchDescription.appendText("title is ").appendValue(comic.title()); + return false; + } + if (!stripsMatcher.matches(comic.strips())) { + stripsMatcher.describeMismatch(comic.strips(), mismatchDescription); + return false; + } + return true; + } + + @Override + public void describeTo(Description description) { + description.appendText("is comic with title ").appendValue(title); + description.appendText(" and strips ").appendValueList("(", ", ", ")", stripsMatcher); + } + }; + } + + private Matcher isStrip(String url, String comment) { + return new TypeSafeDiagnosingMatcher() { + @Override + protected boolean matchesSafely(Strip strip, Description mismatchDescription) { + if (!Objects.equals(strip.imageUrl(), url)) { + mismatchDescription.appendText("image URL is ").appendValue(strip.imageUrl()); + return false; + } + if (!Objects.equals(strip.comment(), comment)) { + mismatchDescription.appendText("comment is 
").appendValue(strip.comment()); + return false; + } + return true; + } + + @Override + public void describeTo(Description description) { + description.appendText("is strip from ").appendValue(url); + description.appendText(" with comment ").appendValue(comment); + } + }; + } + +} -- 2.7.4