X-Git-Url: https://git.pterodactylus.net/?a=blobdiff_plain;ds=sidebyside;f=src%2Fmain%2Fjava%2Fnet%2Fpterodactylus%2Frhynodge%2Ffilters%2FComicSiteFilter.java;h=19286a6c3b2d0990046f551dcb91d67e5eb7dc4b;hb=cf2f4441705f9a5941022fb0837d08c95a1d0633;hp=fb01349e683369e1e0524e88a703ba6cc05a3645;hpb=a4dfb17e4e1d00440a049cb1063e1b1509216739;p=rhynodge.git diff --git a/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java b/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java index fb01349..19286a6 100644 --- a/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java +++ b/src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java @@ -17,17 +17,22 @@ package net.pterodactylus.rhynodge.filters; -import static com.google.common.base.Preconditions.*; +import static com.google.common.base.Preconditions.checkArgument; +import java.net.URI; +import java.net.URISyntaxException; import java.util.List; import net.pterodactylus.rhynodge.Filter; import net.pterodactylus.rhynodge.State; import net.pterodactylus.rhynodge.states.ComicState; import net.pterodactylus.rhynodge.states.ComicState.Comic; +import net.pterodactylus.rhynodge.states.ComicState.Strip; +import net.pterodactylus.rhynodge.states.FailedState; import net.pterodactylus.rhynodge.states.HtmlState; import com.google.common.base.Optional; +import org.jetbrains.annotations.NotNull; import org.jsoup.nodes.Document; /** @@ -38,26 +43,39 @@ import org.jsoup.nodes.Document; */ public abstract class ComicSiteFilter implements Filter { + @NotNull @Override - public State filter(State state) { + public State filter(@NotNull State state) { checkArgument(state instanceof HtmlState, "state must be an HTML state"); /* initialize states: */ HtmlState htmlState = (HtmlState) state; - ComicState comicState = new ComicState(); /* extract comics. 
*/ Optional<String> title = extractTitle(htmlState.document()); List<String> imageUrls = extractImageUrls(htmlState.document()); + List<String> imageComments = extractImageComments(htmlState.document()); /* store comic, if found, into state. */ - if (title.isPresent() && !imageUrls.isEmpty()) { - Comic comic = new Comic(title.get()); - for (String imageUrl : imageUrls) { - comic.addImageUrl(imageUrl); + if (!title.isPresent() || imageUrls.isEmpty()) { + return new FailedState(); + } + + ComicState comicState = new ComicState(); + Comic comic = new Comic(title.get()); + int imageCounter = 0; + for (String imageUrl : imageUrls) { + String imageComment = (imageCounter < imageComments.size()) ? imageComments.get(imageCounter) : ""; + try { + URI stripUri = new URI(htmlState.uri()).resolve(imageUrl.replaceAll(" ", "%20")); + Strip strip = new Strip(stripUri.toString(), imageComment); + imageCounter++; + comic.add(strip); + } catch (URISyntaxException use1) { + throw new IllegalStateException(String.format("Could not resolve image URL “%s” against base URL “%s”.", imageUrl, htmlState.uri()), use1); } - comicState.add(comic); } + comicState.add(comic); return comicState; } @@ -86,4 +104,16 @@ public abstract class ComicSiteFilter implements Filter { */ protected abstract List<String> extractImageUrls(Document document); + /** + * Extracts the image comments from the given document. The elements of this + * list and of the list returned by {@link #extractImageUrls(org.jsoup.nodes.Document)} + * are paired up and added as {@link Strip}s. If the list returned by this + * method has fewer elements, an empty string is used for the remaining images. + * + * @param document + * The document to extract the image comments from + * @return The extracted image comments + */ + protected abstract List<String> extractImageComments(Document document); + }