src/main/java/net/pterodactylus/rhynodge/filters/ComicSiteFilter.java

   1 /*
   2  * rhynodge - ComicSiteFilter.java - Copyright © 2013 David Roden
   3  *
   4  * This program is free software: you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation, either version 3 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 package net.pterodactylus.rhynodge.filters;
  19
  20 import static com.google.common.base.Preconditions.checkArgument;
  21
  22 import java.net.URI;
  23 import java.net.URISyntaxException;
  24 import java.util.List;
  25
  26 import net.pterodactylus.rhynodge.Filter;
  27 import net.pterodactylus.rhynodge.State;
  28 import net.pterodactylus.rhynodge.states.ComicState;
  29 import net.pterodactylus.rhynodge.states.ComicState.Comic;
  30 import net.pterodactylus.rhynodge.states.ComicState.Strip;
  31 import net.pterodactylus.rhynodge.states.FailedState;
  32 import net.pterodactylus.rhynodge.states.HtmlState;
  33
  34 import com.google.common.base.Optional;
  35 import org.jetbrains.annotations.NotNull;
  36 import org.jsoup.nodes.Document;
  37
  38 /**
  39  * {@link Filter} implementation that can extract {@link ComicState}s from
  40  * {@link HtmlState}s.
  41  *
  42  * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
  43  */
  44 public abstract class ComicSiteFilter implements Filter {
  45
  46         @NotNull
  47         @Override
  48         public State filter(@NotNull State state) {
  49                 checkArgument(state instanceof HtmlState, "state must be an HTML state");
  50
  51                 /* initialize states: */
  52                 HtmlState htmlState = (HtmlState) state;
  53
  54                 /* extract comics. */
  55                 Optional<String> title = extractTitle(htmlState.document());
  56                 List<String> imageUrls = extractImageUrls(htmlState.document());
  57                 List<String> imageComments = extractImageComments(htmlState.document());
  58
  59                 /* store comic, if found, into state. */
  60                 if (!title.isPresent() || imageUrls.isEmpty()) {
  61                         return new FailedState();
  62                 }
  63
  64                 ComicState comicState = new ComicState();
  65                 Comic comic = new Comic(title.get());
  66                 int imageCounter = 0;
  67                 for (String imageUrl : imageUrls) {
  68                         String imageComment = (imageCounter < imageComments.size()) ? imageComments.get(imageCounter) : "";
  69                         try {
  70                                 URI stripUri = new URI(htmlState.uri()).resolve(imageUrl.replaceAll(" ", "%20"));
  71                                 Strip strip = new Strip(stripUri.toString(), imageComment);
  72                                 imageCounter++;
  73                                 comic.add(strip);
  74                         } catch (URISyntaxException use1) {
  75                                 throw new IllegalStateException(String.format("Could not resolve image URL “%s” against base URL “%s”.", imageUrl, htmlState.uri()), use1);
  76                         }
  77                 }
  78                 comicState.add(comic);
  79
  80                 return comicState;
  81         }
  82
  83         //
  84         // PROTECTED METHODS
  85         //
  86
  87         /**
  88          * Extracts the title of the comic from the given document.
  89          *
  90          * @param document
  91          *              The document to extract the title from
  92          * @return The extracted title, or {@link Optional#absent()}} if no title could
  93          *         be found
  94          */
  95         protected abstract Optional<String> extractTitle(Document document);
  96
  97         /**
  98          * Extracts the image URLs from the given document.
  99          *
 100          * @param document
 101          *              The document to extract the image URLs from
 102          * @return The extracted image URLs, or an empty list if no URLs could be
 103          *         found
 104          */
 105         protected abstract List<String> extractImageUrls(Document document);
 106
 107         /**
 108          * Extracts the image comments from the given document. The elements of this
 109          * last and of the list returned by {@link #extractImageUrls(org.jsoup.nodes.Document)}
 110          * are paired up and added as {@link Strip}s. If the list returned by this
 111          * method has less elements, an empty string is used for the remaining images.
 112          *
 113          * @param document
 114          *              The document to extract the image comments from
 115          * @return The extracted image comments
 116          */
 117         protected abstract List<String> extractImageComments(Document document);
 118
 119 }