From: David ‘Bombe’ Roden Date: Wed, 2 Jan 2013 17:56:21 +0000 (+0100) Subject: Add HTML filter and state. X-Git-Tag: 0.1~102 X-Git-Url: https://git.pterodactylus.net/?a=commitdiff_plain;ds=sidebyside;h=598d9786ca16aaad667702d309756d9fdbd7f117;p=rhynodge.git Add HTML filter and state. --- diff --git a/pom.xml b/pom.xml index 3d6265c..d16a214 100644 --- a/pom.xml +++ b/pom.xml @@ -35,5 +35,10 @@ <artifactId>httpclient</artifactId> <version>4.2.2</version> </dependency> + <dependency> + <groupId>org.jsoup</groupId> + <artifactId>jsoup</artifactId> + <version>1.7.1</version> + </dependency> </dependencies> </project> diff --git a/src/main/java/net/pterodactylus/reactor/filters/HtmlFilter.java b/src/main/java/net/pterodactylus/reactor/filters/HtmlFilter.java new file mode 100644 index 0000000..83b961d --- /dev/null +++ b/src/main/java/net/pterodactylus/reactor/filters/HtmlFilter.java @@ -0,0 +1,50 @@ +/* + * Reactor - HtmlFilter.java - Copyright © 2013 David Roden + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package net.pterodactylus.reactor.filters; + +import static com.google.common.base.Preconditions.checkState; +import net.pterodactylus.reactor.Filter; +import net.pterodactylus.reactor.State; +import net.pterodactylus.reactor.states.FailedState; +import net.pterodactylus.reactor.states.HtmlState; +import net.pterodactylus.reactor.states.HttpState; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +/** + * {@link Filter} that converts a {@link HttpState} into an {@link HtmlState}. 
+ * + * @author David ‘Bombe’ Roden + */ +public class HtmlFilter implements Filter { + + /** + * {@inheritDoc} + */ + @Override + public State filter(State state) { + if (!state.success()) { + return FailedState.from(state); + } + checkState(state instanceof HttpState, "state is not a HttpState but a %s", state.getClass().getName()); + Document document = Jsoup.parse(((HttpState) state).content(), ((HttpState) state).uri()); + return new HtmlState(((HttpState) state).uri(), document); + } + +} diff --git a/src/main/java/net/pterodactylus/reactor/states/HtmlState.java b/src/main/java/net/pterodactylus/reactor/states/HtmlState.java new file mode 100644 index 0000000..ce89a98 --- /dev/null +++ b/src/main/java/net/pterodactylus/reactor/states/HtmlState.java @@ -0,0 +1,84 @@ +/* + * Reactor - HtmlState.java - Copyright © 2013 David Roden + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package net.pterodactylus.reactor.states; + +import net.pterodactylus.reactor.State; + +import org.jsoup.nodes.Document; + +/** + * {@link State} implementation that contains a parsed HTML {@link Document}. + * + * @author David ‘Bombe’ Roden + */ +public class HtmlState extends AbstractState { + + /** The URI of the parsed document. */ + private final String uri; + + /** The parsed document. */ + private final Document document; + + /** + * Creates a new HTML state. 
+ * + * @param uri + * The URI of the parsed document + * @param document + * The parsed document + */ + public HtmlState(String uri, Document document) { + this.uri = uri; + this.document = document; + } + + // + // ACCESSORS + // + + /** + * Returns the URI of the parsed document. + * + * @return The URI of the parsed document + */ + public String uri() { + return uri; + } + + /** + * Returns the parsed document. + * + * @return The parsed document + */ + public Document document() { + return document; + } + + // + // OBJECT METHODS + // + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return String.format("%s[document=(%s chars)]", getClass().getSimpleName(), document().toString().length()); + } + +}