checkState(state instanceof HttpState, "state is not a HttpState but a %s", state.getClass().getName());
logger.trace(String.format("Got HTML: %s, %s", ((HttpState) state).contentType(), ((HttpState) state).content()));
Document document = Jsoup.parse(((HttpState) state).content(), ((HttpState) state).uri());
- return new HtmlState(((HttpState) state).uri(), document);
+ var htmlState = new HtmlState(((HttpState) state).uri(), document);
+ state.getAdditionalStates().forEach(additionalState -> {
+ if (additionalState instanceof HttpState) {
+ var additionalDocument = Jsoup.parse(((HttpState) additionalState).content(), ((HttpState) additionalState).uri());
+ htmlState.addState(new HtmlState(((HttpState) additionalState).uri(), additionalDocument));
+ }
+ });
+ return htmlState;
}
}
--- /dev/null
+package net.pterodactylus.rhynodge.filters
+
+import net.pterodactylus.rhynodge.states.HtmlState
+import net.pterodactylus.rhynodge.states.HttpState
+import org.hamcrest.MatcherAssert.assertThat
+import org.hamcrest.Matchers.equalTo
+import org.junit.jupiter.api.Test
+
+/**
+ * Tests for [HtmlFilter]: an [HttpState] passed through the filter must come
+ * back as an [HtmlState] whose document was parsed from the HTTP content,
+ * including any additional states attached to the incoming state.
+ */
+class HtmlFilterTest {
+
+    /** A single HTTP state is parsed into a document that can be queried with CSS selectors. */
+    @Test
+    fun `html filter can extract html from http state`() {
+        val source = httpStateFor("<html><body class='test'>test</body></html>")
+
+        val filtered = HtmlFilter().filter(source) as HtmlState
+
+        val bodyText = filtered.document().select("body.test").text()
+        assertThat(bodyText, equalTo("test"))
+    }
+
+    /** An additional HTTP state attached to the primary one is parsed and exposed as an additional HTML state. */
+    @Test
+    fun `html filter can extract html from multi-http state`() {
+        val source = httpStateFor("<html><body class='foo'>bar</body></html>").apply {
+            addState(httpStateFor("<html><body class='baz'>quo</body></html>"))
+        }
+
+        val filtered = HtmlFilter().filter(source) as HtmlState
+
+        // Check the primary document first, then the first additional state.
+        val primaryText = filtered.document().select("body.foo").text()
+        assertThat(primaryText, equalTo("bar"))
+        val firstAdditional = filtered.additionalStates.first() as HtmlState
+        val additionalText = firstAdditional.document().select("body.baz").text()
+        assertThat(additionalText, equalTo("quo"))
+    }
+
+    /** Builds an [HttpState] with fixed URI, numeric code and content type, carrying [html] as its body bytes. */
+    private fun httpStateFor(html: String): HttpState =
+        HttpState("uri", 123, "content/test", html.toByteArray())
+
+}