🚧 HTML filter can now extract HTML from additional states, too
author David ‘Bombe’ Roden <bombe@pterodactylus.net>
Fri, 24 Apr 2026 10:11:32 +0000 (12:11 +0200)
committer David ‘Bombe’ Roden <bombe@pterodactylus.net>
Fri, 24 Apr 2026 10:11:32 +0000 (12:11 +0200)
src/main/java/net/pterodactylus/rhynodge/filters/HtmlFilter.java
src/test/kotlin/net/pterodactylus/rhynodge/filters/HtmlFilterTest.kt [new file with mode: 0644]

index 9c83745..d314abd 100644 (file)
@@ -50,7 +50,14 @@ public class HtmlFilter implements Filter {
                checkState(state instanceof HttpState, "state is not a HttpState but a %s", state.getClass().getName());
                logger.trace(String.format("Got HTML: %s, %s", ((HttpState) state).contentType(), ((HttpState) state).content()));
                Document document = Jsoup.parse(((HttpState) state).content(), ((HttpState) state).uri());
-               return new HtmlState(((HttpState) state).uri(), document);
+               var htmlState = new HtmlState(((HttpState) state).uri(), document);
+               state.getAdditionalStates().forEach(additionalState -> {
+                       if (additionalState instanceof HttpState) {
+                               var additionalDocument = Jsoup.parse(((HttpState) additionalState).content(), ((HttpState) additionalState).uri());
+                               htmlState.addState(new HtmlState(((HttpState) additionalState).uri(), additionalDocument));
+                       }
+               });
+               return htmlState;
        }
 
 }
diff --git a/src/test/kotlin/net/pterodactylus/rhynodge/filters/HtmlFilterTest.kt b/src/test/kotlin/net/pterodactylus/rhynodge/filters/HtmlFilterTest.kt
new file mode 100644 (file)
index 0000000..e5f482e
--- /dev/null
@@ -0,0 +1,29 @@
+package net.pterodactylus.rhynodge.filters
+
+import net.pterodactylus.rhynodge.states.HtmlState
+import net.pterodactylus.rhynodge.states.HttpState
+import org.hamcrest.MatcherAssert.assertThat
+import org.hamcrest.Matchers.equalTo
+import org.junit.jupiter.api.Test
+
+class HtmlFilterTest {
+
+       @Test
+       fun `html filter can extract html from http state`() {
+               // The body of a single HTTP state must come back as a queryable HTML document.
+               val source = HttpState("uri", 123, "content/test", "<html><body class='test'>test</body></html>".toByteArray())
+               val parsed = HtmlFilter().filter(source) as HtmlState
+               assertThat(parsed.document().select("body.test").text(), equalTo("test"))
+       }
+
+       @Test
+       fun `html filter can extract html from multi-http state`() {
+               // An HTTP state attached as an additional state must be parsed into its own HTML state.
+               val primary = HttpState("uri", 123, "content/test", "<html><body class='foo'>bar</body></html>".toByteArray())
+               primary.addState(HttpState("uri", 123, "content/test", "<html><body class='baz'>quo</body></html>".toByteArray()))
+               val parsed = HtmlFilter().filter(primary) as HtmlState
+               assertThat(parsed.document().select("body.foo").text(), equalTo("bar"))
+               assertThat((parsed.additionalStates.first() as HtmlState).document().select("body.baz").text(), equalTo("quo"))
+       }
+
+}