projects
/
Sone.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
🚸 Improve text extraction from freesites
[Sone.git]
/
src
/
main
/
kotlin
/
net
/
pterodactylus
/
sone
/
core
/
DefaultElementLoader.kt
diff --git
a/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt
b/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt
index
2849029
..
88cb1f5
100644
(file)
--- a/
src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt
+++ b/
src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt
@@
-6,7
+6,6
@@
import com.google.common.cache.CacheBuilder
import freenet.keys.FreenetURI
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import freenet.keys.FreenetURI
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
-import org.jsoup.nodes.TextNode
import java.io.ByteArrayInputStream
import java.net.URLDecoder
import java.nio.charset.Charset
import java.io.ByteArrayInputStream
import java.net.URLDecoder
import java.nio.charset.Charset
@@
-101,7
+100,6
@@
private val Document.metaDescription: String?
private val Document.firstNonHeadingParagraph: String?
get() = body().children()
private val Document.firstNonHeadingParagraph: String?
get() = body().children()
- .filter { it.children().all { it is TextNode } }
.map { it to it.text() }
.filterNot { it.second == "" }
.firstOrNull { !it.first.tagName().startsWith("h", ignoreCase = true) }
.map { it to it.text() }
.filterNot { it.second == "" }
.firstOrNull { !it.first.tagName().startsWith("h", ignoreCase = true) }