X-Git-Url: https://git.pterodactylus.net/?a=blobdiff_plain;f=src%2Fmain%2Fkotlin%2Fnet%2Fpterodactylus%2Fsone%2Fcore%2FDefaultElementLoader.kt;h=e497fdcb18f1493bba9f14d56a46f1b7f5518ac9;hb=refs%2Fheads%2Fnext;hp=2849029394504e205cd8e9476fcf484c09908c27;hpb=a72774c302cdb2c35508380fb2ec445be2705efe;p=Sone.git diff --git a/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt b/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt index 2849029..e497fdc 100644 --- a/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt +++ b/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt @@ -6,12 +6,12 @@ import com.google.common.cache.CacheBuilder import freenet.keys.FreenetURI import org.jsoup.Jsoup import org.jsoup.nodes.Document -import org.jsoup.nodes.TextNode import java.io.ByteArrayInputStream import java.net.URLDecoder import java.nio.charset.Charset import java.text.Normalizer import java.util.concurrent.TimeUnit.MINUTES +import java.util.logging.Logger import javax.activation.MimeType import javax.imageio.ImageIO import javax.inject.Inject @@ -26,9 +26,18 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke private val loadingLinks: Cache = CacheBuilder.newBuilder().build() private val failureCache: Cache = CacheBuilder.newBuilder().ticker(ticker).expireAfterWrite(30, MINUTES).build() private val elementCache: Cache = CacheBuilder.newBuilder().build() + private val logger = Logger.getLogger(DefaultElementLoader::class.qualifiedName) private val callback = object: FreenetInterface.BackgroundFetchCallback { override fun shouldCancel(uri: FreenetURI, mimeType: String, size: Long): Boolean { - return (size > 2097152) || (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html")) + if (size > 2097152) { + logger.fine { "Canceling download of $uri because it’s > 2 MiB." } + return true + } + if (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html")) { + logger.fine { "Canceling download of $uri because of its MIME type, $mimeType." } + return true + } + return false } override fun loaded(uri: FreenetURI, mimeTypeText: String, data: ByteArray) { @@ -40,6 +49,9 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke }?.let { elementCache.get(uri.toString().decode().normalize()) { LinkedElement(uri.toString(), properties = mapOf("type" to "image", "size" to data.size, "sizeHuman" to data.size.human)) + .apply { + logger.fine("Downloaded image from $link: size=${properties["size"]}.") + } } } } @@ -50,7 +62,9 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke "type" to "html", "size" to data.size, "sizeHuman" to data.size.human, "title" to document.title().emptyToNull, "description" to (document.metaDescription ?: document.firstNonHeadingParagraph) - )) + )).apply { + logger.fine { "Extracted information from $link: title=${properties["title"]}, description=${properties["description"]}." } + } } } } @@ -61,6 +75,7 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke override fun failed(uri: FreenetURI) { failureCache.put(uri.toString().decode().normalize(), true) removeLoadingLink(uri) + logger.fine { "Download failed for $uri." } } private fun removeLoadingLink(uri: FreenetURI) { @@ -89,7 +104,7 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke } -private fun String.decode() = URLDecoder.decode(this, "UTF-8")!! +private fun String.decode() = try { URLDecoder.decode(this, "UTF-8")!! } catch (e: RuntimeException) { freenet.support.Logger.error(DefaultElementLoader::class.java, "Could not decode %s!".format(this), e); throw e } private fun String.normalize() = Normalizer.normalize(this, Normalizer.Form.NFC)!! private val String?.emptyToNull get() = if (this == "") null else this @@ -100,11 +115,10 @@ private val Document.metaDescription: String? ?.second private val Document.firstNonHeadingParagraph: String? - get() = body().children() - .filter { it.children().all { it is TextNode } } + get() = body().select("div, p") + .filter { it.textNodes().isNotEmpty() } .map { it to it.text() } - .filterNot { it.second == "" } - .firstOrNull { !it.first.tagName().startsWith("h", ignoreCase = true) } + .firstOrNull { it.second != "" } ?.second private val Int.human get() = when (this) {