X-Git-Url: https://git.pterodactylus.net/?a=blobdiff_plain;f=src%2Fmain%2Fkotlin%2Fnet%2Fpterodactylus%2Fsone%2Fcore%2FDefaultElementLoader.kt;h=e497fdcb18f1493bba9f14d56a46f1b7f5518ac9;hb=refs%2Fheads%2Fnext;hp=ebdae49b4b9c22d57b455c96f0bc15ff4c36157e;hpb=27a0934b7e68315756020e0de0bc2564355be90a;p=Sone.git diff --git a/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt b/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt index ebdae49..e497fdc 100644 --- a/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt +++ b/src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt @@ -6,12 +6,12 @@ import com.google.common.cache.CacheBuilder import freenet.keys.FreenetURI import org.jsoup.Jsoup import org.jsoup.nodes.Document -import org.jsoup.nodes.TextNode import java.io.ByteArrayInputStream import java.net.URLDecoder import java.nio.charset.Charset import java.text.Normalizer import java.util.concurrent.TimeUnit.MINUTES +import java.util.logging.Logger import javax.activation.MimeType import javax.imageio.ImageIO import javax.inject.Inject @@ -23,23 +23,35 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke @Inject constructor(freenetInterface: FreenetInterface): this(freenetInterface, Ticker.systemTicker()) - private val loadingLinks: Cache = CacheBuilder.newBuilder().build() - private val failureCache: Cache = CacheBuilder.newBuilder().ticker(ticker).expireAfterWrite(30, MINUTES).build() - private val elementCache: Cache = CacheBuilder.newBuilder().build() + private val loadingLinks: Cache = CacheBuilder.newBuilder().build() + private val failureCache: Cache = CacheBuilder.newBuilder().ticker(ticker).expireAfterWrite(30, MINUTES).build() + private val elementCache: Cache = CacheBuilder.newBuilder().build() + private val logger = Logger.getLogger(DefaultElementLoader::class.qualifiedName) private val callback = object: FreenetInterface.BackgroundFetchCallback { override fun shouldCancel(uri: FreenetURI, mimeType: String, size: Long): Boolean { - return (size > 2097152) || (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html")) + if (size > 2097152) { + logger.fine { "Canceling download of $uri because it’s > 2 MiB." } + return true + } + if (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html")) { + logger.fine { "Canceling download of $uri because of its MIME type, $mimeType." } + return true + } + return false } - override fun loaded(uri: FreenetURI, mimeType: String, data: ByteArray) { - MimeType(mimeType).also { mimeType -> + override fun loaded(uri: FreenetURI, mimeTypeText: String, data: ByteArray) { + MimeType(mimeTypeText).also { mimeType -> when { mimeType.primaryType == "image" -> { ByteArrayInputStream(data).use { ImageIO.read(it) }?.let { elementCache.get(uri.toString().decode().normalize()) { - LinkedElement(uri.toString(), properties = mapOf("size" to data.size, "sizeHuman" to data.size.human)) + LinkedElement(uri.toString(), properties = mapOf("type" to "image", "size" to data.size, "sizeHuman" to data.size.human)) + .apply { + logger.fine("Downloaded image from $link: size=${properties["size"]}.") + } } } } @@ -47,10 +59,12 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke val document = Jsoup.parse(data.toString(Charset.forName(mimeType.getParameter("charset") ?: "UTF-8"))) elementCache.get(uri.toString().decode().normalize()) { LinkedElement(uri.toString(), properties = mapOf( - "size" to data.size, "sizeHuman" to data.size.human, + "type" to "html", "size" to data.size, "sizeHuman" to data.size.human, "title" to document.title().emptyToNull, "description" to (document.metaDescription ?: document.firstNonHeadingParagraph) - )) + )).apply { + logger.fine { "Extracted information from $link: title=${properties["title"]}, description=${properties["description"]}." } + } } } } @@ -58,32 +72,10 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke } } - private val String?.emptyToNull get() = if (this == "") null else this - - private val Document.metaDescription: String? - get() = head().getElementsByTag("meta") - .map { it.attr("name") to it.attr("content") } - .firstOrNull { it.first == "description" } - ?.second - - private val Document.firstNonHeadingParagraph: String? - get() = body().children() - .filter { it.children().all { it is TextNode } } - .map { it to it.text() } - .filterNot { it.second == "" } - .firstOrNull { !it.first.tagName().startsWith("h", ignoreCase = true) } - ?.second - - private val Int.human get() = when (this) { - in 0..1023 -> "$this B" - in 1024..1048575 -> "${this / 1024} KiB" - in 1048576..1073741823 -> "${this / 1048576} MiB" - else -> "${this / 1073741824} GiB" - } - override fun failed(uri: FreenetURI) { failureCache.put(uri.toString().decode().normalize(), true) removeLoadingLink(uri) + logger.fine { "Download failed for $uri." } } private fun removeLoadingLink(uri: FreenetURI) { @@ -110,7 +102,28 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke return LinkedElement(link, loading = true) } - private fun String.decode() = URLDecoder.decode(this, "UTF-8")!! - private fun String.normalize() = Normalizer.normalize(this, Normalizer.Form.NFC)!! +} + +private fun String.decode() = try { URLDecoder.decode(this, "UTF-8")!! } catch (e: RuntimeException) { freenet.support.Logger.error(DefaultElementLoader::class.java, "Could not decode %s!".format(this), e); throw e } +private fun String.normalize() = Normalizer.normalize(this, Normalizer.Form.NFC)!! +private val String?.emptyToNull get() = if (this == "") null else this + +private val Document.metaDescription: String? + get() = head().getElementsByTag("meta") + .map { it.attr("name") to it.attr("content") } + .firstOrNull { it.first == "description" } + ?.second + +private val Document.firstNonHeadingParagraph: String? + get() = body().select("div, p") + .filter { it.textNodes().isNotEmpty() } + .map { it to it.text() } + .firstOrNull { it.second != "" } + ?.second +private val Int.human get() = when (this) { + in 0..1023 -> "$this B" + in 1024..1048575 -> "${this / 1024} KiB" + in 1048576..1073741823 -> "${this / 1048576} MiB" + else -> "${this / 1073741824} GiB" }