🎨 🔊 log string leading to exception
[Sone.git] / src / main / kotlin / net / pterodactylus / sone / core / DefaultElementLoader.kt
index ebdae49..e497fdc 100644 (file)
@@ -6,12 +6,12 @@ import com.google.common.cache.CacheBuilder
 import freenet.keys.FreenetURI
 import org.jsoup.Jsoup
 import org.jsoup.nodes.Document
-import org.jsoup.nodes.TextNode
 import java.io.ByteArrayInputStream
 import java.net.URLDecoder
 import java.nio.charset.Charset
 import java.text.Normalizer
 import java.util.concurrent.TimeUnit.MINUTES
+import java.util.logging.Logger
 import javax.activation.MimeType
 import javax.imageio.ImageIO
 import javax.inject.Inject
@@ -23,23 +23,35 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke
 
        @Inject constructor(freenetInterface: FreenetInterface): this(freenetInterface, Ticker.systemTicker())
 
-       private val loadingLinks: Cache<String, Boolean> = CacheBuilder.newBuilder().build<String, Boolean>()
-       private val failureCache: Cache<String, Boolean> = CacheBuilder.newBuilder().ticker(ticker).expireAfterWrite(30, MINUTES).build<String, Boolean>()
-       private val elementCache: Cache<String, LinkedElement> = CacheBuilder.newBuilder().build<String, LinkedElement>()
+       private val loadingLinks: Cache<String, Boolean> = CacheBuilder.newBuilder().build()
+       private val failureCache: Cache<String, Boolean> = CacheBuilder.newBuilder().ticker(ticker).expireAfterWrite(30, MINUTES).build()
+       private val elementCache: Cache<String, LinkedElement> = CacheBuilder.newBuilder().build()
+       private val logger = Logger.getLogger(DefaultElementLoader::class.qualifiedName)
        private val callback = object: FreenetInterface.BackgroundFetchCallback {
                override fun shouldCancel(uri: FreenetURI, mimeType: String, size: Long): Boolean {
-                       return (size > 2097152) || (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html"))
+                       if (size > 2097152) {
+                               logger.fine { "Canceling download of $uri because it’s > 2 MiB." }
+                               return true
+                       }
+                       if (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html")) {
+                               logger.fine { "Canceling download of $uri because of its MIME type, $mimeType." }
+                               return true
+                       }
+                       return false
                }
 
-               override fun loaded(uri: FreenetURI, mimeType: String, data: ByteArray) {
-                       MimeType(mimeType).also { mimeType ->
+               override fun loaded(uri: FreenetURI, mimeTypeText: String, data: ByteArray) {
+                       MimeType(mimeTypeText).also { mimeType ->
                                when {
                                        mimeType.primaryType == "image" -> {
                                                ByteArrayInputStream(data).use {
                                                        ImageIO.read(it)
                                                }?.let {
                                                        elementCache.get(uri.toString().decode().normalize()) {
-                                                               LinkedElement(uri.toString(), properties = mapOf("size" to data.size, "sizeHuman" to data.size.human))
+                                                               LinkedElement(uri.toString(), properties = mapOf("type" to "image", "size" to data.size, "sizeHuman" to data.size.human))
+                                                                       .apply {
+                                                                               logger.fine("Downloaded image from $link: size=${properties["size"]}.")
+                                                                       }
                                                        }
                                                }
                                        }
@@ -47,10 +59,12 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke
                                                val document = Jsoup.parse(data.toString(Charset.forName(mimeType.getParameter("charset") ?: "UTF-8")))
                                                elementCache.get(uri.toString().decode().normalize()) {
                                                        LinkedElement(uri.toString(), properties = mapOf(
-                                                                       "size" to data.size, "sizeHuman" to data.size.human,
+                                                                       "type" to "html", "size" to data.size, "sizeHuman" to data.size.human,
                                                                        "title" to document.title().emptyToNull,
                                                                        "description" to (document.metaDescription ?: document.firstNonHeadingParagraph)
-                                                       ))
+                                                       )).apply {
+                                                               logger.fine { "Extracted information from $link: title=${properties["title"]}, description=${properties["description"]}." }
+                                                       }
                                                }
                                        }
                                }
@@ -58,32 +72,10 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke
                        }
                }
 
-               private val String?.emptyToNull get() = if (this == "") null else this
-
-               private val Document.metaDescription: String?
-                       get() = head().getElementsByTag("meta")
-                                       .map { it.attr("name") to it.attr("content") }
-                                       .firstOrNull { it.first == "description" }
-                                       ?.second
-
-               private val Document.firstNonHeadingParagraph: String?
-                       get() = body().children()
-                                       .filter { it.children().all { it is TextNode } }
-                                       .map { it to it.text() }
-                                       .filterNot { it.second == "" }
-                                       .firstOrNull { !it.first.tagName().startsWith("h", ignoreCase = true) }
-                                       ?.second
-
-               private val Int.human get() = when (this) {
-                       in 0..1023 -> "$this B"
-                       in 1024..1048575 -> "${this / 1024} KiB"
-                       in 1048576..1073741823 -> "${this / 1048576} MiB"
-                       else -> "${this / 1073741824} GiB"
-               }
-
                override fun failed(uri: FreenetURI) {
                        failureCache.put(uri.toString().decode().normalize(), true)
                        removeLoadingLink(uri)
+                       logger.fine { "Download failed for $uri." }
                }
 
                private fun removeLoadingLink(uri: FreenetURI) {
@@ -110,7 +102,28 @@ class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticke
                return LinkedElement(link, loading = true)
        }
 
-       private fun String.decode() = URLDecoder.decode(this, "UTF-8")!!
-       private fun String.normalize() = Normalizer.normalize(this, Normalizer.Form.NFC)!!
+}
+
+private fun String.decode() = try { URLDecoder.decode(this, "UTF-8")!! } catch (e: RuntimeException) { freenet.support.Logger.error(DefaultElementLoader::class.java, "Could not decode %s!".format(this), e); throw e }
+private fun String.normalize() = Normalizer.normalize(this, Normalizer.Form.NFC)!!
+private val String?.emptyToNull get() = if (this == "") null else this
+
+private val Document.metaDescription: String?
+       get() = head().getElementsByTag("meta")
+               .map { it.attr("name") to it.attr("content") }
+               .firstOrNull { it.first == "description" }
+               ?.second
+
+private val Document.firstNonHeadingParagraph: String?
+       get() = body().select("div, p")
+               .filter { it.textNodes().isNotEmpty() }
+               .map { it to it.text() }
+               .firstOrNull { it.second != "" }
+               ?.second
 
+private val Int.human get() = when (this) {
+       in 0..1023 -> "$this B"
+       in 1024..1048575 -> "${this / 1024} KiB"
+       in 1048576..1073741823 -> "${this / 1048576} MiB"
+       else -> "${this / 1073741824} GiB"
 }