// Post-patch content of src/main/kotlin/net/pterodactylus/sone/core/DefaultElementLoader.kt, taken from:
// X-Git-Url: https://git.pterodactylus.net/?p=Sone.git;a=blobdiff_plain;f=src%2Fmain%2Fkotlin%2Fnet%2Fpterodactylus%2Fsone%2Fcore%2FDefaultElementLoader.kt;h=409d18c6f2004c1d698ea60913fa07ef4fc79001;hp=98a2caa7f2e78a31b34f5a757600aa3309eeb377;hb=HEAD;hpb=b91ce252d94a8876097b939e129dc33264cef2f5
package net.pterodactylus.sone.core

import com.google.common.base.Ticker
import com.google.common.cache.Cache
import com.google.common.cache.CacheBuilder
import freenet.keys.FreenetURI
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.TextNode
import java.io.ByteArrayInputStream
import java.net.URLDecoder
import java.nio.charset.Charset
import java.text.Normalizer
import java.util.concurrent.TimeUnit.MINUTES
import javax.activation.MimeType
import javax.imageio.ImageIO
import javax.inject.Inject

/**
 * [ElementLoader] implementation that uses a simple Guava [com.google.common.cache.Cache].
 *
 * Three caches are kept, all keyed by the URL-decoded, NFC-normalized link text:
 * links currently being fetched, links whose fetch failed (entries expire 30 minutes
 * after being written, measured with the supplied [Ticker]), and successfully loaded
 * elements.
 *
 * @param freenetInterface used to start background fetches
 * @param ticker time source for the expiry of the failure cache
 */
class DefaultElementLoader(private val freenetInterface: FreenetInterface, ticker: Ticker) : ElementLoader {

    /** Injection constructor; uses the system ticker for failure-cache expiry. */
    @Inject
    constructor(freenetInterface: FreenetInterface) : this(freenetInterface, Ticker.systemTicker())

    private val loadingLinks: Cache<String, Boolean> = CacheBuilder.newBuilder().build()
    private val failureCache: Cache<String, Boolean> = CacheBuilder.newBuilder().ticker(ticker).expireAfterWrite(30, MINUTES).build()
    private val elementCache: Cache<String, LinkedElement> = CacheBuilder.newBuilder().build()

    private val callback = object : FreenetInterface.BackgroundFetchCallback {

        /**
         * Cancels a fetch when the content is larger than [MAX_ELEMENT_SIZE] bytes or
         * when its MIME type is neither an image type nor `text/html`.
         */
        override fun shouldCancel(uri: FreenetURI, mimeType: String, size: Long): Boolean {
            return (size > MAX_ELEMENT_SIZE) || (!mimeType.startsWith("image/") && !mimeType.startsWith("text/html"))
        }

        /**
         * Caches a [LinkedElement] for fetched image or HTML data. Other MIME types are
         * ignored. The link is always removed from the set of loading links, even if
         * parsing the MIME type or the data throws; otherwise the link would be
         * reported as "loading" forever because [loadingLinks] never expires.
         */
        override fun loaded(uri: FreenetURI, mimeTypeText: String, data: ByteArray) {
            try {
                val mimeType = MimeType(mimeTypeText)
                when {
                    mimeType.primaryType == "image" -> cacheImage(uri, data)
                    mimeType.baseType == "text/html" -> cacheHtml(uri, mimeType, data)
                }
            } finally {
                removeLoadingLink(uri)
            }
        }

        /** Caches an image element, but only if [ImageIO] is able to decode the data. */
        private fun cacheImage(uri: FreenetURI, data: ByteArray) {
            val decodedImage = ByteArrayInputStream(data).use { ImageIO.read(it) }
            if (decodedImage == null) {
                return
            }
            elementCache.get(uri.cacheKey()) {
                LinkedElement(uri.toString(), properties = mapOf("type" to "image", "size" to data.size, "sizeHuman" to data.size.human))
            }
        }

        /** Caches an HTML element together with the title and a description taken from the page. */
        private fun cacheHtml(uri: FreenetURI, mimeType: MimeType, data: ByteArray) {
            val document = Jsoup.parse(data.toString(mimeType.charsetOrUtf8()))
            elementCache.get(uri.cacheKey()) {
                LinkedElement(uri.toString(), properties = mapOf(
                    "type" to "html", "size" to data.size, "sizeHuman" to data.size.human,
                    "title" to document.title().emptyToNull,
                    "description" to (document.metaDescription ?: document.firstNonHeadingParagraph)
                ))
            }
        }

        /**
         * Returns the charset named by the `charset` parameter of this MIME type, or
         * UTF-8 if the parameter is missing or names a charset that is illegal or not
         * supported by this JVM.
         */
        private fun MimeType.charsetOrUtf8(): Charset {
            val charsetName = getParameter("charset") ?: return Charsets.UTF_8
            return try {
                Charset.forName(charsetName)
            } catch (e: IllegalArgumentException) {
                // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
                Charsets.UTF_8
            }
        }

        /** Maps the empty string to `null`; every other value is returned unchanged. */
        private val String?.emptyToNull get() = if (this == "") null else this

        /** Content of the first `meta` tag in the head whose `name` attribute is `description`. */
        private val Document.metaDescription: String?
            get() = head().getElementsByTag("meta")
                .map { it.attr("name") to it.attr("content") }
                .firstOrNull { it.first == "description" }
                ?.second

        /**
         * Text of the first direct child of the body that has non-empty text and whose
         * tag name does not start with "h" (case-insensitively).
         */
        private val Document.firstNonHeadingParagraph: String?
            get() = body().children()
                // NOTE(review): children() appears to yield child elements only, so this
                // presumably keeps elements without nested elements — confirm against jsoup.
                .filter { element -> element.children().all { it is TextNode } }
                .map { it to it.text() }
                .filterNot { it.second == "" }
                .firstOrNull { !it.first.tagName().startsWith("h", ignoreCase = true) }
                ?.second

        /** Human-readable size using binary units; the value is truncated, not rounded. */
        private val Int.human get() = when (this) {
            in 0..1023 -> "$this B"
            in 1024..1048575 -> "${this / 1024} KiB"
            in 1048576..1073741823 -> "${this / 1048576} MiB"
            else -> "${this / 1073741824} GiB"
        }

        /** Records the failure and stops reporting the link as loading. */
        override fun failed(uri: FreenetURI) {
            failureCache.put(uri.cacheKey(), true)
            removeLoadingLink(uri)
        }

        private fun removeLoadingLink(uri: FreenetURI) {
            synchronized(loadingLinks) {
                loadingLinks.invalidate(uri.cacheKey())
            }
        }
    }

    /**
     * Returns the cached element for [link] if one exists, a failed element if the
     * link is in the failure cache, and otherwise a "loading" element. In the last
     * case a background fetch is started unless one is already registered for the
     * link.
     */
    override fun loadElement(link: String): LinkedElement {
        val normalizedLink = link.decode().normalize()
        synchronized(loadingLinks) {
            elementCache.getIfPresent(normalizedLink)?.let { return it }
            if (failureCache.getIfPresent(normalizedLink) != null) {
                return LinkedElement(link, failed = true)
            }
            if (loadingLinks.getIfPresent(normalizedLink) == null) {
                // Build the URI before marking the link as loading: should the
                // constructor reject the link, no never-expiring entry is left behind.
                val uri = FreenetURI(link)
                loadingLinks.put(normalizedLink, true)
                freenetInterface.startFetch(uri, callback)
            }
        }
        return LinkedElement(link, loading = true)
    }

    /** Key under which this URI is stored in all three caches. */
    private fun FreenetURI.cacheKey(): String = toString().decode().normalize()

    private fun String.decode(): String = URLDecoder.decode(this, "UTF-8")

    private fun String.normalize(): String = Normalizer.normalize(this, Normalizer.Form.NFC)

    private companion object {
        /** Largest content size, in bytes, that will be fetched (2 MiB). */
        private const val MAX_ELEMENT_SIZE = 2L * 1024 * 1024
    }

}