1 package net.pterodactylus.sone.text
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import org.bitpedia.util.Base32
19 import java.net.MalformedURLException
23 * [Parser] implementation that can recognize Freenet URIs.
25 class SoneTextParser @Inject constructor(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {
/**
 * Parses [source] into a list of parts.
 *
 * Visible pipeline: leading and trailing elements that are blank after trimming are
 * dropped, the remaining lines are passed through mergeMultipleEmptyLines, every
 * resulting line is split by [splitLineIntoParts], and finally empty plain-text parts are
 * removed and neighbouring plain-text parts are merged.
 *
 * NOTE(review): the expression that produces the initial list of lines is not visible
 * here; presumably [source] split on line breaks. Confirm.
 *
 * @param source the text to parse
 * @param context parsing context that is forwarded to [splitLineIntoParts]; may be null
 */
27 fun parse(source: String, context: SoneTextParserContext?) =
29 .dropWhile { it.trim() == "" }
30 .dropLastWhile { it.trim() == "" }
31 .mergeMultipleEmptyLines()
32 .flatMap { splitLineIntoParts(it, context) }
33 .removeEmptyPlainTextParts()
34 .mergeAdjacentPlainTextParts()
/**
 * Splits a single line into a list of parts.
 *
 * The sequence carries pairs of (part produced, text still to process). It is seeded with
 * an empty plain-text part and the whole line; only the parts are kept at the end. In each
 * step the candidate with the smallest position among the findNext results is used.
 *
 * NOTE(review): the collection the candidates are searched in, and the branch taken when
 * the remaining text is empty, are not visible here. Presumably all link types are tried
 * and the sequence ends on empty input. Confirm.
 */
36 private fun splitLineIntoParts(line: String, context: SoneTextParserContext?) =
37 generateSequence(PlainTextPart("") as Part to line) { remainder ->
38 if (remainder.second == "")
42 .mapNotNull { it.findNext(remainder.second) }
43 .minBy { it.position }
// No link found: the rest of the text becomes one plain-text part and nothing remains.
46 it == null -> PlainTextPart(remainder.second) to ""
// The link starts at the current position: emit it and continue with the text after it.
47 it.position == 0 -> it.toPart(context) to it.remainder
// Text precedes the link: emit that text first; the link is put back in front of its remainder.
48 else -> PlainTextPart(remainder.second.substring(0, it.position)) to (it.link + it.remainder)
51 }.map { it.first }.toList()
/**
 * Converts a located link into the part that represents it, selected by linkType.
 *
 * Visible branches: SONE yields a SonePart, POST a PostPart (or a plain-text part when no
 * post is found), FREEMAIL a FreemailPart, HTTP/HTTPS a LinkPart. Two further sections
 * build a FreenetURI from the link and catch MalformedURLException.
 *
 * NOTE(review): the branch labels of the FreenetURI sections and the bodies of the catch
 * clauses are not visible here. Presumably they cover the Freenet key types and fall back
 * to plain text. Confirm.
 */
53 private fun NextLink.toPart(context: SoneTextParserContext?) = when (linkType) {
55 FreenetURI(link).let { freenetUri ->
58 if (freenetUri.isKSK) {
59 freenetUri.guessableKey
61 freenetUri.metaString ?: freenetUri.docName ?: link.substring(0, 9)
63 link.split('?').first()
66 } catch (e: MalformedURLException) {
71 FreenetURI(link).let { uri ->
73 ?.takeIf { (it.size > 1) || ((it.size == 1) && (it.single() != ""))}
// Fallback text: key type and Base64-encoded routing key.
76 ?: "${uri.keyType}@${uri.routingKey.freenetBase64}"
// trusted is true only when the context supplies a routing key equal to the link's routing key.
77 }.let { FreenetLinkPart(link.removeSuffix("/"), it, trusted = context?.routingKey?.contentEquals(FreenetURI(link).routingKey) == true) }
78 } catch (e: MalformedURLException) {
// substring(7) skips the 7-character "sone://" scheme; an unknown ID becomes an IdOnlySone.
81 SONE -> link.substring(7).let { SonePart(soneProvider?.getSone(it) ?: IdOnlySone(it)) }
// substring(7) skips the 7-character "post://" scheme; without a matching post the link stays plain text.
82 POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
83 FREEMAIL -> link.indexOf('@').let { atSign ->
// length - 9 cuts off the 9-character ".freemail" suffix.
84 link.substring(atSign + 1, link.length - 9).let { freemailId ->
85 FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
88 HTTP, HTTPS -> LinkPart(link, link
92 .withoutMiddlePathComponents
93 .withoutTrailingSlash)
/**
 * Folds the lines of this list into a new list of strings.
 *
 * Visible logic: the first line is handled separately (empty accumulator); a special case
 * exists for an empty current line that follows an accumulated element equal to "\n";
 * otherwise the current line is appended with a leading "\n".
 *
 * NOTE(review): the bodies of the first two branches are not visible here. Presumably
 * runs of empty lines are collapsed into one. Confirm.
 */
98 private fun List<String>.mergeMultipleEmptyLines() = fold(emptyList<String>()) { previous, current ->
99 if (previous.isEmpty()) {
102 if ((previous.last() == "\n") && (current == "")) {
105 previous + ("\n" + current)
/**
 * Folds this list into a new list in which neighbouring plain-text parts are combined.
 *
 * When the last accumulated part and the current part are both PlainTextPart, the last
 * one is replaced by a single PlainTextPart holding the concatenated text.
 *
 * NOTE(review): the other branch is not visible here. Presumably the current part is
 * appended unchanged. Confirm.
 */
110 private fun List<Part>.mergeAdjacentPlainTextParts() = fold(emptyList<Part>()) { parts, part ->
111 if ((parts.lastOrNull() is PlainTextPart) && (part is PlainTextPart)) {
112 parts.dropLast(1) + PlainTextPart(parts.last().text + part.text)
/**
 * Returns a copy of this list without any part that equals an empty [PlainTextPart].
 */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
    val emptyPart = PlainTextPart("")
    return filter { part -> part != emptyPart }
}
/**
 * This string decoded as Base32 and re-encoded with Freenet's Base64 encoder.
 */
private val String.decodedId: String
    get() {
        val rawBytes = Base32.decode(this)
        return Base64.encode(rawBytes)
    }
/**
 * The part of this string that follows the first `//`, i.e. a URL without its scheme.
 *
 * A string that contains no `//` is returned unchanged; index arithmetic on a missing
 * match would otherwise cut off the first character or fail on an empty string.
 */
private val String.withoutProtocol get() = substringAfter("//")
/**
 * Everything before the first `?` of this string, or the whole string if there is none.
 */
private val String.withoutUrlParameters get() = substringBefore('?')
/**
 * This string with leading "www" host components removed.
 *
 * The visible step rewrites only the first element of a list: that element is split on
 * ".", leading components equal to "www" are dropped, and the rest is joined with "."
 * again.
 *
 * NOTE(review): how the list is produced and recombined is not visible here. Presumably
 * the string is split on "/" and joined back. Confirm.
 */
124 private val String.withoutWwwPrefix
126 .replaceFirst { it.split(".").dropWhile { it == "www" }.joinToString(".") }
/**
 * Maps this list so that the element at index 0 is passed through [replacement] while all
 * other elements are kept as they are.
 *
 * @param replacement function applied to the first element only
 */
129 private fun <T> List<T>.replaceFirst(replacement: (T) -> T) = mapIndexed { index, element ->
130 if (index == 0) replacement(element) else element
/**
 * A shortened form of this string, computed from its "/"-separated components.
 *
 * The visible branch keeps only the first and the last component and puts "/…/" between
 * them.
 *
 * NOTE(review): the condition that selects this branch and the alternative result are not
 * visible here. Presumably short paths are returned unchanged. Confirm.
 */
133 private val String.withoutMiddlePathComponents
134 get() = split("/").let {
136 "${it.first()}/…/${it.last()}"
/**
 * This string with a single trailing `/` removed, if it has one.
 */
private val String.withoutTrailingSlash get() = removeSuffix("/")
/**
 * Routing key of this context's posting Sone, or `null` when there is no posting Sone.
 */
private val SoneTextParserContext.routingKey: ByteArray?
    get() = postingSone?.let { sone -> sone.routingKey }
/**
 * The ID of this Sone, decoded with Freenet's Base64 decoder into raw bytes.
 */
private val Sone.routingKey: ByteArray
    get() {
        val encodedId = id
        return Base64.decode(encodedId)
    }
145 private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {
151 HTTP("http://", false),
152 HTTPS("https://", false),
153 SONE("sone://", false) {
// A Sone link is only accepted when the candidate is exactly 50 characters long.
override fun validateLinkLength(length: Int) = if (length == 50) length else null
156 POST("post://", false),
/**
 * Locates a Freemail address of the form `<local part>@<52 characters>.freemail` in [line].
 *
 * NOTE(review): only the first occurrence of ".freemail" is examined. If it fails one of
 * the checks, a later valid address in the same line is not found.
 *
 * @param line the text to search
 * @return the located link, or `null` if the checks below fail
 */
158 override fun findNext(line: String): NextLink? {
// Index 54 is the smallest that leaves room for one local character, '@' and 52 ID characters.
159 val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
160 if (line[nextFreemailSuffix - 53] != '@') return null
// The 52 characters between '@' and the suffix may only contain a-z and 2-7.
161 if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches(Regex("^[a-z2-7]*\$"))) return null
// Walks backwards from the '@' for as long as the preceding character is a valid local-part character.
// NOTE(review): the sequence always contains its seed, so the result is never null and is
// the index of '@' itself when no valid local-part character precedes it.
162 val firstCharacterIndex = generateSequence(nextFreemailSuffix - 53) {
163 it.minus(1).takeIf { (it >= 0) && line[it].validLocalPart }
164 }.lastOrNull() ?: return null
// + 9 moves past the 9-character ".freemail" suffix.
165 return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9))
/**
 * Whether this character is an ASCII letter, an ASCII digit, `-`, `_` or `.`.
 */
private val Char.validLocalPart
    get() = when (this) {
        in 'A'..'Z', in 'a'..'z', in '0'..'9', '-', '_', '.' -> true
        else -> false
    }
/**
 * Locates the first link of this type in [line].
 *
 * @param line the text to search
 * @return the located link, or `null` if the scheme does not occur in [line] or the length
 *         of the candidate is rejected by validateLinkLength
 */
171 open fun findNext(line: String): NextLink? {
172 val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
// endOfLink is a length relative to nextLinkPosition, not an index into line.
173 val endOfLink = line.substring(nextLinkPosition).findEndOfLink().validate() ?: return null
174 val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
// A "freenet:" prefix (8 characters) directly before a Freenet link is counted into the
// reported position but is not part of the link text.
175 val realNextLinkPosition = if (freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")) nextLinkPosition - 8 else nextLinkPosition
176 return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
/**
 * Determines how many characters at the start of this string belong to a link.
 *
 * The string is cut at the first whitespace match, trailing punctuation is stripped, and
 * the result is handed to upToFirstUnmatchedParen.
 */
private fun String.findEndOfLink(): Int {
    val firstWhitespace = whitespace.find(this)?.range?.start ?: length
    val candidate = substring(0, firstWhitespace)
    val withoutTrailingPunctuation = candidate.dropLastWhile(::isPunctuation)
    return withoutTrailingPunctuation.upToFirstUnmatchedParen()
}
/**
 * Passes this length to [validateLinkLength] and returns its result.
 */
private fun Int.validate(): Int? {
    val candidateLength = this
    return validateLinkLength(candidateLength)
}
/**
 * Accepts a link length only if it is greater than the length of the scheme.
 *
 * @param length the length of the link candidate
 * @return [length] if it is acceptable, `null` otherwise
 */
protected open fun validateLinkLength(length: Int): Int? = if (length > scheme.length) length else null
/**
 * Scans this string for the first closing parenthesis that has no opening counterpart.
 *
 * The fold carries a pair of (number of currently open parentheses, index of the first
 * ")" seen while that number was 0, or null).
 *
 * NOTE(review): how the final result is derived from that pair is not visible here.
 * Presumably the recorded index, or the full length when there is none. Confirm.
 */
187 private fun String.upToFirstUnmatchedParen() =
188 foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
190 '(' -> (openParens + 1) to firstUnmatchedParen
// The counter is decremented even when it is already 0; an index recorded earlier is never overwritten.
191 ')' -> ((openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen))
192 else -> openParens to firstUnmatchedParen
/** The characters that [isPunctuation] treats as punctuation. */
private val punctuationChars: List<Char> = ".,?!".toList()
/** Returns whether [char] is one of the characters in [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.contains(char)
// Matches a single character from a fixed set: line feed, space, no-break space and a
// number of further Unicode code points. The line feed (U+000A) is written with a doubled
// backslash, so the regex engine receives the escape text and interprets it itself; the
// other escapes are resolved in the string literal.
201 private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
/**
 * A link that was located within a line.
 *
 * @property position index in the line at which the link starts, including a directly
 *           preceding "freenet:" prefix if there is one
 * @property linkType the type of link that was matched
 * @property link the text of the link itself
 * @property remainder the text of the line that follows the link
 */
private data class NextLink(
    val position: Int,
    val linkType: LinkType,
    val link: String,
    val remainder: String
)
/**
 * This byte array encoded with Freenet's Base64 encoder; fails if the encoder returns null.
 */
private val ByteArray.freenetBase64: String
    get() = Base64.encode(this)!!