1 package net.pterodactylus.sone.text
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import org.bitpedia.util.Base32
19 import java.net.MalformedURLException
23 * [Parser] implementation that can recognize Freenet URIs.
25 class SoneTextParser @Inject constructor(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {
/**
 * Produces the list of parts for [source] by applying the steps below in order:
 * blank leading and trailing entries are dropped, runs of empty lines are merged,
 * each remaining entry is split into parts by [splitLineIntoParts], empty plain-text
 * parts are removed and neighbouring plain-text parts are merged into one.
 *
 * @param source the text to parse
 * @param context handed unchanged to [splitLineIntoParts] for every entry; may be `null`
 */
27 fun parse(source: String, context: SoneTextParserContext?) =
// "Blank" means the entry is empty after trimming.
29 .dropWhile { it.trim() == "" }
30 .dropLastWhile { it.trim() == "" }
31 .mergeMultipleEmptyLines()
32 .flatMap { splitLineIntoParts(it, context) }
33 .removeEmptyPlainTextParts()
34 .mergeAdjacentPlainTextParts()
/**
 * Splits one line into a list of parts.
 *
 * A sequence of (part, remaining text) pairs is generated, seeded with an empty
 * plain-text part paired with the whole [line]; only the parts (`first`) end up
 * in the returned list. The empty seed part is therefore always the first element.
 *
 * @param line the text to split
 * @param context handed to `toPart` when a link is converted; may be `null`
 */
36 private fun splitLineIntoParts(line: String, context: SoneTextParserContext?) =
37 generateSequence(PlainTextPart("") as Part to line) { remainder ->
38 if (remainder.second == "")
42 .mapNotNull { it.findNext(remainder.second) }
// Of all candidates, the one with the smallest position wins.
// NOTE(review): the `it == null` branch below relies on `minBy` returning null for an
// empty collection; newer Kotlin versions spell that `minByOrNull` — confirm the Kotlin version in use.
43 .minBy { it.position }
// No link left: the rest of the text becomes plain text and nothing remains.
46 it == null -> PlainTextPart(remainder.second) to ""
// Link at the very start: emit it and continue with the text after it.
47 it.position == 0 -> it.toPart(context) to it.remainder
// Link further in: emit the text in front of it as plain text and put the link back in
// front of the remainder. Text between `position` and the start of `link` is not carried over.
48 else -> PlainTextPart(remainder.second.substring(0, it.position)) to (it.link + it.remainder)
51 }.map { it.first }.toList()
/**
 * Converts a located link into a part, choosing the conversion by [NextLink.linkType].
 *
 * @param context used to decide whether a Freenet link is `trusted`; may be `null`
 */
53 private fun NextLink.toPart(context: SoneTextParserContext?) = when (linkType) {
55 FreenetURI(link).let { freenetUri ->
58 if (freenetUri.isKSK) {
59 freenetUri.guessableKey
// Fallback chain: meta string, then document name, then the first nine characters of the link.
61 freenetUri.metaString ?: freenetUri.docName ?: link.substring(0, 9)
// The link with everything from the first '?' onwards removed.
63 link.split('?').first()
66 } catch (e: MalformedURLException) {
71 FreenetURI(link).let { uri ->
// Without a document name the text is "<key type>@<Base64 of the routing key>".
72 uri.docName ?: "${uri.keyType}@${uri.routingKey.freenetBase64}"
// `trusted` is true only if the context yields a routing key whose content equals the link's
// routing key; a null context or null key gives false. The link is parsed a second time here.
73 }.let { FreenetLinkPart(link, it, trusted = context?.routingKey?.contentEquals(FreenetURI(link).routingKey) == true) }
74 } catch (e: MalformedURLException) {
// substring(7) skips the seven characters of "sone://". If there is no provider, or it returns
// null for the ID, an IdOnlySone is used instead.
77 SONE -> link.substring(7).let { SonePart(soneProvider?.getSone(it) ?: IdOnlySone(it)) }
// substring(7) skips the seven characters of "post://". Without a provider or a post the
// link is emitted as plain text.
78 POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
79 FREEMAIL -> link.indexOf('@').let { atSign ->
// The ID is the text between '@' and the last nine characters (the length of ".freemail").
80 link.substring(atSign + 1, link.length - 9).let { freemailId ->
81 FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
// The second argument is a shortened form of the link, built by the chained extension properties.
84 HTTP, HTTPS -> LinkPart(link, link
88 .withoutMiddlePathComponents
89 .withoutTrailingSlash)
/**
 * Folds the lines into a new list, starting from an empty list.
 *
 * Entries added through `previous + ("\n" + current)` carry a leading newline, so an
 * empty line shows up in the accumulator as exactly `"\n"`. That is what the check
 * on `previous.last()` looks for when another empty line arrives.
 */
94 private fun List<String>.mergeMultipleEmptyLines() = fold(emptyList<String>()) { previous, current ->
95 if (previous.isEmpty()) {
// True when the last accumulated entry is an empty line and the current line is empty too.
98 if ((previous.last() == "\n") && (current == "")) {
101 previous + ("\n" + current)
/**
 * Folds the parts into a new list, starting from an empty list.
 *
 * When both the last accumulated part and the incoming part are [PlainTextPart]s,
 * the last one is replaced by a single [PlainTextPart] holding both texts concatenated.
 */
106 private fun List<Part>.mergeAdjacentPlainTextParts() = fold(emptyList<Part>()) { parts, part ->
107 if ((parts.lastOrNull() is PlainTextPart) && (part is PlainTextPart)) {
// Drop the previous plain-text part and append the merged one in its place.
108 parts.dropLast(1) + PlainTextPart(parts.last().text + part.text)
/** Returns this list without any part that equals an empty [PlainTextPart]. */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
    val emptyText = PlainTextPart("")
    return filter { part -> part != emptyText }
}
/** This string decoded as Base32 and re-encoded as Base64. */
private val String.decodedId: String
    get() {
        val rawBytes = Base32.decode(this)
        return Base64.encode(rawBytes)
    }
/**
 * The text after the first `//`, e.g. `example.com/a` for `http://example.com/a`.
 *
 * A string without `//` is returned unchanged. The previous index arithmetic
 * (`indexOf("//") + 2`) evaluated to 1 in that case and silently dropped the
 * first character.
 */
private val String.withoutProtocol get() = substringAfter("//")
/** Everything before the first `?`, or the whole string when it contains none. */
private val String.withoutUrlParameters: String
    get() = substringBefore('?')
// Removes leading "www" labels: the first element of the list being processed is split on ".",
// every leading label equal to "www" is dropped (not just one), and the rest is joined with "." again.
// The inner lambda's `it` shadows the outer one.
120 private val String.withoutWwwPrefix
122 .replaceFirst { it.split(".").dropWhile { it == "www" }.joinToString(".") }
/**
 * Returns a list in which the element at index 0 has been passed through [replacement];
 * all other elements are kept as they are. An empty list stays empty.
 */
125 private fun <T> List<T>.replaceFirst(replacement: (T) -> T) = mapIndexed { index, element ->
126 if (index == 0) replacement(element) else element
// Shortens a path: the string is split on "/", and in the branch shown only the first and the
// last component are kept, joined by "/…/".
129 private val String.withoutMiddlePathComponents
130 get() = split("/").let {
132 "${it.first()}/…/${it.last()}"
/** This string with a single trailing `/` removed, if it has one. */
private val String.withoutTrailingSlash: String
    get() = removeSuffix("/")
/** Routing key of the posting Sone, or `null` when the context has no posting Sone. */
private val SoneTextParserContext.routingKey: ByteArray?
    get() = postingSone?.let { sone -> sone.routingKey }

/** The Sone's ID decoded from Base64. */
private val Sone.routingKey: ByteArray
    get() {
        val soneId = id
        return Base64.decode(soneId)
    }
/**
 * Kinds of links the parser looks for.
 *
 * `scheme` is the text searched for in a line to locate a link of this kind.
 * `freenetLink` controls whether a directly preceding "freenet:" is counted as
 * part of the match position (see `findNext`).
 */
141 private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {
147 HTTP("http://", false),
148 HTTPS("https://", false),
149 SONE("sone://", false) {
// A sone link is only accepted at exactly 50 characters: the 7 of "sone://" plus 43 more
// (presumably the length of a Sone ID — confirm).
150 override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
152 POST("post://", false),
/**
 * Locates a Freemail address of the form `<local part>@<52 characters of a-z and 2-7>.freemail`.
 *
 * Returns `null` when the first ".freemail" in [line] is not preceded by such an address.
 * NOTE(review): only the first occurrence of ".freemail" is examined, so a valid address
 * later in the same line is not found if an earlier occurrence fails the checks.
 */
154 override fun findNext(line: String): NextLink? {
// At least 54 characters must precede the suffix: one before '@', the '@' itself and the 52-character ID.
155 val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
156 if (line[nextFreemailSuffix - 53] != '@') return null
// The 52 characters between '@' and the suffix may only be a-z and 2-7.
157 if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches(Regex("^[a-z2-7]*\$"))) return null
// Walk backwards from the '@' for as long as the preceding character is a valid local-part
// character; the last index produced is where the address starts.
// NOTE(review): the seed (the '@' index) is always part of the sequence, so `lastOrNull()` does not
// appear able to return null here; if the character before '@' is not a valid local-part
// character the address starts at '@' with an empty local part — confirm this is intended.
158 val firstCharacterIndex = generateSequence(nextFreemailSuffix - 53) {
159 it.minus(1).takeIf { (it >= 0) && line[it].validLocalPart }
160 }.lastOrNull() ?: return null
// 9 is the length of ".freemail": the link includes the suffix and the remainder starts after it.
161 return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9))
/** True for ASCII letters, ASCII digits, `-`, `_` and `.`; false for everything else. */
private val Char.validLocalPart: Boolean
    get() = when (this) {
        in 'A'..'Z', in 'a'..'z', in '0'..'9' -> true
        '-', '_', '.' -> true
        else -> false
    }
/**
 * Locates the first occurrence of this type's scheme in [line] and returns the link
 * starting there, or `null` if the scheme does not occur or the link length is rejected.
 *
 * NOTE(review): only the first occurrence of the scheme is examined; if it is rejected,
 * later occurrences in the same line are not considered.
 */
167 open fun findNext(line: String): NextLink? {
168 val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
// Link length counted from the scheme's position; null (and thus no match) if validation rejects it.
169 val endOfLink = line.substring(nextLinkPosition).findEndOfLink().validate() ?: return null
170 val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
// For Freenet link types a directly preceding "freenet:" (8 characters) moves the reported
// position back so it covers the prefix; `link` itself still starts at the scheme.
171 val realNextLinkPosition = if (freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")) nextLinkPosition - 8 else nextLinkPosition
172 return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
// Determines where a link that starts at the beginning of the receiver ends: the text is cut
// at the first whitespace match (or kept whole if there is none), trailing punctuation is
// stripped, and the rest is handed to upToFirstUnmatchedParen(). Callers use the result as a length.
175 private fun String.findEndOfLink() =
176 substring(0, whitespace.find(this)?.range?.start ?: length)
177 .dropLastWhile(::isPunctuation)
178 .upToFirstUnmatchedParen()
/** Runs this length through [validateLinkLength]. */
private fun Int.validate(): Int? = validateLinkLength(length = this)

/**
 * Returns [length] if it is acceptable for this link type, `null` otherwise.
 * By default a link must be longer than its scheme alone.
 */
protected open fun validateLinkLength(length: Int): Int? =
    if (length > scheme.length) length else null
// Scans the receiver while tracking a pair of (number of currently open parentheses,
// index of the first ')' that had no matching '(' — null until one is seen).
183 private fun String.upToFirstUnmatchedParen() =
184 foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
186 '(' -> (openParens + 1) to firstUnmatchedParen
// A ')' seen while no parenthesis is open is unmatched; only the first such index is remembered.
187 ')' -> ((openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen))
188 else -> openParens to firstUnmatchedParen
/** Characters that count as punctuation when they trail a link. */
private val punctuationChars = listOf('.', ',', '?', '!')

/** Whether [char] is one of [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.any { candidate -> candidate == char }
// Matches a single character that ends a link. The first entry reaches the regex engine as the
// escape sequence \u000a (line feed) because its backslash is doubled; the remaining entries are
// Kotlin character escapes and therefore literal characters inside the class: space, no-break
// space and further Unicode space and zero-width characters.
197 private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
/**
 * A link located in a line of text.
 *
 * @property position index in the searched text at which the match begins
 * @property linkType the kind of link that was found
 * @property link the text of the link itself
 * @property remainder the text following the link
 */
199 private data class NextLink(val position: Int, val linkType: LinkType, val link: String, val remainder: String)
/** Base64 text for this byte array; throws if the encoder returns `null`. */
private val ByteArray.freenetBase64
    get() = Base64.encode(this).let { encoded -> encoded!! }