1 package net.pterodactylus.sone.text
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import net.pterodactylus.sone.utils.let
19 import org.bitpedia.util.Base32
20 import java.net.MalformedURLException
23 * [Parser] implementation that can recognize Freenet URIs.
25 class SoneTextParser(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {
/**
 * Parses [source] into a list of parts by sending its lines through the pipeline below.
 *
 * NOTE(review): the expression that produces the initial list of lines is not visible
 * in this view — presumably [source] split at line breaks; confirm.
 *
 * @param source the text to parse
 * @param context optional parser context; it is handed to [splitLineIntoParts] for every line
 */
27 fun parse(source: String, context: SoneTextParserContext?) =
// Strip whitespace-only lines from the beginning and from the end of the text.
29 .dropWhile { it.trim() == "" }
30 .dropLastWhile { it.trim() == "" }
31 .mergeMultipleEmptyLines()
// Turn every remaining line into its parts and flatten the results into a single list.
32 .flatMap { splitLineIntoParts(it, context) }
// Drop parts equal to PlainTextPart("") first, then join neighbouring plain-text parts.
33 .removeEmptyPlainTextParts()
34 .mergeAdjacentPlainTextParts()
/**
 * Splits a single [line] into parts.
 *
 * Every element of the generated sequence is a pair of "part produced by this step" and
 * "text that still has to be processed". The seed pairs an empty plain-text part with the
 * whole line; only the parts (the first components) are returned.
 *
 * NOTE(review): several lines of this function are not visible in this view (the result of
 * the empty-remainder check and the collection the link candidates are taken from).
 */
36 private fun splitLineIntoParts(line: String, context: SoneTextParserContext?) =
37 generateSequence(PlainTextPart("") as Part to line) { remainder ->
38 if (remainder.second == "")
// Collect the next link each candidate can find in the remaining text and keep the one
// with the smallest position.
42 .mapNotNull { it.findNext(remainder.second) }
43 .minBy { it.position }
// No link found: the rest of the text is one plain-text part and nothing remains.
46 it == null -> PlainTextPart(remainder.second) to ""
// Link starts right here: convert it and continue with the text following the link.
47 it.position == 0 -> it.toPart(context) to it.remainder
// Link starts later: emit the text in front of it as plain text and put the link text back
// in front of the remainder so that it is handled by the next step.
48 else -> PlainTextPart(remainder.second.substring(0, it.position)) to (it.link + it.remainder)
// Keep only the parts; this includes the empty plain-text part of the seed.
51 }.map { it.first }.toList()
/**
 * Converts a found link into the part that represents it, depending on its link type.
 *
 * NOTE(review): this view does not show every line of the function; the `when` labels of the
 * two Freenet-URI branches, the `try` openers and the bodies of the `catch` blocks are missing.
 */
53 private fun NextLink.toPart(context: SoneTextParserContext?) = when (linkType) {
// Parse the link as a Freenet URI; a MalformedURLException is caught below.
55 FreenetURI(link).let { freenetUri ->
// KSK URIs contribute their guessable key …
58 if (freenetUri.isKSK) {
59 freenetUri.guessableKey
// … all others the meta string, then the document name, then the first nine characters of the link.
61 freenetUri.metaString ?: freenetUri.docName ?: link.substring(0, 9)
// The link up to (not including) the first '?'.
63 link.split('?').first()
66 } catch (e: MalformedURLException) {
// The link counts as trusted only if the context supplies a routing key and that key has the
// same content as the routing key of the link's URI.
71 FreenetLinkPart(link, FreenetURI(link).docName, trusted = context?.routingKey?.contentEquals(FreenetURI(link).routingKey) == true)
72 } catch (e: MalformedURLException) {
// Skip the first 7 characters (the length of "sone://") to get the ID; if there is no provider
// or it does not return a Sone, an IdOnlySone is used instead.
75 SONE -> link.substring(7).let { SonePart(soneProvider?.getSone(it) ?: IdOnlySone(it)) }
// Skip the first 7 characters (the length of "post://"); without a provider or a matching post
// the link is kept as plain text.
76 POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
// Local part is everything in front of the '@'; the ID is what lies between the '@' and the
// last 9 characters (the length of ".freemail").
77 FREEMAIL -> link.indexOf('@').let { atSign ->
78 link.substring(atSign + 1, link.length - 9).let { freemailId ->
79 FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
// Second argument is derived from the link by the chain of String extension properties below
// (parts of the chain are not visible in this view).
82 HTTP, HTTPS -> LinkPart(link, link
86 .withoutMiddlePathComponents
87 .withoutTrailingSlash)
/**
 * Folds the lines of this list into a new list of strings.
 *
 * NOTE(review): the results of the first two branches are not visible in this view;
 * presumably the first line is added unchanged and an empty line following a "\n"
 * entry is dropped — confirm.
 */
92 private fun List<String>.mergeMultipleEmptyLines() = fold(emptyList<String>()) { previous, current ->
// Nothing collected yet, i.e. this is the first line.
93 if (previous.isEmpty()) {
// The last collected entry is a bare line break and the current line is empty as well.
96 if ((previous.last() == "\n") && (current == "")) {
// Otherwise the line is appended with a line break in front of it.
99 previous + ("\n" + current)
/**
 * Folds this list into a new list in which a plain-text part that directly follows another
 * plain-text part is merged into it.
 *
 * NOTE(review): the branch for all other parts is not visible in this view; presumably the
 * part is simply appended — confirm.
 */
104 private fun List<Part>.mergeAdjacentPlainTextParts() = fold(emptyList<Part>()) { parts, part ->
105 if ((parts.lastOrNull() is PlainTextPart) && (part is PlainTextPart)) {
// Replace the last collected part by one whose text is the concatenation of both texts.
106 parts.dropLast(1) + PlainTextPart(parts.last().text + part.text)
/**
 * Returns a new list that contains every part of this list except those that are equal to
 * a [PlainTextPart] with empty text.
 */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> =
    filter { part -> part != PlainTextPart("") }
/**
 * This string decoded with [Base32.decode] and the resulting bytes re-encoded with
 * [Base64.encode].
 */
private val String.decodedId: String
    get() {
        val rawBytes = Base32.decode(this)
        return Base64.encode(rawBytes)
    }
/**
 * This string without everything up to and including the first `//`.
 *
 * A string that does not contain `//` is returned unchanged. (The previous index arithmetic
 * silently dropped the first character in that case because `indexOf` returned -1.)
 */
private val String.withoutProtocol get() = substringAfter("//")
/**
 * This string up to, but not including, the first `?`; the whole string if it contains no `?`.
 */
private val String.withoutUrlParameters: String
    get() = substringBefore('?')
// NOTE(review): the start and the end of this getter are not visible in this view; the
// visible step works on the first element of a list (see replaceFirst) — presumably the
// host name obtained by splitting the string at "/"; confirm.
118 private val String.withoutWwwPrefix
// Split the first element at ".", drop every leading "www" label and join the rest with "." again.
120 .replaceFirst { it.split(".").dropWhile { it == "www" }.joinToString(".") }
/**
 * Returns a list in which the element at index 0 is replaced by the result of applying
 * [replacement] to it; all other elements are copied unchanged. For an empty list
 * [replacement] is never called.
 */
123 private fun <T> List<T>.replaceFirst(replacement: (T) -> T) = mapIndexed { index, element ->
124 if (index == 0) replacement(element) else element
// Shortens a "/"-separated string by replacing everything between its first and its last
// component with "…".
// NOTE(review): the condition that decides when this shortening is applied, and what is
// returned otherwise, are not visible in this view.
127 private val String.withoutMiddlePathComponents
128 get() = split("/").let {
130 "${it.first()}/…/${it.last()}"
/**
 * This string with a single trailing `/` removed; strings that do not end in `/` are
 * returned as they are.
 */
private val String.withoutTrailingSlash: String
    get() = removeSuffix("/")
/**
 * The routing key of this context's posting Sone, or `null` if the context has no posting Sone.
 */
private val SoneTextParserContext.routingKey: ByteArray?
    get() {
        val sone = postingSone ?: return null
        return sone.routingKey
    }
/**
 * The routing key of this Sone, obtained by running its ID through [Base64.decode].
 */
private val Sone.routingKey: ByteArray
    get() {
        val encodedKey = id
        return Base64.decode(encodedKey)
    }
// The kinds of links the parser can find. Every constant carries the scheme its links start
// with and a flag telling whether it is a Freenet link; for Freenet links a directly
// preceding "freenet:" prefix is treated as part of the match (see findNext).
// NOTE(review): not all constants of this enum are visible in this view.
139 private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {
145 HTTP("http://", false),
146 HTTPS("https://", false),
147 SONE("sone://", false) {
// Sone links are only accepted if they are exactly 50 characters long — presumably the
// 7-character scheme plus a 43-character Sone ID; confirm.
148 override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
150 POST("post://", false),
// Finds a Freemail address ("<local part>@<52-character ID>.freemail") in the given line.
// Only the first occurrence of ".freemail" in the line is examined; if it does not pass the
// checks below, null is returned even if a later occurrence would be valid.
152 override fun findNext(line: String): NextLink? {
// The suffix must start at index 54 or later, i.e. at least 54 characters precede it.
153 val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
// Exactly 53 characters in front of the suffix there has to be the '@' …
154 if (line[nextFreemailSuffix - 53] != '@') return null
// … and the 52 characters between the '@' and the suffix may only consist of a-z and 2-7
// (presumably the Base32 alphabet, as the ID is later Base32-decoded).
155 if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches(Regex("^[a-z2-7]*\$"))) return null
// Starting at the '@', walk backwards for as long as the preceding character is a valid
// local-part character; the last index produced is where the address starts. The sequence is
// seeded with a non-null index and therefore never empty, so the elvis branch is not expected
// to be taken. If no valid character precedes the '@', the address starts at the '@' itself.
156 val firstCharacterIndex = generateSequence(nextFreemailSuffix - 53) {
157 it.minus(1).takeIf { (it >= 0) && line[it].validLocalPart }
158 }.lastOrNull() ?: return null
// The link runs up to and including ".freemail" (9 characters); everything after it is the remainder.
159 return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9))
/**
 * Whether this character may appear in the local part of a Freemail address: the ASCII
 * letters, the ASCII digits, `-`, `_` and `.`.
 */
private val Char.validLocalPart: Boolean
    get() = when (this) {
        in 'A'..'Z', in 'a'..'z', in '0'..'9' -> true
        '-', '_', '.' -> true
        else -> false
    }
/**
 * Locates the first occurrence of this link type's scheme in [line] and returns the link
 * starting there, or `null` if the scheme does not occur or the length of the link is
 * rejected by [validateLinkLength]. Later occurrences of the scheme are not examined.
 */
165 open fun findNext(line: String): NextLink? {
166 val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
// Length of the link, counted from the start of the scheme.
167 val endOfLink = line.substring(nextLinkPosition).findEndOfLink().validate() ?: return null
168 val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
// For Freenet link types a directly preceding "freenet:" (8 characters) is covered by the
// match: the reported position moves in front of the prefix, the link text itself does not
// contain it.
169 val realNextLinkPosition = if (freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")) nextLinkPosition - 8 else nextLinkPosition
170 return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
/**
 * Determines where the link at the beginning of this string ends: the string is cut at the
 * first whitespace character (or kept whole if there is none), trailing punctuation is
 * removed, and the result is handed to [upToFirstUnmatchedParen].
 */
private fun String.findEndOfLink() =
    take(whitespace.find(this)?.range?.first ?: length)
        .dropLastWhile { isPunctuation(it) }
        .upToFirstUnmatchedParen()
/**
 * Passes this link length to [validateLinkLength] and returns its verdict: the length
 * itself if it is acceptable, `null` otherwise.
 */
private fun Int.validate(): Int? {
    return validateLinkLength(this)
}
/**
 * Checks the length of a link of this type.
 *
 * @param length the length of the link, including the scheme
 * @return [length] if the link is longer than the scheme alone, `null` otherwise
 */
protected open fun validateLinkLength(length: Int): Int? =
    if (length > scheme.length) length else null
// Scans the characters of this string while tracking a pair of
// (number of currently open parentheses, index of the first ')' that had no open partner).
// NOTE(review): the `when` header and the expression that turns the folded pair into the
// result are not visible in this view.
181 private fun String.upToFirstUnmatchedParen() =
182 foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
// An opening parenthesis increases the counter.
184 '(' -> (openParens + 1) to firstUnmatchedParen
// A closing parenthesis always decreases the counter (it may become negative); its index is
// recorded only if the counter was 0 and no unmatched parenthesis has been recorded before.
185 ')' -> ((openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen))
// Any other character leaves the state untouched.
186 else -> openParens to firstUnmatchedParen
/** The characters that count as punctuation: `.`, `,`, `?` and `!`. */
private val punctuationChars = ".,?!".toList()

/** Tells whether [char] is one of the [punctuationChars]. */
private fun isPunctuation(char: Char) = punctuationChars.contains(char)
// Matches a single character of the set that terminates a link (see findEndOfLink). The first
// entry is written with an escaped backslash, so the regex engine rather than the Kotlin
// compiler interprets that escape (U+000A, line feed); all other entries are inserted into the
// pattern as literal characters.
// NOTE(review): tab (U+0009) and carriage return (U+000D) are not part of this set — confirm
// that this is intended.
195 private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
/**
 * A link that was found in a piece of text.
 *
 * @property position the index in the searched text at which the match starts
 * @property linkType the type of the link
 * @property link the text of the link
 * @property remainder the text that follows the link
 */
private data class NextLink(
    val position: Int,
    val linkType: LinkType,
    val link: String,
    val remainder: String
)