1 package net.pterodactylus.sone.text
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import org.bitpedia.util.Base32
19 import java.net.MalformedURLException
22 * [Parser] implementation that can recognize Freenet URIs.
24 class SoneTextParser(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {
/**
 * Parses the given text into a list of [Part]s.
 *
 * The visible pipeline drops blank lines at the start and at the end, merges
 * runs of empty lines, splits every remaining line into parts, removes empty
 * plain-text parts and finally merges adjacent plain-text parts.
 *
 * @param source the text to parse
 * @param context handed to [splitLineIntoParts] for every line; may be `null`
 */
26 fun parse(source: String, context: SoneTextParserContext?) =
// NOTE(review): the receiver of this call chain is not visible in this listing;
// presumably it is `source` split into lines. Confirm against the full file.
28 .dropWhile { it.trim() == "" }
29 .dropLastWhile { it.trim() == "" }
30 .mergeMultipleEmptyLines()
31 .flatMap { splitLineIntoParts(it, context) }
32 .removeEmptyPlainTextParts()
33 .mergeAdjacentPlainTextParts()
/**
 * Splits a single line into a list of parts by repeatedly locating the next
 * link in the not-yet-consumed text.
 *
 * Every element of the generated sequence is a pair of (part, remaining text).
 * The seed is an empty plain-text part paired with the whole line; only the
 * parts are kept in the returned list.
 *
 * @param line the line to split
 * @param context handed to `toPart` when a link is converted into a part
 */
35 private fun splitLineIntoParts(line: String, context: SoneTextParserContext?) =
36 generateSequence(PlainTextPart("") as Part to line) { remainder ->
// Nothing left to consume. The branch body is not visible in this listing.
37 if (remainder.second == "")
// Ask for the next link in the remaining text and keep the candidate with the smallest position.
41 .mapNotNull { it.findNext(remainder.second) }
42 .minBy { it.position }
// No link found: the rest of the line is plain text and nothing remains.
45 it == null -> PlainTextPart(remainder.second) to ""
// Link starts right here: emit it and continue with the text after it.
46 it.position == 0 -> it.toPart(context) to it.remainder
// Link starts later: emit the text before it and re-process from the link onwards.
47 else -> PlainTextPart(remainder.second.substring(0, it.position)) to (it.link + it.remainder)
50 }.map { it.first }.toList()
/**
 * Converts a located link into the part that represents it, depending on the
 * link's type.
 *
 * @param context used to decide whether an SSK/USK link is trusted; may be `null`
 */
52 private fun NextLink.toPart(context: SoneTextParserContext?) = when (linkType) {
// NOTE(review): the branch headers for the two Freenet URI blocks below are
// not visible in this listing; the visible code parses the link as a FreenetURI.
54 FreenetURI(link).let { freenetUri ->
57 if (freenetUri.isKSK) {
58 freenetUri.guessableKey
// Otherwise prefer the meta string, then the document name, then the first nine characters of the link.
60 freenetUri.metaString ?: freenetUri.docName ?: link.substring(0, 9)
// The link text up to the first '?'.
62 link.split('?').first()
// Handler body is not visible in this listing.
65 } catch (e: MalformedURLException) {
70 FreenetURI(link).let { uri ->
// Use the document name if there is one, otherwise "<key type>@<encoded routing key>".
71 uri.docName ?: "${uri.keyType}@${uri.routingKey.freenetBase64}"
// The link is trusted only if the context's routing key equals the link's routing key.
72 }.let { FreenetLinkPart(link, it, trusted = context?.routingKey?.contentEquals(FreenetURI(link).routingKey) == true) }
// Handler body is not visible in this listing.
73 } catch (e: MalformedURLException) {
// Strip the 7-character "sone://" prefix; fall back to an ID-only Sone when no provider is set or no Sone is found.
76 SONE -> link.substring(7).let { SonePart(soneProvider?.getSone(it) ?: IdOnlySone(it)) }
// Strip the 7-character "post://" prefix; fall back to plain text when no provider is set or no post is found.
77 POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
78 FREEMAIL -> link.indexOf('@').let { atSign ->
// The ID is the text between '@' and the trailing 9 characters (".freemail").
79 link.substring(atSign + 1, link.length - 9).let { freemailId ->
80 FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
// The second argument is derived from the link by the chained shortening steps (some are not visible in this listing).
83 HTTP, HTTPS -> LinkPart(link, link
87 .withoutMiddlePathComponents
88 .withoutTrailingSlash)
/**
 * Folds the lines of this list into a new list of strings, starting from an
 * empty list.
 *
 * NOTE(review): two of the branch bodies are not visible in this listing, so
 * only the visible conditions and the final branch are described below.
 */
93 private fun List<String>.mergeMultipleEmptyLines() = fold(emptyList<String>()) { previous, current ->
// First line: nothing has been accumulated yet.
94 if (previous.isEmpty()) {
// The last accumulated element is a lone line break and the current line is empty.
97 if ((previous.last() == "\n") && (current == "")) {
// Append the current line, prefixed with a line break.
100 previous + ("\n" + current)
/**
 * Folds this list of parts into a new list in which a plain-text part that
 * directly follows another plain-text part is merged into it.
 *
 * NOTE(review): the branch for the non-matching case is not visible in this listing.
 */
105 private fun List<Part>.mergeAdjacentPlainTextParts() = fold(emptyList<Part>()) { parts, part ->
106 if ((parts.lastOrNull() is PlainTextPart) && (part is PlainTextPart)) {
// Replace the last accumulated part with one holding both texts concatenated.
107 parts.dropLast(1) + PlainTextPart(parts.last().text + part.text)
/**
 * Returns a copy of this list without the parts that are equal to an empty
 * [PlainTextPart].
 */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
    val emptyPart = PlainTextPart("")
    return filter { part -> part != emptyPart }
}
/**
 * This string decoded with [Base32] and the resulting bytes encoded again
 * with [Base64].
 */
private val String.decodedId: String
    get() {
        val rawId = Base32.decode(this)
        return Base64.encode(rawId)
    }
/**
 * This string with everything up to and including the first `//` removed.
 *
 * A string that does not contain `//` is returned unchanged. The previous
 * index arithmetic silently dropped its first character in that case.
 */
private val String.withoutProtocol get() = substringAfter("//")
/**
 * The part of this string in front of the first `?`, or the whole string if
 * it contains no `?`.
 */
private val String.withoutUrlParameters: String
    get() = substringBefore('?')
/**
 * This string with leading `www` labels removed from its first component.
 *
 * NOTE(review): the start of the getter is not visible in this listing;
 * presumably the string is split into components before the first one is
 * rewritten. Confirm against the full file.
 */
119 private val String.withoutWwwPrefix
// Rewrite only the first element: split it on dots, drop every leading "www" label, join the rest again.
121 .replaceFirst { it.split(".").dropWhile { it == "www" }.joinToString(".") }
/**
 * Returns a list in which the element at index 0 is replaced by the result of
 * applying [replacement] to it; all other elements are kept as they are.
 *
 * @param replacement computes the new first element from the old one
 */
124 private fun <T> List<T>.replaceFirst(replacement: (T) -> T) = mapIndexed { index, element ->
125 if (index == 0) replacement(element) else element
/**
 * A shortened form of this string, built from its `/`-separated components.
 *
 * NOTE(review): the condition that selects the shortened form and the
 * alternative branch are not visible in this listing.
 */
128 private val String.withoutMiddlePathComponents
129 get() = split("/").let {
// Keep only the first and the last component, with an ellipsis standing in for everything between them.
131 "${it.first()}/…/${it.last()}"
/** This string without a single trailing `/`, if it has one. */
private val String.withoutTrailingSlash: String
    get() = removeSuffix("/")
/**
 * The routing key of this context's posting Sone, or `null` if the context
 * has no posting Sone.
 */
private val SoneTextParserContext.routingKey: ByteArray?
    get() = postingSone?.let { sone -> sone.routingKey }
/** The bytes obtained by decoding this Sone's ID with [Base64]. */
private val Sone.routingKey: ByteArray
    get() {
        return Base64.decode(id)
    }
/**
 * The kinds of links the parser recognizes.
 *
 * Each constant carries the scheme prefix that introduces the link in the
 * text and a flag that marks Freenet links.
 *
 * NOTE(review): several constants are not visible in this listing.
 */
140 private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {
146 HTTP("http://", false),
147 HTTPS("https://", false),
148 SONE("sone://", false) {
// Only links of exactly 50 characters are accepted; presumably the 7-character
// scheme plus a 43-character Sone ID. Confirm.
149 override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
151 POST("post://", false),
/**
 * Locates a Freemail address of the form `<local part>@<52 characters>.freemail`
 * in the given line.
 *
 * Only the first occurrence of `.freemail` in the line is examined.
 *
 * @param line the text to search
 * @return the located link, or `null` if the first `.freemail` occurrence
 *         does not end a matching address
 */
153 override fun findNext(line: String): NextLink? {
// The suffix must start at index 54 or later so that '@' and the 52-character ID fit in front of it.
154 val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
155 if (line[nextFreemailSuffix - 53] != '@') return null
// The 52 characters between '@' and the suffix may only be lower-case letters a-z and digits 2-7.
156 if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches(Regex("^[a-z2-7]*\$"))) return null
// Walk backwards from the '@' for as long as the characters are valid local-part
// characters; the last generated index is the leftmost character of the address.
157 val firstCharacterIndex = generateSequence(nextFreemailSuffix - 53) {
158 it.minus(1).takeIf { (it >= 0) && line[it].validLocalPart }
159 }.lastOrNull() ?: return null
// The link runs through the end of ".freemail" (9 characters); the remainder is everything after it.
160 return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9))
/**
 * Whether this character is an ASCII letter, an ASCII digit, or one of `-`,
 * `_` and `.`.
 */
private val Char.validLocalPart: Boolean
    get() = when (this) {
        in 'A'..'Z', in 'a'..'z', in '0'..'9' -> true
        '-', '_', '.' -> true
        else -> false
    }
/**
 * Locates the first link of this type in the given line.
 *
 * Only the first occurrence of the scheme in the line is examined.
 *
 * @param line the text to search
 * @return the located link, or `null` if the scheme does not occur or the
 *         link's length is rejected by the validation
 */
166 open fun findNext(line: String): NextLink? {
167 val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
// Length of the link, measured from the start of the scheme.
168 val endOfLink = line.substring(nextLinkPosition).findEndOfLink().validate() ?: return null
169 val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
// For Freenet links a directly preceding "freenet:" (8 characters) moves the reported
// position back to its start; the link text itself still starts at the scheme.
170 val realNextLinkPosition = if (freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")) nextLinkPosition - 8 else nextLinkPosition
171 return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
/**
 * Determines where the link at the start of this string ends.
 *
 * The string is cut at the first whitespace character, trailing punctuation
 * is dropped, and the result is handed to [upToFirstUnmatchedParen].
 */
private fun String.findEndOfLink() = run {
    val firstWhitespace = whitespace.find(this)?.range?.first ?: length
    take(firstWhitespace)
        .dropLastWhile { isPunctuation(it) }
        .upToFirstUnmatchedParen()
}
/** Hands this length to [validateLinkLength] and returns its result. */
private fun Int.validate(): Int? {
    return validateLinkLength(this)
}
/**
 * Accepts a link length only if it is greater than the length of the scheme.
 *
 * @param length the length of the link, including the scheme
 * @return [length] if it is accepted, `null` otherwise
 */
protected open fun validateLinkLength(length: Int): Int? =
    if (length > scheme.length) length else null
/**
 * Scans this string for the first closing parenthesis that has no matching
 * opening parenthesis.
 *
 * The fold state is a pair of (number of currently open parentheses, index of
 * the first unmatched closing parenthesis or `null`).
 *
 * NOTE(review): the code that turns the final state into the result is not
 * visible in this listing.
 */
182 private fun String.upToFirstUnmatchedParen() =
183 foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
185 '(' -> (openParens + 1) to firstUnmatchedParen
// A ')' seen while nothing is open is unmatched; remember its index unless one was already recorded.
186 ')' -> ((openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen))
187 else -> openParens to firstUnmatchedParen
/** The characters that count as punctuation. */
private val punctuationChars: List<Char> = listOf('.', ',', '?', '!')

/** Returns whether [char] is one of the [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.contains(char)
/**
 * Matches a single character from a fixed set of line-break, space and
 * blank-looking characters.
 */
private val whitespace: Regex =
    Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
/**
 * A link that was located in a line of text.
 *
 * @property position the index in the searched text at which the link is reported to start
 * @property linkType the type of the link
 * @property link the text of the link
 * @property remainder the text that follows the link
 */
private data class NextLink(
    val position: Int,
    val linkType: LinkType,
    val link: String,
    val remainder: String
)
/**
 * This byte array encoded with [Base64]. Fails with a
 * [NullPointerException] if the encoder returns `null`.
 */
private val ByteArray.freenetBase64
    get() = Base64.encode(this).let { encoded -> encoded!! }