1 package net.pterodactylus.sone.text
4 import net.pterodactylus.sone.data.*
5 import net.pterodactylus.sone.data.impl.*
6 import net.pterodactylus.sone.database.*
7 import net.pterodactylus.sone.text.LinkType.*
8 import net.pterodactylus.sone.text.LinkType.USK
9 import net.pterodactylus.sone.utils.*
10 import org.bitpedia.util.*
15 * [Parser] implementation that can recognize Freenet URIs.
// Parser that turns a source text into a list of parts (plain text, links,
// Sone/post references, Freemail addresses). Both providers are nullable and
// are only ever consulted through safe calls in toPart().
// NOTE(review): the gaps in the listing numbers suggest that several lines of
// this class are not shown here; comments below only describe what is visible.
17 class SoneTextParser @Inject constructor(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {
// Entry point. On a list of lines it drops leading and trailing lines that are
// blank after trimming, merges runs of empty lines, splits every line into
// parts, removes empty plain-text parts and merges neighbouring plain-text parts.
// NOTE(review): the expression producing the list of lines is not visible —
// presumably `source` split on line breaks; confirm.
19 fun parse(source: String, context: SoneTextParserContext?) =
21 .dropWhile { it.trim() == "" }
22 .dropLastWhile { it.trim() == "" }
23 .mergeMultipleEmptyLines()
24 .flatMap { splitLineIntoParts(it, context) }
25 .removeEmptyPlainTextParts()
26 .mergeAdjacentPlainTextParts()
// Splits one line into parts by repeatedly looking for the next link.
// The sequence carries pairs of (part produced, text still to process) and is
// seeded with an empty PlainTextPart and the whole line; that empty seed part
// is later discarded by removeEmptyPlainTextParts() in parse().
28 private fun splitLineIntoParts(line: String, context: SoneTextParserContext?) =
29 generateSequence(PlainTextPart("") as Part to line) { remainder ->
// Nothing left to process.
// NOTE(review): the branch bodies for this `if` are not visible here.
30 if (remainder.second == "")
// Ask every candidate for its next link and keep the one that starts first.
34 .mapNotNull { it.findNext(remainder.second) }
35 .minByOrNull { it.position }
// No link at all: the rest of the line is plain text.
38 it == null -> PlainTextPart(remainder.second) to ""
// Link starts right here: convert it and continue after it.
39 it.position == 0 -> it.toPart(context) to it.remainder
// Link starts later: emit the text before it and re-queue the link itself
// (followed by its remainder) so the next step finds it at position 0.
40 else -> PlainTextPart(remainder.second.substring(0, it.position)) to (it.link + it.remainder)
// Only the produced parts are of interest, not the remaining text.
43 }.map { it.first }.toList()
// The link cut off at the first "/../", but only if that "/../" occurs before
// any "?" (i.e. it is not part of the query string).
// NOTE(review): the getter header and the else branch are not visible —
// presumably the unmodified link is returned otherwise; confirm.
45 private val NextLink.linkWithoutBacklink: String
47 val backlink = link.indexOf("/../")
48 val query = link.indexOf("?")
// The inner `(query > -1)` is implied by the preceding `(query == -1) ||`.
49 return if ((backlink > -1) && ((query == -1) || (query > -1) && (backlink < query)))
50 link.substring(0, backlink)
// Converts a found link into the matching Part, depending on its link type.
// NOTE(review): the labels of the first two `when` branches are not visible.
55 private fun NextLink.toPart(context: SoneTextParserContext?) = when (linkType) {
57 FreenetURI(linkWithoutBacklink).let { freenetUri ->
// Display text: last non-empty meta string, else the doc name, else the
// first nine characters of the link.
60 freenetUri.allMetaStrings?.lastOrNull { it != "" } ?: freenetUri.docName ?: linkWithoutBacklink.substring(0, 9),
// Link without its query string.
61 linkWithoutBacklink.split('?').first()
// A link that cannot be parsed as a Freenet URI is rendered as plain text.
64 } catch (e: MalformedURLException) {
65 PlainTextPart(linkWithoutBacklink)
69 FreenetURI(linkWithoutBacklink)
// Throws MalformedURLException if the parsed URI has no routing key.
70 .workaroundForFaultyConstructorInFred1485AndBelow()
// Use the meta strings only if there is more than one, or a single non-empty one.
73 ?.takeIf { (it.size > 1) || ((it.size == 1) && (it.single() != "")) }
// Last fallback for the display text: "<key type>@<encoded routing key>".
76 ?: "${uri.keyType}@${uri.routingKey.asFreenetBase64}"
// The link is marked trusted only if the context's routing key equals the
// routing key of the linked URI; a missing context or key yields false.
77 }.let { FreenetLinkPart(linkWithoutBacklink.removeSuffix("/"), it, trusted = context?.routingKey?.contentEquals(FreenetURI(linkWithoutBacklink).routingKey) == true) }
78 } catch (e: MalformedURLException) {
79 PlainTextPart(linkWithoutBacklink)
// substring(7) skips the seven characters of "sone://"; if no provider is set or
// it returns no Sone, an IdOnlySone for the ID is used instead.
81 SONE -> link.substring(7).let { SonePart(soneProvider?.getSone(it) ?: IdOnlySone(it)) }
// substring(7) skips the seven characters of "post://"; if no provider is set or
// it returns no post, the link is kept as plain text.
82 POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
// Local part is everything before '@'; the ID is everything between '@' and
// the trailing nine characters (".freemail").
83 FREEMAIL -> link.indexOf('@').let { atSign ->
84 link.substring(atSign + 1, link.length - 9).let { freemailId ->
85 FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
// The display text of web links is a shortened form of the link.
// NOTE(review): some of the shortening steps are not visible here.
88 HTTP, HTTPS -> LinkPart(link, link
92 .withoutMiddlePathComponents
93 .withoutTrailingSlash)
/**
 * Rejects URIs that were constructed without a routing key.
 *
 * @return this URI, unchanged, if it has a routing key
 * @throws MalformedURLException if the routing key is `null`
 */
private fun FreenetURI.workaroundForFaultyConstructorInFred1485AndBelow(): FreenetURI {
    if (routingKey == null) {
        throw MalformedURLException("SSK/USK without routing key")
    }
    return this
}
// Folds the lines into a new list, starting from an empty list; `previous` is
// the list built so far, `current` the line being looked at.
// NOTE(review): several branch bodies of this function are not visible here.
101 private fun List<String>.mergeMultipleEmptyLines() = fold(emptyList<String>()) { previous, current ->
// First line: there is no predecessor to compare with.
102 if (previous.isEmpty()) {
// An empty line following an entry that is exactly "\n" (i.e. an already
// recorded empty line) — presumably skipped so that runs of empty lines
// collapse into one; confirm against the hidden branch body.
105 if ((previous.last() == "\n") && (current == "")) {
// Every appended line is prefixed with a line break.
108 previous + ("\n" + current)
// Folds the parts into a new list in which two neighbouring plain-text parts
// are combined into one.
// NOTE(review): the else branch is not visible — presumably the part is simply
// appended; confirm.
113 private fun List<Part>.mergeAdjacentPlainTextParts() = fold(emptyList<Part>()) { parts, part ->
114 if ((parts.lastOrNull() is PlainTextPart) && (part is PlainTextPart)) {
// Replace the last part with one whose text is both texts concatenated.
115 parts.dropLast(1) + PlainTextPart(parts.last().text + part.text)
/**
 * Returns the parts of this list that are not equal to a [PlainTextPart]
 * with empty text.
 */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
    val emptyPart = PlainTextPart("")
    return filter { it != emptyPart }
}
/**
 * This string decoded with [Base32] and converted back to text via
 * `asFreenetBase64`.
 */
private val String.decodedId: String
    get() {
        val decoded = Base32.decode(this)
        return decoded.asFreenetBase64
    }
/**
 * Everything after the first `//` of this string.
 *
 * A string that contains no `//` is returned unchanged; the previous
 * index arithmetic dropped its first character in that case.
 */
private val String.withoutProtocol get() = substringAfter("//")
/**
 * Everything before the first `?` of this string, or the whole string if it
 * contains no `?`.
 */
private val String.withoutUrlParameters get() = substringBefore('?')
// Removes leading "www" labels from the first element of a list of components.
// NOTE(review): the getter header and the final re-joining step are not visible
// here — presumably the string is split into path components and joined again;
// confirm.
127 private val String.withoutWwwPrefix
// Only the first component is touched: it is split on ".", every leading
// label equal to "www" is dropped (dropWhile, so more than one if repeated),
// and the rest is joined with "." again.
129 .replaceFirst { it.split(".").dropWhile { it == "www" }.joinToString(".") }
// Returns a list in which the element at index 0 is replaced by the result of
// `replacement` applied to it; all other elements are kept as they are.
// An empty list stays empty because `replacement` is never invoked.
132 private fun <T> List<T>.replaceFirst(replacement: (T) -> T) = mapIndexed { index, element ->
133 if (index == 0) replacement(element) else element
// Shortens a path by replacing its middle components with an ellipsis.
// NOTE(review): the condition guarding the shortening and the else branch are
// not visible here — presumably short paths are returned unchanged; confirm.
136 private val String.withoutMiddlePathComponents
137 get() = split("/").let {
// Keep only the first and the last component, with "…" in between.
139 "${it.first()}/…/${it.last()}"
/**
 * This string without a single trailing `/`; strings that do not end with
 * `/` are returned unchanged.
 */
private val String.withoutTrailingSlash get() = removeSuffix("/")
/**
 * Routing key of this context's posting Sone, or `null` if the context has
 * no posting Sone.
 */
private val SoneTextParserContext.routingKey: ByteArray?
    get() = postingSone?.let { sone -> sone.routingKey }
/** Routing key of this Sone, obtained by converting its ID with `fromFreenetBase64`. */
private val Sone.routingKey: ByteArray
    get() {
        val soneId = id
        return soneId.fromFreenetBase64
    }
// The kinds of links the parser recognizes. Every entry carries the scheme
// string that introduces such a link and a flag telling whether it is a
// Freenet link (which may be preceded by "freenet:", see findNext below).
// NOTE(review): the gaps in the listing numbers suggest that some entries and
// lines of this enum are not shown here.
148 private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {
154 HTTP("http://", false),
155 HTTPS("https://", false),
156 SONE("sone://", false) {
// Sone links are only accepted if they are exactly 50 characters long.
157 override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
159 POST("post://", false),
// Finds a Freemail address of the shape <local part>@<52 characters>.freemail.
// NOTE(review): the entry this override belongs to is not visible here —
// presumably FREEMAIL; confirm.
// Only the first occurrence of ".freemail" in the line is examined.
161 override fun findNext(line: String): NextLink? {
// The suffix needs at least 54 characters in front of it:
// one local-part character, the '@' and the 52-character ID.
162 val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
163 if (line[nextFreemailSuffix - 53] != '@') return null
// The ID may only consist of lower-case letters and the digits 2 to 7.
// NOTE(review): this Regex is constructed on every call.
164 if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).matches(Regex("^[a-z2-7]*\$"))) return null
// Walk backwards from the '@' for as long as the characters are valid local-part
// characters; the last index produced is the start of the address.
// NOTE(review): the seed index is always emitted, so lastOrNull() cannot be
// null here and an address with an empty local part is not rejected.
165 val firstCharacterIndex = generateSequence(nextFreemailSuffix - 53) {
166 it.minus(1).takeIf { (it >= 0) && line[it].validLocalPart }
167 }.lastOrNull() ?: return null
// +9 skips the nine characters of ".freemail".
168 return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9))
// ASCII letters, digits, '-', '_' and '.' are accepted in the local part.
171 private val Char.validLocalPart get() = (this in ('A'..'Z')) || (this in ('a'..'z')) || (this in ('0'..'9')) || (this == '-') || (this == '_') || (this == '.')
// Default search: locates the first occurrence of the scheme in the line and
// returns the link starting there, or null if there is no scheme or the link
// length is not accepted by validateLinkLength().
174 open fun findNext(line: String): NextLink? {
175 val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
176 val endOfLink = line.substring(nextLinkPosition).findEndOfLink().validate() ?: return null
177 val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
// For Freenet links a directly preceding "freenet:" (eight characters) is counted
// as part of the link's position, but it is not part of the link text itself.
178 val realNextLinkPosition = if (freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")) nextLinkPosition - 8 else nextLinkPosition
179 return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
// Determines where a link ends: at the first whitespace character, minus
// trailing punctuation, and not beyond the first unmatched closing parenthesis.
// The result is used as a length (Int) by findNext() above.
182 private fun String.findEndOfLink() =
183 substring(0, whitespace.find(this)?.range?.start ?: length)
184 .dropLastWhile(::isPunctuation)
185 .upToFirstUnmatchedParen()
// Returns the length if it is acceptable for this link type, null otherwise.
187 private fun Int.validate() = validateLinkLength(this)
// By default a link must be longer than its scheme, i.e. have some content.
188 protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length }
// Scans the string while tracking a pair of
// (number of currently open parentheses, index of the first unmatched ')').
// NOTE(review): the `when` header and the code that turns the folded pair into
// the result are not visible here.
190 private fun String.upToFirstUnmatchedParen() =
191 foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
193 '(' -> (openParens + 1) to firstUnmatchedParen
// A ')' with no open parenthesis is unmatched; only the first such index is
// remembered. The counter is decremented in either case.
194 ')' -> ((openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen))
195 else -> openParens to firstUnmatchedParen
/** Characters regarded as punctuation: `.`, `,`, `?` and `!`. */
private val punctuationChars = ".,?!".toList()

/** Tells whether [char] is one of the [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.contains(char)
// Character class of everything that terminates a link: line feed, space,
// no-break space and a number of further Unicode space and zero-width characters.
// The first escape is written with a doubled backslash, so it reaches the regex
// engine as an escape sequence instead of being resolved in the string literal.
204 private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
/**
 * A link found in a line of text.
 *
 * @property position index in the searched line at which the link starts
 * @property linkType the kind of link that was found
 * @property link the text of the link itself
 * @property remainder the text of the line that follows the link
 */
private data class NextLink(
    val position: Int,
    val linkType: LinkType,
    val link: String,
    val remainder: String
)