♻️ Copy session-handling code to FreenetRequest
[Sone.git] / src / main / kotlin / net / pterodactylus / sone / text / SoneTextParser.kt
1 package net.pterodactylus.sone.text
2
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import org.bitpedia.util.Base32
19 import java.net.MalformedURLException
20 import javax.inject.*
21
/**
 * Parses text into a list of [Part]s, recognizing Freenet URIs, web links,
 * `sone://` and `post://` references, and Freemail addresses.
 *
 * Both providers may be `null`: without a [SoneProvider] every `sone://` link
 * is represented by an [IdOnlySone], and without a [PostProvider] every
 * `post://` link is emitted as plain text.
 */
class SoneTextParser @Inject constructor(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {

        /**
         * Splits [source] at `\n`, drops blank lines from the start and the end,
         * collapses runs of empty lines and converts what is left into parts.
         * Empty plain-text parts are removed and adjacent plain-text parts are
         * merged before the result is returned.
         *
         * @param source the text to parse
         * @param context optional context; the routing key of its posting Sone is
         *   compared against SSK/USK links to flag them as trusted
         * @return the parts in order of appearance
         */
        fun parse(source: String, context: SoneTextParserContext?) =
                        source.split("\n")
                                        .dropWhile { line -> line.isBlank() }
                                        .dropLastWhile { line -> line.isBlank() }
                                        .mergeMultipleEmptyLines()
                                        .flatMap { line -> splitLineIntoParts(line, context) }
                                        .removeEmptyPlainTextParts()
                                        .mergeAdjacentPlainTextParts()

        /**
         * Repeatedly cuts the earliest link (of any [LinkType]) off the front of
         * [line] until nothing is left, yielding one part per step.
         */
        private fun splitLineIntoParts(line: String, context: SoneTextParserContext?): List<Part> {
                // The seed carries an empty text part; it is filtered out later by parse().
                val seed: Pair<Part, String> = PlainTextPart("") to line
                return generateSequence(seed) { (_, unparsed) ->
                        if (unparsed == "") return@generateSequence null
                        // sortedBy is stable, so among links at the same position the first
                        // LinkType in declaration order wins. firstOrNull is used instead of
                        // minBy because minBy's behaviour on an empty collection differs
                        // between Kotlin versions (null vs. exception).
                        val nextLink = LinkType.values()
                                        .mapNotNull { it.findNext(unparsed) }
                                        .sortedBy { it.position }
                                        .firstOrNull()
                        when {
                                nextLink == null -> PlainTextPart(unparsed) to ""
                                nextLink.position == 0 -> nextLink.toPart(context) to nextLink.remainder
                                // Emit the text before the link now; the link itself is handled in the next step.
                                else -> PlainTextPart(unparsed.substring(0, nextLink.position)) to (nextLink.link + nextLink.remainder)
                        }
                }.map { it.first }.toList()
        }

        /**
         * Converts a located link into the matching [Part]. Freenet links that
         * cannot be parsed as a [FreenetURI] are returned as plain text.
         */
        private fun NextLink.toPart(context: SoneTextParserContext?): Part = when (linkType) {
                KSK, CHK -> try {
                        val freenetUri = FreenetURI(link)
                        val text = if (freenetUri.isKSK) {
                                freenetUri.guessableKey
                        } else {
                                freenetUri.metaString ?: freenetUri.docName ?: link.substring(0, 9)
                        }
                        FreenetLinkPart(link, text, link.split('?').first())
                } catch (e: MalformedURLException) {
                        PlainTextPart(link)
                }
                SSK, USK ->
                        try {
                                val uri = FreenetURI(link)
                                // Prefer the last meta string unless the meta strings are absent or
                                // consist of a single empty string; then fall back to the doc name
                                // and finally to "<keyType>@<routingKey>".
                                val text = uri.allMetaStrings
                                                ?.takeIf { (it.size > 1) || ((it.size == 1) && (it.single() != "")) }
                                                ?.lastOrNull()
                                                ?: uri.docName
                                                ?: "${uri.keyType}@${uri.routingKey.freenetBase64}"
                                // A link is trusted only if its routing key equals that of the posting Sone.
                                val trusted = context?.routingKey?.contentEquals(uri.routingKey) == true
                                FreenetLinkPart(link.removeSuffix("/"), text, trusted = trusted)
                        } catch (e: MalformedURLException) {
                                PlainTextPart(link)
                        }
                SONE -> {
                        // 7 is the length of the "sone://" scheme.
                        val soneId = link.substring(7)
                        SonePart(soneProvider?.getSone(soneId) ?: IdOnlySone(soneId))
                }
                POST -> {
                        // 7 is the length of the "post://" scheme.
                        val post = postProvider?.getPost(link.substring(7))
                        if (post != null) PostPart(post) else PlainTextPart(link)
                }
                FREEMAIL -> {
                        val atSign = link.indexOf('@')
                        // 9 is the length of the ".freemail" suffix.
                        val freemailId = link.substring(atSign + 1, link.length - 9)
                        FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
                }
                HTTP, HTTPS -> LinkPart(link, link
                                .withoutProtocol
                                .withoutWwwPrefix
                                .withoutUrlParameters
                                .withoutMiddlePathComponents
                                .withoutTrailingSlash)
        }

}
97
/**
 * Prepares lines for parsing: the first line is kept as it is, every later
 * line is prefixed with `"\n"`, and an empty line that directly follows an
 * entry equal to `"\n"` is dropped, so a run of empty lines collapses into a
 * single `"\n"` entry.
 *
 * Builds the result in a single pass instead of copying the accumulated list
 * for every line.
 */
private fun List<String>.mergeMultipleEmptyLines(): List<String> {
        val merged = mutableListOf<String>()
        for (line in this) {
                when {
                        merged.isEmpty() -> merged.add(line)
                        // Already have a bare line break at the end: swallow further empty lines.
                        (merged.last() == "\n") && (line == "") -> Unit
                        else -> merged.add("\n" + line)
                }
        }
        return merged
}
109
/**
 * Joins every run of consecutive [PlainTextPart]s into a single
 * [PlainTextPart] whose text is the concatenation of the run; all other
 * parts are passed through unchanged and in order.
 *
 * Builds the result in a single pass instead of copying the accumulated list
 * for every part.
 */
private fun List<Part>.mergeAdjacentPlainTextParts(): List<Part> {
        val merged = mutableListOf<Part>()
        for (part in this) {
                val previous = merged.lastOrNull()
                if ((previous is PlainTextPart) && (part is PlainTextPart)) {
                        // Replace the trailing text part with the combined text.
                        merged[merged.lastIndex] = PlainTextPart(previous.text + part.text)
                } else {
                        merged.add(part)
                }
        }
        return merged
}
117
/** Returns the parts without those that are equal to a [PlainTextPart] with empty text. */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
        val emptyTextPart = PlainTextPart("")
        return filter { part -> part != emptyTextPart }
}
119
/** Decodes this string as Base32 and re-encodes the resulting bytes with Freenet's Base64. */
private val String.decodedId: String
        get() {
                val rawId = Base32.decode(this)
                return Base64.encode(rawId)
        }
/**
 * The text following the first `//`. A string that contains no `//` is
 * returned unchanged (index arithmetic on a failed search would otherwise
 * cut off its first character).
 */
private val String.withoutProtocol get() = substringAfter("//")
/** The text before the first `?`, or the whole string if it contains none. */
private val String.withoutUrlParameters: String
        get() = substringBefore('?')
123
/**
 * Removes all leading `www` labels from the text before the first `/`
 * (the host when called on a URL without protocol); everything after that
 * first `/` is left untouched.
 */
private val String.withoutWwwPrefix: String
        get() {
                val stripLeadingWww = { host: String ->
                        host.split(".").dropWhile { label -> label == "www" }.joinToString(".")
                }
                return split("/").replaceFirst(stripLeadingWww).joinToString("/")
        }
128
/**
 * Returns a copy of this list in which the first element has been passed
 * through [replacement]; all other elements are kept. An empty list stays
 * empty and [replacement] is not invoked.
 */
private fun <T> List<T>.replaceFirst(replacement: (T) -> T): List<T> =
                if (isEmpty()) emptyList() else listOf(replacement(first())) + drop(1)
132
/**
 * Shortens a `/`-separated path: with more than two components only the
 * first and the last are kept, joined by `/…/`; shorter paths are returned
 * as they are.
 */
private val String.withoutMiddlePathComponents: String
        get() {
                val components = split("/")
                return when {
                        components.size > 2 -> components.first() + "/…/" + components.last()
                        // Splitting at "/" and joining with "/" again reproduces the input.
                        else -> this
                }
        }
/** This string without one trailing `/`, if it ends with one. */
private val String.withoutTrailingSlash: String
        get() = removeSuffix("/")
/** The routing key of the context's posting Sone, or `null` if the context has no posting Sone. */
private val SoneTextParserContext.routingKey: ByteArray?
        get() = postingSone?.let { sone -> sone.routingKey }
/** The bytes obtained by decoding this Sone's ID with Freenet's Base64. */
private val Sone.routingKey: ByteArray
        get() {
                return Base64.decode(id)
        }
144
/**
 * The kinds of links that can be located in a line of text.
 *
 * @property scheme the literal prefix a link of this type starts with
 * @property freenetLink whether a `freenet:` directly in front of the scheme
 *   is counted as part of the link's position
 */
private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {

        KSK("KSK@", true),
        CHK("CHK@", true),
        SSK("SSK@", true),
        USK("USK@", true),
        HTTP("http://", false),
        HTTPS("https://", false),
        SONE("sone://", false) {
                // A Sone link is only accepted if it is exactly 50 characters long, scheme included.
                override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
        },
        POST("post://", false),
        FREEMAIL("", true) {
                /**
                 * Looks at the first `.freemail` in [line] and accepts it if it is
                 * preceded by 52 characters from `a`-`z`/`2`-`7` and an `@` before
                 * those. The link then extends backwards over all valid local-part
                 * characters in front of the `@`.
                 */
                override fun findNext(line: String): NextLink? {
                        // 54 = at least one character + '@' + 52 ID characters in front of the suffix.
                        val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
                        if (line[nextFreemailSuffix - 53] != '@') return null
                        // Plain character check; avoids compiling a regular expression on every call.
                        if (!line.substring(nextFreemailSuffix - 52, nextFreemailSuffix).all { it.validFreemailIdCharacter }) return null
                        // Walk left from the '@' for as long as the characters are valid local-part characters.
                        val firstCharacterIndex = generateSequence(nextFreemailSuffix - 53) { index ->
                                (index - 1).takeIf { candidate -> (candidate >= 0) && line[candidate].validLocalPart }
                        }.lastOrNull() ?: return null
                        // 9 is the length of ".freemail".
                        return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, nextFreemailSuffix + 9), line.substring(nextFreemailSuffix + 9))
                }

                private val Char.validFreemailIdCharacter get() = (this in ('a'..'z')) || (this in ('2'..'7'))
                private val Char.validLocalPart get() = (this in ('A'..'Z')) || (this in ('a'..'z')) || (this in ('0'..'9')) || (this == '-') || (this == '_') || (this == '.')
        };

        /**
         * Finds the first occurrence of this type's scheme in [line].
         *
         * @return the link with its position, text and the remainder of the line,
         *   or `null` if the scheme does not occur or the link fails length validation
         */
        open fun findNext(line: String): NextLink? {
                val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
                val endOfLink = line.substring(nextLinkPosition).findEndOfLink().validate() ?: return null
                val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
                // For Freenet links a directly preceding "freenet:" (8 characters) moves the
                // reported position to the left; the link text itself does not include it.
                val realNextLinkPosition = if (freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")) nextLinkPosition - 8 else nextLinkPosition
                return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
        }

        /**
         * Length of the link at the start of this string: it ends at the first
         * whitespace character, loses trailing punctuation, and is cut before the
         * first closing parenthesis that has no opening counterpart.
         */
        private fun String.findEndOfLink() =
                        substring(0, whitespace.find(this)?.range?.start ?: length)
                                        .dropLastWhile(::isPunctuation)
                                        .upToFirstUnmatchedParen()

        private fun Int.validate() = validateLinkLength(this)

        /** Returns [length] if it is acceptable for this link type (by default: longer than the scheme alone), `null` otherwise. */
        protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length }

        /** Index of the first `)` that is encountered while no `(` is open, or the string's length if there is none. */
        private fun String.upToFirstUnmatchedParen() =
                        foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
                                when (currentChar) {
                                        '(' -> (openParens + 1) to firstUnmatchedParen
                                        ')' -> ((openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen))
                                        else -> openParens to firstUnmatchedParen
                                }
                        }.second ?: length

}
197
/** Characters that are stripped from the end of a link. */
private val punctuationChars = ".,?!".toList()

/** Whether [char] is one of `.`, `,`, `?` or `!`. */
private fun isPunctuation(char: Char) = punctuationChars.contains(char)
200
/**
 * Matches a single character that terminates a link: line feed, space,
 * no-break space and a fixed list of further Unicode space and zero-width
 * characters.
 *
 * NOTE(review): tab and carriage return are not in the list - confirm that this is intended.
 */
private val whitespace = "[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]".toRegex()
202
/**
 * A link located in a line of text.
 *
 * @property position index in the searched line at which the link starts
 * @property linkType the type of link that was found
 * @property link the text of the link
 * @property remainder the text of the line after the link
 */
private data class NextLink(
                val position: Int,
                val linkType: LinkType,
                val link: String,
                val remainder: String
)
204
/** These bytes encoded with Freenet's Base64; fails if the encoder returns `null`. */
private val ByteArray.freenetBase64: String
        get() {
                return Base64.encode(this)!!
        }