✅ Add test for parser to be injectable
[Sone.git] / src / main / kotlin / net / pterodactylus / sone / text / SoneTextParser.kt
1 package net.pterodactylus.sone.text
2
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import org.bitpedia.util.Base32
19 import java.net.MalformedURLException
20 import javax.inject.*
21
/**
 * Parses text into a list of [Part]s, recognizing Freenet URIs (KSK, CHK, SSK, USK),
 * HTTP(S) links, `sone://` and `post://` links, and Freemail addresses.
 *
 * Both providers may be `null`; in that case Sone links are represented by an
 * [IdOnlySone] and post links are kept as plain text.
 */
class SoneTextParser @Inject constructor(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {

	/**
	 * Parses [source] line by line.
	 *
	 * Blank lines at the beginning and the end are dropped, runs of empty lines are
	 * merged, and adjacent plain-text parts are merged into a single part.
	 *
	 * @param source the text to parse
	 * @param context the context of the text; its posting Sone decides whether
	 * SSK/USK links are marked as trusted
	 * @return the parts the text consists of
	 */
	fun parse(source: String, context: SoneTextParserContext?): List<Part> =
		source.split("\n")
			.dropWhile { it.isBlank() }
			.dropLastWhile { it.isBlank() }
			.mergeMultipleEmptyLines()
			.flatMap { splitLineIntoParts(it, context) }
			.removeEmptyPlainTextParts()
			.mergeAdjacentPlainTextParts()

	/**
	 * Splits a single line into parts by repeatedly locating the link that starts
	 * earliest in the text that has not been consumed yet.
	 */
	private fun splitLineIntoParts(line: String, context: SoneTextParserContext?): List<Part> =
		generateSequence(PlainTextPart("") as Part to line) { (_, unparsed) ->
			if (unparsed.isEmpty()) {
				null
			} else {
				// sortedBy is stable, so on equal positions the first link type wins,
				// exactly like minBy did. Unlike minBy it yields null for an empty list
				// on every Kotlin version (minBy throws on empty input in current ones).
				val nextLink = LinkType.values()
					.mapNotNull { it.findNext(unparsed) }
					.sortedBy { it.position }
					.firstOrNull()
				when {
					nextLink == null -> PlainTextPart(unparsed) to ""
					nextLink.position == 0 -> nextLink.toPart(context) to nextLink.remainder
					// Emit the text in front of the link first; the link itself is
					// handled by the next iteration, where it starts at position 0.
					else -> PlainTextPart(unparsed.substring(0, nextLink.position)) to (nextLink.link + nextLink.remainder)
				}
			}
		}.map { it.first }.toList()

	/**
	 * Converts a located link into the part representing it. Freenet links that
	 * cannot be parsed as a [FreenetURI] are returned as plain text.
	 */
	private fun NextLink.toPart(context: SoneTextParserContext?): Part = when (linkType) {
		KSK, CHK -> try {
			val freenetUri = FreenetURI(link)
			val text = if (freenetUri.isKSK) {
				freenetUri.guessableKey
			} else {
				// take() instead of substring(0, 9) so that a short link cannot throw.
				freenetUri.metaString ?: freenetUri.docName ?: link.take(9)
			}
			FreenetLinkPart(link, text, link.split('?').first())
		} catch (e: MalformedURLException) {
			PlainTextPart(link)
		}
		SSK, USK -> try {
			// Parse the URI once and reuse it for both the text and the trust check.
			val freenetUri = FreenetURI(link)
			val text = freenetUri.docName ?: "${freenetUri.keyType}@${freenetUri.routingKey.freenetBase64}"
			// Trusted only if the link's routing key equals the posting Sone's routing key.
			FreenetLinkPart(link, text, trusted = context?.routingKey?.contentEquals(freenetUri.routingKey) == true)
		} catch (e: MalformedURLException) {
			PlainTextPart(link)
		}
		SONE -> {
			// 7 is the length of "sone://".
			val soneId = link.substring(7)
			SonePart(soneProvider?.getSone(soneId) ?: IdOnlySone(soneId))
		}
		// 7 is the length of "post://"; unknown posts stay plain text.
		POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
		FREEMAIL -> {
			val atSign = link.indexOf('@')
			// 9 is the length of the ".freemail" suffix.
			val freemailId = link.substring(atSign + 1, link.length - 9)
			FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
		}
		HTTP, HTTPS -> LinkPart(link, link
			.withoutProtocol
			.withoutWwwPrefix
			.withoutUrlParameters
			.withoutMiddlePathComponents
			.withoutTrailingSlash)
	}

}
93
/**
 * Prefixes every line but the first with a line break and collapses runs of
 * empty lines into a single `"\n"` entry.
 *
 * The result is built in one mutable list instead of copying the accumulated
 * list for every line.
 */
private fun List<String>.mergeMultipleEmptyLines(): List<String> =
	fold(mutableListOf<String>()) { merged, line ->
		when {
			// The first line is kept as it is.
			merged.isEmpty() -> merged.add(line)
			// The previous entry already is an empty line: drop this one.
			merged.last() == "\n" && line == "" -> Unit
			else -> merged.add("\n" + line)
		}
		merged
	}
105
/**
 * Merges every run of consecutive [PlainTextPart]s into a single [PlainTextPart]
 * holding the concatenated text; all other parts are kept in order.
 *
 * The result is built in one mutable list instead of copying the accumulated
 * list for every part.
 */
private fun List<Part>.mergeAdjacentPlainTextParts(): List<Part> =
	fold(mutableListOf<Part>()) { merged, part ->
		val previous = merged.lastOrNull()
		if ((previous is PlainTextPart) && (part is PlainTextPart)) {
			merged[merged.lastIndex] = PlainTextPart(previous.text + part.text)
		} else {
			merged.add(part)
		}
		merged
	}
113
/** Returns the parts of this list that are not equal to an empty [PlainTextPart]. */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
	val emptyPart = PlainTextPart("")
	return filter { part -> part != emptyPart }
}
115
/** This string decoded with [Base32] and re-encoded with Freenet's [Base64]. */
private val String.decodedId: String
	get() {
		val rawId = Base32.decode(this)
		return Base64.encode(rawId)
	}
/**
 * Everything after the first `//` of this string. A string that contains no `//`
 * is returned unchanged (index arithmetic on a failed search used to drop its
 * first character).
 */
private val String.withoutProtocol get() = substringAfter("//")
/** Everything in front of the first `?`, or the whole string if it contains none. */
private val String.withoutUrlParameters: String
	get() = substringBefore('?')
119
/**
 * This string with all leading `www` labels removed from its first
 * slash-separated component; everything from the first `/` on is left untouched.
 */
private val String.withoutWwwPrefix: String
	get() {
		val host = substringBefore("/")
		val path = substring(host.length)
		val strippedHost = host.split(".")
			.dropWhile { label -> label == "www" }
			.joinToString(".")
		return strippedHost + path
	}
124
/**
 * Returns a copy of this list in which the first element has been replaced by
 * the result of applying [replacement] to it; an empty list stays empty.
 */
private fun <T> List<T>.replaceFirst(replacement: (T) -> T): List<T> =
	if (isEmpty()) {
		emptyList()
	} else {
		listOf(replacement(first())) + drop(1)
	}
128
/**
 * This string with everything between its first and its last slash-separated
 * component replaced by `…`; strings with at most two components are returned
 * unchanged.
 */
private val String.withoutMiddlePathComponents: String
	get() {
		val components = split("/")
		return when {
			components.size > 2 -> components.first() + "/…/" + components.last()
			// Splitting and re-joining on "/" reproduces the original string.
			else -> this
		}
	}
/** This string without its final `/`, if it ends with one. */
private val String.withoutTrailingSlash: String
	get() = removeSuffix("/")
/** The routing key of the context's posting Sone, or `null` if there is no posting Sone. */
private val SoneTextParserContext.routingKey: ByteArray?
	get() = postingSone?.let { sone -> sone.routingKey }

/** The ID of this Sone, decoded with Freenet's [Base64]. */
private val Sone.routingKey: ByteArray
	get() {
		val encodedKey = id
		return Base64.decode(encodedKey)
	}
140
/**
 * Yields the start index of every occurrence of [needle] in this string, in
 * ascending order.
 */
private fun String.indicesOf(needle: String): Sequence<Int> =
	generateSequence(indexOf(needle).takeIf { it >= 0 }) { previous ->
		// Requiring progress also guarantees termination for an empty needle.
		indexOf(needle, previous + 1).takeIf { it > previous }
	}

/**
 * The kinds of links that can be located in a line of text.
 *
 * @param scheme the text a link of this type starts with
 * @param freenetLink whether a `freenet:` prefix directly in front of the link
 * is counted as part of the link's position
 */
private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {

	KSK("KSK@", true),
	CHK("CHK@", true),
	SSK("SSK@", true),
	USK("USK@", true),
	HTTP("http://", false),
	HTTPS("https://", false),
	SONE("sone://", false) {
		// A Sone link must be exactly 50 characters long, scheme included.
		override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
	},
	POST("post://", false),
	FREEMAIL("", true) {
		/**
		 * Locates the first valid Freemail address: a non-empty local part, `@`,
		 * 52 characters out of `a`–`z` and `2`–`7`, and the `.freemail` suffix.
		 * Every `.freemail` occurrence is examined, so an invalid candidate does
		 * not hide a valid address later in the line.
		 */
		override fun findNext(line: String): NextLink? =
			line.indicesOf(".freemail")
				.mapNotNull { freemailAddressEndingAt(line, it) }
				.firstOrNull()

		private fun freemailAddressEndingAt(line: String, suffixIndex: Int): NextLink? {
			// 52 ID characters, the '@' and at least one local-part character must fit in front.
			if (suffixIndex < 54) return null
			val atSignIndex = suffixIndex - 53
			if (line[atSignIndex] != '@') return null
			if (!line.substring(atSignIndex + 1, suffixIndex).all { it.isBase32Digit }) return null
			// Walk backwards from the '@' for as long as the characters may be part of a local part.
			val firstCharacterIndex = generateSequence(atSignIndex) { index ->
				(index - 1).takeIf { (it >= 0) && line[it].validLocalPart }
			}.last()
			// The character in front of the '@' was not a local-part character: not an address.
			if (firstCharacterIndex == atSignIndex) return null
			// 9 is the length of ".freemail".
			return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, suffixIndex + 9), line.substring(suffixIndex + 9))
		}

		private val Char.isBase32Digit get() = (this in ('a'..'z')) || (this in ('2'..'7'))
		private val Char.validLocalPart get() = (this in ('A'..'Z')) || (this in ('a'..'z')) || (this in ('0'..'9')) || (this == '-') || (this == '_') || (this == '.')
	};

	/**
	 * Locates the first valid link of this type in [line].
	 *
	 * Every occurrence of the scheme is examined, so an occurrence that fails
	 * validation does not hide a valid link later in the line.
	 *
	 * @return the link, its position and the text following it, or `null` if
	 * the line contains no valid link of this type
	 */
	open fun findNext(line: String): NextLink? =
		line.indicesOf(scheme)
			.mapNotNull { linkAt(line, it) }
			.firstOrNull()

	/** Returns the link whose scheme starts at [schemePosition], or `null` if its length is not valid. */
	private fun linkAt(line: String, schemePosition: Int): NextLink? {
		val linkLength = validateLinkLength(line.substring(schemePosition).findEndOfLink()) ?: return null
		val endOfLink = schemePosition + linkLength
		// startsWith() is false for a negative offset, i.e. when there is no room for the prefix.
		val hasFreenetPrefix = freenetLink && line.startsWith("freenet:", schemePosition - 8)
		// The prefix counts towards the position but is not part of the link text.
		val position = if (hasFreenetPrefix) schemePosition - 8 else schemePosition
		return NextLink(position, this, line.substring(schemePosition, endOfLink), line.substring(endOfLink))
	}

	/**
	 * Returns the length of the link this string starts with: the text up to the
	 * first whitespace, without trailing punctuation, cut off in front of the
	 * first closing parenthesis that has no matching opening one.
	 */
	private fun String.findEndOfLink(): Int =
		substring(0, whitespace.find(this)?.range?.start ?: length)
			.dropLastWhile(::isPunctuation)
			.upToFirstUnmatchedParen()

	/** Returns [length] if it is acceptable for a link of this type, `null` otherwise. */
	protected open fun validateLinkLength(length: Int): Int? = length.takeIf { it > scheme.length }

	/** Returns the index of the first `)` without a preceding unclosed `(`, or the length of this string. */
	private fun String.upToFirstUnmatchedParen(): Int {
		var openParens = 0
		for ((index, char) in withIndex()) {
			when (char) {
				'(' -> openParens++
				')' -> if (openParens == 0) return index else openParens--
			}
		}
		return length
	}

}
193
/** Characters for which [isPunctuation] returns `true`. */
private val punctuationChars = listOf('.', ',', '?', '!')

/** Returns whether [char] is one of the [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.any { it == char }
196
/**
 * Matches a single character out of an explicit list of whitespace code points.
 *
 * NOTE(review): tab (U+0009) and carriage return (U+000D) are not in the list —
 * confirm that this is intended.
 */
private val whitespace: Regex =
	"[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]".toRegex()
198
/**
 * A link that was located in a line of text.
 *
 * @property position the index in the searched line at which the link starts
 * @property linkType the type of the link
 * @property link the text of the link
 * @property remainder the text of the line that follows the link
 */
private data class NextLink(
	val position: Int,
	val linkType: LinkType,
	val link: String,
	val remainder: String
)
200
/** This byte array encoded with Freenet's [Base64]; fails if the encoder returns `null`. */
private val ByteArray.freenetBase64
	get() = Base64.encode(this).let { encoded -> encoded!! }