Fix parsing of SSK links without document name
[Sone.git] / src / main / kotlin / net / pterodactylus / sone / text / SoneTextParser.kt
1 package net.pterodactylus.sone.text
2
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import org.bitpedia.util.Base32
19 import java.net.MalformedURLException
20
/**
 * Parser that splits text into [Part]s, recognizing Freenet URIs (KSK, CHK,
 * SSK, USK), HTTP(S) links, `sone://` and `post://` links, and Freemail
 * addresses.
 *
 * @param soneProvider Used to resolve the Sone referenced by a `sone://` link;
 * when `null` (or when the Sone is not found) an [IdOnlySone] is used instead
 * @param postProvider Used to resolve the post referenced by a `post://` link;
 * when `null` (or when the post is not found) the link is kept as plain text
 */
class SoneTextParser(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {

        /**
         * Parses the given text into a list of parts.
         *
         * Blank lines at the beginning and the end of the text are dropped,
         * consecutive empty lines are merged, every remaining line is split at
         * the links it contains, and finally empty plain-text parts are removed
         * and adjacent plain-text parts are merged.
         *
         * @param source The text to parse
         * @param context The context of the parsed text (may be `null`); its
         * routing key decides whether SSK/USK links are marked as trusted
         * @return The parts the text consists of
         */
        fun parse(source: String, context: SoneTextParserContext?): List<Part> =
                source.split("\n")
                        .dropWhile { it.isBlank() }
                        .dropLastWhile { it.isBlank() }
                        .mergeMultipleEmptyLines()
                        .flatMap { splitLineIntoParts(it, context) }
                        .removeEmptyPlainTextParts()
                        .mergeAdjacentPlainTextParts()

        /**
         * Splits a single line into parts by repeatedly locating the link that
         * starts closest to the beginning of the not-yet-parsed text.
         *
         * The sequence is seeded with an empty plain-text part (which [parse]
         * removes again) paired with the complete line; every step emits one
         * part together with the text that still has to be parsed.
         */
        private fun splitLineIntoParts(line: String, context: SoneTextParserContext?) =
                generateSequence(PlainTextPart("") as Part to line) { (_, unparsed) ->
                        if (unparsed == "") {
                                null
                        } else {
                                // sortedBy is stable, so among links starting at the same position the
                                // link type declared first wins; unlike minBy this behaves the same on
                                // every Kotlin version when no link is found at all.
                                val nextLink = LinkType.values()
                                        .mapNotNull { it.findNext(unparsed) }
                                        .sortedBy { it.position }
                                        .firstOrNull()
                                when {
                                        nextLink == null -> PlainTextPart(unparsed) to ""
                                        nextLink.position == 0 -> nextLink.toPart(context) to nextLink.remainder
                                        // emit the text in front of the link; the link itself is parsed in the next step
                                        else -> PlainTextPart(unparsed.substring(0, nextLink.position)) to (nextLink.link + nextLink.remainder)
                                }
                        }
                }.map { it.first }.toList()

        /**
         * Converts a located link into the part representing it. Freenet links
         * that cannot be parsed as [FreenetURI] are turned into plain text.
         */
        private fun NextLink.toPart(context: SoneTextParserContext?) = when (linkType) {
                KSK, CHK -> try {
                        val freenetUri = FreenetURI(link)
                        val text = if (freenetUri.isKSK) {
                                freenetUri.guessableKey
                        } else {
                                freenetUri.metaString ?: freenetUri.docName ?: link.substring(0, 9)
                        }
                        FreenetLinkPart(link, text, link.split('?').first())
                } catch (e: MalformedURLException) {
                        PlainTextPart(link)
                }
                SSK, USK -> try {
                        // parse the link only once and use the result for both the text and the trust check
                        val uri = FreenetURI(link)
                        val text = uri.docName ?: "${uri.keyType}@${uri.routingKey.freenetBase64}"
                        FreenetLinkPart(link, text, trusted = context?.routingKey?.contentEquals(uri.routingKey) == true)
                } catch (e: MalformedURLException) {
                        PlainTextPart(link)
                }
                // "sone://" and "post://" are both 7 characters long
                SONE -> link.substring(7).let { soneId -> SonePart(soneProvider?.getSone(soneId) ?: IdOnlySone(soneId)) }
                POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
                FREEMAIL -> {
                        val atSign = link.indexOf('@')
                        // strip the 9 characters of the trailing ".freemail"
                        val freemailId = link.substring(atSign + 1, link.length - 9)
                        FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
                }
                HTTP, HTTPS -> LinkPart(link, link
                        .withoutProtocol
                        .withoutWwwPrefix
                        .withoutUrlParameters
                        .withoutMiddlePathComponents
                        .withoutTrailingSlash)
        }

}
92
/**
 * Collapses runs of empty lines into a single one. The first line is kept as
 * it is, every following line that is kept is prefixed with a line break, and
 * an empty line is skipped if the previously kept entry already is a lone
 * line break.
 */
private fun List<String>.mergeMultipleEmptyLines() = fold(emptyList<String>()) { merged, line ->
        when {
                merged.isEmpty() -> merged + line
                (line == "") && (merged.last() == "\n") -> merged
                else -> merged + "\n$line"
        }
}
104
/**
 * Joins every run of consecutive [PlainTextPart]s into a single plain-text
 * part holding the concatenated text; all other parts are kept unchanged.
 */
private fun List<Part>.mergeAdjacentPlainTextParts() = fold(emptyList<Part>()) { merged, next ->
        val previous = merged.lastOrNull()
        if ((previous is PlainTextPart) && (next is PlainTextPart)) {
                merged.dropLast(1) + PlainTextPart(previous.text + next.text)
        } else {
                merged + next
        }
}
112
/** Returns this list without the parts that equal an empty [PlainTextPart]. */
private fun List<Part>.removeEmptyPlainTextParts() = filter { part -> part != PlainTextPart("") }
114
/** Decodes this string as Base32 and returns the Base64 encoding of the decoded bytes. */
private val String.decodedId: String
        get() {
                val decodedBytes = Base32.decode(this)
                return Base64.encode(decodedBytes)
        }
/**
 * Returns everything after the first `//` of this string. A string that does
 * not contain `//` is returned unchanged (the previous index arithmetic
 * silently dropped its first character in that case).
 */
private val String.withoutProtocol get() = substringAfter("//")
/** Returns the part of this string in front of the first `?`, or the whole string if there is none. */
private val String.withoutUrlParameters: String
        get() = substringBefore('?')
118
/**
 * Removes all leading `www` labels from the first `/`-separated component of
 * this string; the remaining components are left untouched.
 */
private val String.withoutWwwPrefix: String
        get() {
                val components = split("/")
                val host = components.first()
                        .split(".")
                        .dropWhile { label -> label == "www" }
                        .joinToString(".")
                return (listOf(host) + components.drop(1)).joinToString("/")
        }
123
/**
 * Returns a copy of this list in which the first element has been replaced by
 * the result of applying [replacement] to it. An empty list stays empty.
 */
private fun <T> List<T>.replaceFirst(replacement: (T) -> T): List<T> =
        if (isEmpty()) emptyList() else listOf(replacement(first())) + drop(1)
127
/**
 * Shortens a `/`-separated path of more than two components to its first and
 * last component with an ellipsis in between; shorter paths are kept.
 */
private val String.withoutMiddlePathComponents: String
        get() {
                val components = split("/")
                return if (components.size <= 2) {
                        components.joinToString("/")
                } else {
                        components.first() + "/…/" + components.last()
                }
        }
/** Returns this string without a single trailing `/`, if it has one. */
private val String.withoutTrailingSlash: String
        get() = removeSuffix("/")
/** The routing key of the posting Sone of this context, or `null` if there is no posting Sone. */
private val SoneTextParserContext.routingKey: ByteArray?
        get() = postingSone?.let { sone -> sone.routingKey }
/** The routing key of this Sone, obtained by Base64-decoding its ID. */
private val Sone.routingKey: ByteArray
        get() {
                return Base64.decode(id)
        }
139
/**
 * The kinds of links the parser can locate in a line of text.
 *
 * @param scheme The text a link of this type starts with
 * @param freenetLink Whether a `freenet:` prefix directly in front of the
 * link is counted as part of the link's position
 */
private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {

        KSK("KSK@", true),
        CHK("CHK@", true),
        SSK("SSK@", true),
        USK("USK@", true),
        HTTP("http://", false),
        HTTPS("https://", false),
        SONE("sone://", false) {
                // a Sone link is only accepted if it is exactly 50 characters long
                override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
        },
        POST("post://", false),
        FREEMAIL("", true) {
                /**
                 * Freemail addresses have no leading scheme; they are located by
                 * their `.freemail` suffix instead. Every occurrence of the suffix
                 * is tried so that an invalid candidate does not hide a valid
                 * address later in the line.
                 */
                override fun findNext(line: String): NextLink? =
                        line.indicesOf(".freemail")
                                .mapNotNull { freemailEndingAt(line, it) }
                                .firstOrNull()

                /**
                 * Returns the Freemail address whose `.freemail` suffix starts at
                 * the given index, or `null` if the text in front of the suffix
                 * does not have the form `<local part>@<52 characters of a-z2-7>`.
                 */
                private fun freemailEndingAt(line: String, suffixIndex: Int): NextLink? {
                        // at least one character, the '@', and the 52 characters of the ID must fit in front of the suffix
                        if (suffixIndex < 54) return null
                        val atSign = suffixIndex - 53
                        if (line[atSign] != '@') return null
                        if (!line.substring(atSign + 1, suffixIndex).all { it.validIdCharacter }) return null
                        // walk backwards from the '@' for as long as the characters may be part of a local part
                        val firstCharacterIndex = (atSign - 1 downTo 0)
                                .takeWhile { line[it].validLocalPart }
                                .lastOrNull()
                                ?: atSign
                        val endOfLink = suffixIndex + 9
                        return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, endOfLink), line.substring(endOfLink))
                }

                private val Char.validIdCharacter get() = (this in ('a'..'z')) || (this in ('2'..'7'))
                private val Char.validLocalPart get() = (this in ('A'..'Z')) || (this in ('a'..'z')) || (this in ('0'..'9')) || (this == '-') || (this == '_') || (this == '.')
        };

        /**
         * Locates the first valid link of this type in the given line.
         *
         * Occurrences of the scheme that do not form a valid link (e.g. a
         * scheme that is directly followed by whitespace) are skipped and the
         * search continues with the next occurrence.
         *
         * @param line The text to search
         * @return The first valid link, or `null` if the line contains none
         */
        open fun findNext(line: String): NextLink? =
                line.indicesOf(scheme)
                        .mapNotNull { linkAt(line, it) }
                        .firstOrNull()

        /**
         * Returns the link whose scheme starts at the given index of the line,
         * or `null` if the text at that index does not have a valid length.
         */
        private fun linkAt(line: String, linkPosition: Int): NextLink? {
                val linkLength = validateLinkLength(line.substring(linkPosition).findEndOfLink()) ?: return null
                val endOfLink = linkPosition + linkLength
                // a "freenet:" prefix (8 characters) is not part of the link text but moves the reported position
                val position = if (freenetLink && line.substring(0, linkPosition).endsWith("freenet:")) linkPosition - 8 else linkPosition
                return NextLink(position, this, line.substring(linkPosition, endOfLink), line.substring(endOfLink))
        }

        /**
         * Returns the length of the link this string starts with: the text up
         * to the first whitespace, without trailing punctuation, cut off at
         * the first closing parenthesis that has no opening counterpart.
         */
        private fun String.findEndOfLink() =
                substring(0, whitespace.find(this)?.range?.start ?: length)
                        .dropLastWhile(::isPunctuation)
                        .upToFirstUnmatchedParen()

        /**
         * Returns the given link length if it is valid for this link type, or
         * `null` otherwise. By default a link has to be longer than its scheme.
         */
        protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length }

        /**
         * Returns the index of the first closing parenthesis that does not
         * close a previously opened one, or the length of this string if all
         * parentheses are matched.
         */
        private fun String.upToFirstUnmatchedParen() =
                foldIndexed(Pair<Int, Int?>(0, null)) { index, (openParens, firstUnmatchedParen), currentChar ->
                        when (currentChar) {
                                '(' -> (openParens + 1) to firstUnmatchedParen
                                ')' -> (openParens - 1) to (if (openParens == 0) (firstUnmatchedParen ?: index) else firstUnmatchedParen)
                                else -> openParens to firstUnmatchedParen
                        }
                }.second ?: length

}

/**
 * Returns the indices of all (possibly overlapping) occurrences of the given
 * text in this string, in ascending order.
 */
private fun String.indicesOf(needle: String): Sequence<Int> =
        generateSequence(indexOf(needle).takeIf { it != -1 }) { previous ->
                // "> previous" also ends the sequence for an empty needle at the end of the string
                indexOf(needle, previous + 1).takeIf { it > previous }
        }
192
/** Characters that are stripped from the end of a link. */
private val punctuationChars = listOf('.', ',', '?', '!')

/** Returns whether the given character is one of the [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.contains(char)
195
/**
 * Matches a single character that terminates a link: the line feed (written
 * as a regex escape), the space, the no-break space, and the further Unicode
 * characters listed in the character class. Link detection cuts the link text
 * at the first match of this expression.
 */
private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
197
/**
 * A link that was located in a line of text.
 *
 * @property position The index in the line at which the link starts (for
 * Freenet links this includes a directly preceding `freenet:` prefix)
 * @property linkType The type of the link
 * @property link The text of the link
 * @property remainder The text of the line after the link
 */
private data class NextLink(
        val position: Int,
        val linkType: LinkType,
        val link: String,
        val remainder: String
)
199
/** The Base64 encoding of this byte array as produced by Freenet's [Base64] class. */
private val ByteArray.freenetBase64: String
        get() {
                return Base64.encode(this)!!
        }