Merge branch 'release-0.9.8'
[Sone.git] / src / main / kotlin / net / pterodactylus / sone / text / SoneTextParser.kt
1 package net.pterodactylus.sone.text
2
3 import freenet.keys.FreenetURI
4 import freenet.support.Base64
5 import net.pterodactylus.sone.data.Sone
6 import net.pterodactylus.sone.data.impl.IdOnlySone
7 import net.pterodactylus.sone.database.PostProvider
8 import net.pterodactylus.sone.database.SoneProvider
9 import net.pterodactylus.sone.text.LinkType.CHK
10 import net.pterodactylus.sone.text.LinkType.FREEMAIL
11 import net.pterodactylus.sone.text.LinkType.HTTP
12 import net.pterodactylus.sone.text.LinkType.HTTPS
13 import net.pterodactylus.sone.text.LinkType.KSK
14 import net.pterodactylus.sone.text.LinkType.POST
15 import net.pterodactylus.sone.text.LinkType.SONE
16 import net.pterodactylus.sone.text.LinkType.SSK
17 import net.pterodactylus.sone.text.LinkType.USK
18 import net.pterodactylus.sone.utils.let
19 import org.bitpedia.util.Base32
20 import java.net.MalformedURLException
21
/**
 * Parser that turns the text of a post or reply into a list of [Part]s.
 *
 * It recognizes Freenet keys (`KSK@`, `CHK@`, `SSK@`, `USK@`), `http://` and
 * `https://` links, `sone://` and `post://` links, and Freemail addresses.
 * Everything else is returned as plain text.
 *
 * Both providers are optional: without a [SoneProvider] every Sone link is
 * represented by an [IdOnlySone], and without a [PostProvider] post links are
 * returned as plain text.
 */
class SoneTextParser(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {

        /**
         * Parses the given text.
         *
         * The text is split at `"\n"`, blank lines at the beginning and the end
         * are dropped, runs of empty lines are merged, and every remaining line
         * is split into parts. Empty plain-text parts are removed and adjacent
         * plain-text parts are merged before the result is returned.
         *
         * @param source the text to parse
         * @param context the context of the text, used to decide whether SSK and
         * USK links are trusted; may be `null`
         * @return the parts of the text, in order
         */
        fun parse(source: String, context: SoneTextParserContext?): List<Part> =
                        source.split("\n")
                                        .dropWhile { it.trim() == "" }
                                        .dropLastWhile { it.trim() == "" }
                                        .mergeMultipleEmptyLines()
                                        .flatMap { splitLineIntoParts(it, context) }
                                        .removeEmptyPlainTextParts()
                                        .mergeAdjacentPlainTextParts()

        /**
         * Splits a single line into parts by repeatedly consuming the front of
         * the not-yet-parsed text until nothing is left.
         */
        private fun splitLineIntoParts(line: String, context: SoneTextParserContext?): List<Part> =
                        // the seed carries an empty part that is filtered out again by parse()
                        generateSequence(PlainTextPart("") as Part to line) { (_, unparsed) ->
                                if (unparsed == "") null else nextPart(unparsed, context)
                        }.map { it.first }.toList()

        /**
         * Produces the next part of [unparsed] together with the text that
         * still has to be parsed afterwards.
         */
        private fun nextPart(unparsed: String, context: SoneTextParserContext?): Pair<Part, String> {
                // sortedBy is stable, so ties are resolved in LinkType declaration order,
                // exactly like picking the first minimum; unlike minBy it behaves the
                // same on every Kotlin version when no link was found.
                val nextLink = LinkType.values()
                                .mapNotNull { it.findNext(unparsed) }
                                .sortedBy { it.position }
                                .firstOrNull()
                return when {
                        nextLink == null -> PlainTextPart(unparsed) to ""
                        nextLink.position == 0 -> nextLink.toPart(context) to nextLink.remainder
                        // emit the text before the link first; the link itself is parsed in the next step
                        else -> PlainTextPart(unparsed.substring(0, nextLink.position)) to (nextLink.link + nextLink.remainder)
                }
        }

        /**
         * Converts a located link into the part that represents it. Links that
         * cannot be interpreted are returned as plain text.
         */
        private fun NextLink.toPart(context: SoneTextParserContext?): Part = when (linkType) {
                KSK, CHK -> try {
                        val freenetUri = FreenetURI(link)
                        val text = if (freenetUri.isKSK) {
                                freenetUri.guessableKey
                        } else {
                                freenetUri.metaString ?: freenetUri.docName ?: link.take(9)
                        }
                        FreenetLinkPart(link, text, link.split('?').first())
                } catch (e: MalformedURLException) {
                        PlainTextPart(link)
                }
                SSK, USK -> try {
                        // parse the URI once and use it for both the text and the trust check
                        val freenetUri = FreenetURI(link)
                        // NOTE(review): docName is a platform type and presumably null for keys
                        // without a document name; fall back the same way the CHK branch does.
                        val text = freenetUri.docName ?: link.take(9)
                        FreenetLinkPart(link, text, trusted = context?.routingKey?.contentEquals(freenetUri.routingKey) == true)
                } catch (e: MalformedURLException) {
                        PlainTextPart(link)
                }
                SONE -> {
                        val soneId = link.removePrefix("sone://")
                        SonePart(soneProvider?.getSone(soneId) ?: IdOnlySone(soneId))
                }
                POST -> postProvider?.getPost(link.removePrefix("post://"))?.let { PostPart(it) } ?: PlainTextPart(link)
                FREEMAIL -> {
                        val atSign = link.indexOf('@')
                        // the link ends with ".freemail", which is 9 characters long
                        val freemailId = link.substring(atSign + 1, link.length - 9)
                        FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
                }
                HTTP, HTTPS -> LinkPart(link, link
                                .withoutProtocol
                                .withoutWwwPrefix
                                .withoutUrlParameters
                                .withoutMiddlePathComponents
                                .withoutTrailingSlash)
        }

}
91
/**
 * Collapses runs of empty lines into a single line break.
 *
 * The first line is kept as it is; every following line is prefixed with
 * `"\n"`. An empty line that directly follows an already emitted `"\n"` is
 * dropped, so that any number of consecutive empty lines yields exactly one
 * `"\n"` element.
 *
 * The result is built in a single pass instead of copying the accumulated
 * list for every line.
 */
private fun List<String>.mergeMultipleEmptyLines(): List<String> {
        val merged = ArrayList<String>(size)
        for (line in this) {
                when {
                        merged.isEmpty() -> merged.add(line)
                        // a line break was already emitted for the previous empty line
                        merged.last() == "\n" && line == "" -> Unit
                        else -> merged.add("\n" + line)
                }
        }
        return merged
}
103
/**
 * Merges every run of consecutive [PlainTextPart]s into a single
 * [PlainTextPart] whose text is the concatenation of the run. All other parts
 * are kept unchanged and in order.
 *
 * The result is built in a single pass instead of copying the accumulated
 * list for every part.
 */
private fun List<Part>.mergeAdjacentPlainTextParts(): List<Part> {
        val merged = ArrayList<Part>(size)
        for (part in this) {
                val previous = merged.lastOrNull()
                if ((previous is PlainTextPart) && (part is PlainTextPart)) {
                        // replace the previous plain text with the combined text
                        merged[merged.lastIndex] = PlainTextPart(previous.text + part.text)
                } else {
                        merged.add(part)
                }
        }
        return merged
}
111
/** Returns all parts except those equal to a [PlainTextPart] with empty text. */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
        val emptyText = PlainTextPart("")
        return filter { part -> part != emptyText }
}
113
/** Decodes this string as Base32 and encodes the resulting bytes with Freenet's Base64. */
private val String.decodedId: String
        get() {
                val decodedBytes = Base32.decode(this)
                return Base64.encode(decodedBytes)
        }
/**
 * Everything after the first `//` of this string. A string that does not
 * contain `//` is returned unchanged instead of losing its first character
 * (or failing when it is empty).
 */
private val String.withoutProtocol get() = substringAfter("//")
/** Everything before the first `?` of this string, or the whole string if it contains no `?`. */
private val String.withoutUrlParameters: String get() = substringBefore('?')
117
/**
 * This string with all leading `www` labels removed from its first
 * `/`-separated component; the remaining components are left untouched.
 */
private val String.withoutWwwPrefix: String
        get() {
                val components = split("/")
                val host = components.first()
                                .split(".")
                                .dropWhile { label -> label == "www" }
                                .joinToString(".")
                return (listOf(host) + components.drop(1)).joinToString("/")
        }
122
/**
 * Returns a copy of this list in which the first element has been replaced by
 * the result of applying [replacement] to it. An empty list stays empty.
 */
private fun <T> List<T>.replaceFirst(replacement: (T) -> T): List<T> =
                if (isEmpty()) emptyList() else listOf(replacement(first())) + drop(1)
126
/**
 * This string with everything between its first and its last `/`-separated
 * component replaced by `…`. Strings with at most two components are
 * returned unchanged.
 */
private val String.withoutMiddlePathComponents: String
        get() {
                val components = split("/")
                return when {
                        components.size > 2 -> components.first() + "/…/" + components.last()
                        else -> components.joinToString("/")
                }
        }
/** This string without a single trailing `/`, if it has one. */
private val String.withoutTrailingSlash: String get() = removeSuffix("/")
/** The routing key of the context's posting Sone, or `null` if the context has no posting Sone. */
private val SoneTextParserContext.routingKey: ByteArray?
        get() {
                val sone = postingSone ?: return null
                return sone.routingKey
        }
/** The bytes obtained by decoding this Sone's ID with Freenet's Base64. */
private val Sone.routingKey: ByteArray
        get() {
                return Base64.decode(id)
        }
138
/**
 * The kinds of links that can be located in a line of text.
 *
 * @param scheme the text a link of this type starts with
 * @param freenetLink whether a link of this type may be preceded by
 * `freenet:`, in which case that prefix is treated as part of the link's
 * position but not of the link itself
 */
private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {

        KSK("KSK@", true),
        CHK("CHK@", true),
        SSK("SSK@", true),
        USK("USK@", true),
        HTTP("http://", false),
        HTTPS("https://", false),
        SONE("sone://", false) {
                /** A Sone link is only valid if it is exactly 50 characters long. */
                override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
        },
        POST("post://", false),
        FREEMAIL("", true) {
                private val addressSuffix = ".freemail"

                /** Pattern for the Base32-encoded ID of a Freemail address; compiled once. */
                private val encodedId = Regex("^[a-z2-7]*\$")

                /**
                 * Finds the first valid Freemail address in the line. Every
                 * occurrence of `.freemail` is examined, so an occurrence that
                 * does not belong to a valid address does not hide a later one.
                 */
                override fun findNext(line: String): NextLink? =
                                generateSequence(line.indexOf(addressSuffix).takeIf { it != -1 }) { previous ->
                                        line.indexOf(addressSuffix, previous + 1).takeIf { it > previous }
                                }.mapNotNull { suffixIndex -> addressEndingAt(line, suffixIndex) }.firstOrNull()

                /**
                 * Returns the Freemail address whose `.freemail` suffix starts
                 * at [suffixIndex], or `null` if there is no valid address. A
                 * valid address consists of a non-empty local part, an `@`, and
                 * 52 characters matching [encodedId] directly before the suffix.
                 */
                private fun addressEndingAt(line: String, suffixIndex: Int): NextLink? {
                        val atSign = suffixIndex - 53
                        // there must be room for at least one local-part character before the '@'
                        if ((atSign < 1) || (line[atSign] != '@')) return null
                        if (!line.substring(atSign + 1, suffixIndex).matches(encodedId)) return null
                        // walk backwards from the '@' for as long as the characters are valid
                        val firstCharacterIndex = generateSequence(atSign) { index ->
                                (index - 1).takeIf { (it >= 0) && line[it].validLocalPart }
                        }.last()
                        // an address without a local part is not a Freemail address
                        if (firstCharacterIndex == atSign) return null
                        val endOfLink = suffixIndex + addressSuffix.length
                        return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, endOfLink), line.substring(endOfLink))
                }

                private val Char.validLocalPart get() = (this in ('A'..'Z')) || (this in ('a'..'z')) || (this in ('0'..'9')) || (this == '-') || (this == '_') || (this == '.')
        };

        /**
         * Finds the first valid link of this type in the given line.
         *
         * Every occurrence of the scheme is examined in order, so an occurrence
         * that does not start a valid link (e.g. a scheme that is directly
         * followed by whitespace) does not hide a valid link later in the line.
         *
         * @param line the text to search
         * @return the first valid link, or `null` if the line contains none
         */
        open fun findNext(line: String): NextLink? =
                        // "it > previous" ends the search when indexOf returns -1 and also
                        // guarantees termination should a scheme ever be empty
                        generateSequence(line.indexOf(scheme).takeIf { it != -1 }) { previous ->
                                line.indexOf(scheme, previous + 1).takeIf { it > previous }
                        }.mapNotNull { schemePosition -> linkAt(line, schemePosition) }.firstOrNull()

        /**
         * Returns the link whose scheme starts at [schemePosition], or `null`
         * if the text at that position is not a valid link of this type.
         */
        private fun linkAt(line: String, schemePosition: Int): NextLink? {
                val linkLength = line.substring(schemePosition).findEndOfLink().validate() ?: return null
                val endPosition = schemePosition + linkLength
                val hasFreenetPrefix = freenetLink && line.substring(0, schemePosition).endsWith("freenet:")
                // a "freenet:" prefix (8 characters) is consumed together with the link
                val position = if (hasFreenetPrefix) schemePosition - 8 else schemePosition
                return NextLink(position, this, line.substring(schemePosition, endPosition), line.substring(endPosition))
        }

        /**
         * Returns the length of the link at the beginning of this string. The
         * link ends at the first whitespace character or at the first closing
         * parenthesis without a matching opening one; trailing punctuation is
         * not part of the link, including punctuation that directly precedes
         * an unmatched closing parenthesis.
         */
        private fun String.findEndOfLink(): Int {
                val candidate = substring(0, whitespace.find(this)?.range?.start ?: length)
                                .dropLastWhile(::isPunctuation)
                return candidate
                                .take(candidate.upToFirstUnmatchedParen())
                                .dropLastWhile(::isPunctuation)
                                .length
        }

        private fun Int.validate() = validateLinkLength(this)

        /**
         * Returns [length] if it is acceptable for a link of this type, `null`
         * otherwise. By default a link must be longer than its scheme.
         */
        protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length }

        /**
         * Returns the index of the first closing parenthesis that has no
         * matching opening parenthesis before it, or the length of this string
         * if there is none.
         */
        private fun String.upToFirstUnmatchedParen(): Int {
                var openParens = 0
                for ((index, currentChar) in withIndex()) {
                        when (currentChar) {
                                '(' -> openParens++
                                ')' -> if (openParens == 0) return index else openParens--
                        }
                }
                return length
        }

}
191
/** Characters that are not considered part of a link when they appear at its end. */
private val punctuationChars: List<Char> = ".,?!".toList()

/** Returns whether the given character is one of [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.contains(char)
194
/** Matches a single character that terminates a link: a line feed or one of the listed Unicode space characters. */
private val whitespace: Regex =
                "[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]".toRegex()
196
/**
 * A link that was located in a line of text.
 *
 * @param position the index in the line at which the link starts
 * @param linkType the type of the link
 * @param link the text of the link
 * @param remainder the text of the line that follows the link
 */
private data class NextLink(
                val position: Int,
                val linkType: LinkType,
                val link: String,
                val remainder: String
)