🐛 Remove backlinks from keys (thx, TheSeeker!)
[Sone.git] / src / main / kotlin / net / pterodactylus / sone / text / SoneTextParser.kt
1 package net.pterodactylus.sone.text
2
3 import freenet.keys.*
4 import freenet.support.*
5 import net.pterodactylus.sone.data.*
6 import net.pterodactylus.sone.data.impl.*
7 import net.pterodactylus.sone.database.*
8 import net.pterodactylus.sone.text.LinkType.*
9 import net.pterodactylus.sone.text.LinkType.USK
10 import org.bitpedia.util.*
11 import java.net.*
12 import javax.inject.*
13
/**
 * Parses text into a list of [Part]s, recognizing Freenet URIs, Sone and
 * post links, Freemail addresses, and HTTP(S) links.
 */
class SoneTextParser @Inject constructor(private val soneProvider: SoneProvider?, private val postProvider: PostProvider?) {

	/**
	 * Parses the given text into parts.
	 *
	 * The text is split at line breaks; blank lines at the beginning and the
	 * end are dropped and runs of empty lines are collapsed. Every remaining
	 * line is split into link and plain-text parts, empty plain-text parts
	 * are removed, and neighbouring plain-text parts are merged.
	 *
	 * @param source The text to parse
	 * @param context The context of the parser; its posting Sone decides
	 * whether SSK and USK links are marked as trusted (may be `null`)
	 * @return The parsed parts
	 */
	fun parse(source: String, context: SoneTextParserContext?): List<Part> =
			source.split("\n")
					.dropWhile { it.trim() == "" }
					.dropLastWhile { it.trim() == "" }
					.mergeMultipleEmptyLines()
					.flatMap { splitLineIntoParts(it, context) }
					.removeEmptyPlainTextParts()
					.mergeAdjacentPlainTextParts()

	/**
	 * Splits a single line into parts by repeatedly cutting off the next
	 * part until nothing is left of the line. The sequence is seeded with an
	 * empty plain-text part which [parse] filters out again.
	 */
	private fun splitLineIntoParts(line: String, context: SoneTextParserContext?): List<Part> =
			generateSequence(Pair<Part, String>(PlainTextPart(""), line)) { (_, unparsed) ->
				if (unparsed == "") null else nextPart(unparsed, context)
			}.map { it.first }.toList()

	/**
	 * Cuts the next part off the given text and returns it together with the
	 * text that still needs to be parsed.
	 */
	private fun nextPart(text: String, context: SoneTextParserContext?): Pair<Part, String> {
		// a stable sort followed by firstOrNull() selects the same element as
		// minBy did, but does not depend on the nullability of minBy which
		// differs between Kotlin versions.
		val nextLink = LinkType.values()
				.mapNotNull { it.findNext(text) }
				.sortedBy { it.position }
				.firstOrNull()
		return when {
			nextLink == null -> PlainTextPart(text) to ""
			nextLink.position == 0 -> nextLink.toPart(context) to nextLink.remainder
			// emit the text in front of the link first; the link itself is
			// put back in front of the remainder so that the next round
			// finds it at position 0.
			else -> PlainTextPart(text.substring(0, nextLink.position)) to (nextLink.link + nextLink.remainder)
		}
	}

	/**
	 * The link, cut off at the first `/../` unless that occurs after the
	 * first `?` of the link.
	 */
	private val NextLink.linkWithoutBacklink: String
		get() {
			val backlink = link.indexOf("/../")
			val query = link.indexOf("?")
			val backlinkBeforeQuery = (backlink > -1) && ((query == -1) || (backlink < query))
			return if (backlinkBeforeQuery) link.substring(0, backlink) else link
		}

	/**
	 * Converts a located link into the part that represents it.
	 */
	private fun NextLink.toPart(context: SoneTextParserContext?): Part = when (linkType) {
		KSK, CHK -> toKeyLinkPart()
		SSK, USK -> toSiteLinkPart(context)
		// "sone://" and "post://" are both 7 characters long
		SONE -> link.substring(7).let { SonePart(soneProvider?.getSone(it) ?: IdOnlySone(it)) }
		POST -> postProvider?.getPost(link.substring(7))?.let { PostPart(it) } ?: PlainTextPart(link)
		FREEMAIL -> link.indexOf('@').let { atSign ->
			// the trailing 9 characters are the ".freemail" suffix
			link.substring(atSign + 1, link.length - 9).let { freemailId ->
				FreemailPart(link.substring(0, atSign), freemailId, freemailId.decodedId)
			}
		}
		HTTP, HTTPS -> LinkPart(link, link
				.withoutProtocol
				.withoutWwwPrefix
				.withoutUrlParameters
				.withoutMiddlePathComponents
				.withoutTrailingSlash)
	}

	/**
	 * Creates the part for a KSK or CHK link. The link text is the last
	 * non-empty meta string, the document name, or the first (up to) nine
	 * characters of the link, whichever exists first. Links that can not be
	 * parsed as a Freenet URI are returned as plain text.
	 */
	private fun NextLink.toKeyLinkPart(): Part {
		val cleanLink = linkWithoutBacklink
		return try {
			val freenetUri = FreenetURI(cleanLink)
			FreenetLinkPart(
					cleanLink,
					// take() instead of substring() so that a short link can not throw
					freenetUri.allMetaStrings?.lastOrNull { it != "" } ?: freenetUri.docName ?: cleanLink.take(9),
					cleanLink.split('?').first()
			)
		} catch (e: MalformedURLException) {
			PlainTextPart(cleanLink)
		}
	}

	/**
	 * Creates the part for an SSK or USK link. The link text is the last
	 * meta string (unless the only meta string is empty), the document
	 * name, or the key type and the Base64-encoded routing key. The link is
	 * trusted if its routing key equals the routing key of the context.
	 * Links that can not be parsed as a Freenet URI are returned as plain
	 * text.
	 */
	private fun NextLink.toSiteLinkPart(context: SoneTextParserContext?): Part {
		val cleanLink = linkWithoutBacklink
		return try {
			// parse the URI once and use it for both the text and the trust check
			val uri = FreenetURI(cleanLink)
			val text = uri.allMetaStrings
					?.takeIf { (it.size > 1) || ((it.size == 1) && (it.single() != "")) }
					?.lastOrNull()
					?: uri.docName
					?: "${uri.keyType}@${uri.routingKey.freenetBase64}"
			FreenetLinkPart(cleanLink.removeSuffix("/"), text, trusted = context?.routingKey?.contentEquals(uri.routingKey) == true)
		} catch (e: MalformedURLException) {
			PlainTextPart(cleanLink)
		}
	}

}
95
/**
 * Prefixes every line but the first with a line break and collapses runs of
 * empty lines: an empty line is dropped if the element before it already is
 * a lone line break.
 */
private fun List<String>.mergeMultipleEmptyLines(): List<String> {
	val merged = mutableListOf<String>()
	for (line in this) {
		when {
			// the very first line is kept as it is
			merged.isEmpty() -> merged.add(line)
			// an empty line following an empty line is skipped
			(merged.last() == "\n") && (line == "") -> Unit
			else -> merged.add("\n" + line)
		}
	}
	return merged
}
107
/**
 * Joins every run of neighbouring [PlainTextPart]s into a single
 * [PlainTextPart] containing their concatenated texts. All other parts are
 * kept unchanged and in order.
 */
private fun List<Part>.mergeAdjacentPlainTextParts(): List<Part> {
	val merged = mutableListOf<Part>()
	for (part in this) {
		val previous = merged.lastOrNull()
		if ((previous is PlainTextPart) && (part is PlainTextPart)) {
			// replace the previous plain-text part by the combined one
			merged[merged.lastIndex] = PlainTextPart(previous.text + part.text)
		} else {
			merged.add(part)
		}
	}
	return merged
}
115
/**
 * Returns all parts that are not equal to a [PlainTextPart] with an empty
 * text.
 */
private fun List<Part>.removeEmptyPlainTextParts(): List<Part> {
	val emptyPart = PlainTextPart("")
	return filter { it != emptyPart }
}
117
/** This string, decoded as Base32 and encoded again as Base64. */
private val String.decodedId: String
	get() {
		val rawId = Base32.decode(this)
		return Base64.encode(rawId)
	}
/**
 * Everything after the first `//` of this string. A string that does not
 * contain `//` is returned unchanged instead of losing its first character.
 */
private val String.withoutProtocol get() = substringAfter("//")
/** Everything in front of the first `?`, or the whole string if there is none. */
private val String.withoutUrlParameters: String
	get() = substringBefore('?')
121
/**
 * This string with all leading `www` labels removed from its first
 * `/`-separated component.
 */
private val String.withoutWwwPrefix: String
	get() {
		val components = split("/")
		val strippedComponents = components.replaceFirst { host ->
			host.split(".").dropWhile { it == "www" }.joinToString(".")
		}
		return strippedComponents.joinToString("/")
	}
126
/**
 * Returns a list in which the first element has been replaced by the result
 * of applying [replacement] to it. All other elements are kept; an empty
 * list stays empty.
 */
private fun <T> List<T>.replaceFirst(replacement: (T) -> T): List<T> =
		if (isEmpty()) emptyList() else listOf(replacement(first())) + drop(1)
130
/**
 * This string with everything between its first and its last `/`-separated
 * component replaced by `…`. Strings with fewer than three components are
 * returned unchanged.
 */
private val String.withoutMiddlePathComponents: String
	get() {
		val components = split("/")
		return when {
			components.size > 2 -> "${components.first()}/…/${components.last()}"
			// joining the components with "/" again yields the original string
			else -> this
		}
	}
/** This string without a single trailing `/`, if it has one. */
private val String.withoutTrailingSlash: String
	get() = removeSuffix("/")
/** The routing key of the posting Sone, or `null` if the context has no posting Sone. */
private val SoneTextParserContext.routingKey: ByteArray?
	get() {
		val sone = postingSone ?: return null
		return sone.routingKey
	}
/** The ID of this Sone, decoded from Base64. */
private val Sone.routingKey: ByteArray
	get() {
		return Base64.decode(id)
	}
142
/**
 * The types of links that can be located in a line of text.
 *
 * @param scheme The text a link of this type starts with
 * @param freenetLink `true` if a link of this type may be preceded by
 * `freenet:`, in which case the prefix is counted as part of the link's
 * position
 */
private enum class LinkType(private val scheme: String, private val freenetLink: Boolean) {

	KSK("KSK@", true),
	CHK("CHK@", true),
	SSK("SSK@", true),
	USK("USK@", true),
	HTTP("http://", false),
	HTTPS("https://", false),
	SONE("sone://", false) {
		// Sone links are only valid if they are exactly 50 characters long
		override fun validateLinkLength(length: Int) = length.takeIf { it == 50 }
	},
	POST("post://", false),
	FREEMAIL("", true) {
		// compiled once instead of on every call of findNext()
		private val encodedIdPattern = Regex("^[a-z2-7]*\$")

		/**
		 * Locates a Freemail address: a local part, an `@`, 52 characters
		 * out of `a-z` and `2-7`, and the `.freemail` suffix. Only the first
		 * occurrence of `.freemail` in the line is considered.
		 */
		override fun findNext(line: String): NextLink? {
			val nextFreemailSuffix = line.indexOf(".freemail").takeIf { it >= 54 } ?: return null
			// the 52 characters of the ID are directly preceded by the "@"
			val atSign = nextFreemailSuffix - 53
			if (line[atSign] != '@') return null
			if (!line.substring(atSign + 1, nextFreemailSuffix).matches(encodedIdPattern)) return null
			// walk backwards from the "@" for as long as the characters are
			// valid in a local part; the sequence always contains its seed.
			val firstCharacterIndex = generateSequence(atSign) { index ->
				(index - 1).takeIf { (it >= 0) && line[it].validLocalPart }
			}.last()
			// ".freemail" is 9 characters long
			val endOfLink = nextFreemailSuffix + 9
			return NextLink(firstCharacterIndex, this, line.substring(firstCharacterIndex, endOfLink), line.substring(endOfLink))
		}

		private val Char.validLocalPart get() = (this in ('A'..'Z')) || (this in ('a'..'z')) || (this in ('0'..'9')) || (this == '-') || (this == '_') || (this == '.')
	};

	/**
	 * Locates the first occurrence of this link type's scheme in the given
	 * line and returns the link starting there.
	 *
	 * @param line The line to search
	 * @return The located link, or `null` if the scheme does not occur in
	 * the line or the link starting at its first occurrence does not have a
	 * valid length
	 */
	open fun findNext(line: String): NextLink? {
		val nextLinkPosition = line.indexOf(scheme).takeIf { it != -1 } ?: return null
		val endOfLink = validateLinkLength(line.substring(nextLinkPosition).findEndOfLink()) ?: return null
		val link = line.substring(nextLinkPosition, nextLinkPosition + endOfLink)
		// a "freenet:" prefix (8 characters) is swallowed by moving the
		// position in front of it; it is not part of the link itself.
		val hasFreenetPrefix = freenetLink && line.substring(0, nextLinkPosition).endsWith("freenet:")
		val realNextLinkPosition = if (hasFreenetPrefix) nextLinkPosition - 8 else nextLinkPosition
		return NextLink(realNextLinkPosition, this, link, line.substring(nextLinkPosition + endOfLink))
	}

	/**
	 * Returns the length of the link this string starts with: the string is
	 * cut at the first whitespace, trailing punctuation is removed, and the
	 * result is cut at the first closing parenthesis that has no opening
	 * counterpart.
	 */
	private fun String.findEndOfLink(): Int =
			substring(0, whitespace.find(this)?.range?.start ?: length)
					.dropLastWhile(::isPunctuation)
					.upToFirstUnmatchedParen()

	/**
	 * Returns the given length if it is valid for a link of this type, or
	 * `null` if it is not. By default a link has to be longer than its
	 * scheme.
	 */
	protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length }

	/**
	 * Returns the index of the first `)` that is not matched by a preceding
	 * `(`, or the length of this string if there is no such parenthesis.
	 */
	private fun String.upToFirstUnmatchedParen(): Int {
		var openParens = 0
		for ((index, currentChar) in withIndex()) {
			when (currentChar) {
				'(' -> openParens++
				')' -> {
					if (openParens == 0) {
						return index
					}
					openParens--
				}
			}
		}
		return length
	}

}
195
/** Characters that are removed from the end of a link. */
private val punctuationChars: List<Char> = listOf('.', ',', '?', '!')

/** Returns whether the given character is one of the [punctuationChars]. */
private fun isPunctuation(char: Char): Boolean = punctuationChars.contains(char)
198
/**
 * Matches a single one of the characters that end a link. The set is an
 * explicit list of code points; characters that are not listed (such as the
 * tab character) do not match.
 */
private val whitespace: Regex =
		"[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]".toRegex()
200
/**
 * A link that was located in a line of text.
 *
 * @param position The index in the line at which the link starts
 * @param linkType The type of the link
 * @param link The text of the link
 * @param remainder The text of the line after the link
 */
private data class NextLink(
		val position: Int,
		val linkType: LinkType,
		val link: String,
		val remainder: String
)
202
/** This byte array encoded by [Base64.encode]; fails if the encoder returns `null`. */
private val ByteArray.freenetBase64
	get() = Base64.encode(this)!!