🚸 Make colons not part of links
author David ‘Bombe’ Roden <bombe@pterodactylus.net>
Fri, 4 Aug 2023 11:28:35 +0000 (13:28 +0200)
committer David ‘Bombe’ Roden <bombe@pterodactylus.net>
Fri, 4 Aug 2023 11:28:35 +0000 (13:28 +0200)
src/main/kotlin/net/pterodactylus/sone/text/SoneTextParser.kt
src/test/kotlin/net/pterodactylus/sone/text/SoneTextParserTest.kt

index 6af0fde..9992e94 100644 (file)
@@ -180,10 +180,13 @@ private enum class LinkType(private val scheme: String, private val freenetLink:
        }
 
        private fun String.findEndOfLink() =
-                       substring(0, whitespace.find(this)?.range?.start ?: length)
+                       substring(0, nonLinkCharacters.find(this, findFirstCharacterAfterScheme())?.range?.start ?: length)
                                        .dropLastWhile(::isPunctuation)
                                        .upToFirstUnmatchedParen()
 
+       private fun String.findFirstCharacterAfterScheme() =
+               values().firstOrNull { startsWith(it.scheme) }?.scheme?.length ?: 0
+
        private fun Int.validate() = validateLinkLength(this)
        protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length }
 
@@ -201,6 +204,6 @@ private enum class LinkType(private val scheme: String, private val freenetLink:
 private val punctuationChars = listOf('.', ',', '?', '!')
 private fun isPunctuation(char: Char) = char in punctuationChars
 
-private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]")
+private val nonLinkCharacters = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000:]")
 
 private data class NextLink(val position: Int, val linkType: LinkType, val link: String, val remainder: String)
index bd14858..e5c0ddd 100644 (file)
@@ -332,6 +332,12 @@ class SoneTextParserTest {
        }
 
        @Test
+       fun `colon is not considered to be a part of the link`() {
+               val parts = soneTextParser.parse("Some text (and a link: http://example.sone/abc_(def):foo.jpg) – nice!", null)
+               assertThat("Part Text", convertText(parts, PlainTextPart::class.java, LinkPart::class.java), equalTo("Some text (and a link: [http://example.sone/abc_(def)|http://example.sone/abc_(def)|example.sone/abc_(def)]:foo.jpg) – nice!"))
+       }
+
+       @Test
        fun `punctuation is ignored at end of link before whitespace`() {
                val parts = soneTextParser.parse("Some text and a link: http://example.sone/abc. Nice!", null)
                assertThat("Part Text", convertText(parts, PlainTextPart::class.java, LinkPart::class.java), equalTo("Some text and a link: [http://example.sone/abc|http://example.sone/abc|example.sone/abc]. Nice!"))