From 3deb35cc7f6da30d562849aacfc58f217de81eb6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?= Date: Fri, 4 Aug 2023 13:28:35 +0200 Subject: [PATCH] =?utf8?q?=F0=9F=9A=B8=20Make=20colons=20not=20part=20of?= =?utf8?q?=20links?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/net/pterodactylus/sone/text/SoneTextParser.kt | 7 +++++-- src/test/kotlin/net/pterodactylus/sone/text/SoneTextParserTest.kt | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/net/pterodactylus/sone/text/SoneTextParser.kt b/src/main/kotlin/net/pterodactylus/sone/text/SoneTextParser.kt index 6af0fde..9992e94 100644 --- a/src/main/kotlin/net/pterodactylus/sone/text/SoneTextParser.kt +++ b/src/main/kotlin/net/pterodactylus/sone/text/SoneTextParser.kt @@ -180,10 +180,13 @@ private enum class LinkType(private val scheme: String, private val freenetLink: } private fun String.findEndOfLink() = - substring(0, whitespace.find(this)?.range?.start ?: length) + substring(0, nonLinkCharacters.find(this, findFirstCharacterAfterScheme())?.range?.start ?: length) .dropLastWhile(::isPunctuation) .upToFirstUnmatchedParen() + private fun String.findFirstCharacterAfterScheme() = + values().firstOrNull { startsWith(it.scheme) }?.scheme?.length ?: 0 + private fun Int.validate() = validateLinkLength(this) protected open fun validateLinkLength(length: Int) = length.takeIf { it > scheme.length } @@ -201,6 +204,6 @@ private enum class LinkType(private val scheme: String, private val freenetLink: private val punctuationChars = listOf('.', ',', '?', '!') private fun isPunctuation(char: Char) = char in punctuationChars -private val whitespace = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000]") +private val nonLinkCharacters = Regex("[\\u000a\u0020\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u202f\u205f\u2060\u2800\u3000:]") private data class NextLink(val position: Int, val linkType: LinkType, val link: String, val remainder: String) diff --git a/src/test/kotlin/net/pterodactylus/sone/text/SoneTextParserTest.kt b/src/test/kotlin/net/pterodactylus/sone/text/SoneTextParserTest.kt index bd14858..e5c0ddd 100644 --- a/src/test/kotlin/net/pterodactylus/sone/text/SoneTextParserTest.kt +++ b/src/test/kotlin/net/pterodactylus/sone/text/SoneTextParserTest.kt @@ -332,6 +332,12 @@ class SoneTextParserTest { } @Test + fun `colon is not considered to be a part of the link`() { + val parts = soneTextParser.parse("Some text (and a link: http://example.sone/abc_(def):foo.jpg) – nice!", null) + assertThat("Part Text", convertText(parts, PlainTextPart::class.java, LinkPart::class.java), equalTo("Some text (and a link: [http://example.sone/abc_(def)|http://example.sone/abc_(def)|example.sone/abc_(def)]:foo.jpg) – nice!")) + } + + @Test fun `punctuation is ignored at end of link before whitespace`() { val parts = soneTextParser.parse("Some text and a link: http://example.sone/abc. Nice!", null) assertThat("Part Text", convertText(parts, PlainTextPart::class.java, LinkPart::class.java), equalTo("Some text and a link: [http://example.sone/abc|http://example.sone/abc|example.sone/abc]. Nice!")) -- 2.7.4