diff --git a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt index d3df314..f73e8cb 100644 --- a/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt +++ b/kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt @@ -4,9 +4,10 @@ ) package de.cketti.codepoints.deluxe -import de.cketti.codepoints.CodePoints import de.cketti.codepoints.codePointAt as intCodePointAt import de.cketti.codepoints.codePointBefore as intCodePointBefore +import de.cketti.codepoints.forEachCodePoint as intForEachCodePoint +import de.cketti.codepoints.forEachCodePointIndexed as intForEachCodePointIndexed /** * Returns the Unicode code point at the specified index. @@ -54,50 +55,18 @@ fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): } /** - * Performs given [block] for each [CodePoint] in the [CharSequence] - * between [startIndex] (inclusive) and [endIndex] (exclusive). + * Performs given [action] for each [CodePoint] in the [CharSequence]. * * @see forEachCodePointIndexed */ inline fun CharSequence.forEachCodePoint( - startIndex: Int = 0, - endIndex: Int = length, - block: (codePoint: CodePoint) -> Unit, -) = forEachCodePointIndexed(startIndex, endIndex) { _, codePoint -> block(codePoint) } + action: (codePoint: CodePoint) -> Unit, +) = intForEachCodePoint { action(it.toCodePoint()) } /** - * Performs given [block] for each [CodePoint] in the [CharSequence] - * between [startIndex] (inclusive) and [endIndex] (exclusive). + * Performs given [action] for each [CodePoint] in the [CharSequence]. 
* Provides the start index for the given codepoint - * - * @param startIndex index of the first codepoint in CharSequence to start with (defaults to `0`) - * @param endIndex index of the last codepoint in CharSequence to stop at (defaults to `length`) */ inline fun CharSequence.forEachCodePointIndexed( - startIndex: Int = 0, - endIndex: Int = length, - block: (index: Int, codePoint: CodePoint) -> Unit, -) { - require(startIndex <= endIndex) { - "startIndex ($startIndex) must be less than or equal to endIndex ($endIndex)" - } - require(endIndex <= length) { - "endIndex ($endIndex) must be less than or equal to char sequence's length ($length)" - } - val str = this - var index = startIndex - while (index < endIndex) { - val codePointStartIndex = index - val firstChar = str[index] - index++ - if (firstChar.isHighSurrogate() && index < endIndex) { - val nextChar = str[index] - if (nextChar.isLowSurrogate()) { - block(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar).toCodePoint()) - index++ - continue - } - } - block(codePointStartIndex, firstChar.toCodePoint()) - } -} \ No newline at end of file + action: (index: Int, codePoint: CodePoint) -> Unit, +) = intForEachCodePointIndexed { index, codePoint -> action(index, codePoint.toCodePoint()) } \ No newline at end of file diff --git a/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt b/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt index 39a6982..5768358 100644 --- a/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt +++ b/kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt @@ -81,45 +81,6 @@ class CharSequenceExtensionsTest { ) } - @Test - fun forEachCodepoint_with_non_default_indexes() { - fun CharSequence.collectCodepoints( - startIndex: Int, - endIndex: Int, - ): List = buildList { forEachCodePoint(startIndex, endIndex) { add(it) } } - - assertEquals( - listOf('a'.toCodePoint()), - 
"ab".collectCodepoints(0, 1), - ) - assertEquals( - listOf('b'.toCodePoint()), - "ab".collectCodepoints(1, 2), - ) - assertEquals( - listOf('a'.toCodePoint()), - "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(2, 3), - ) - assertEquals( - listOf(0xD83E.toCodePoint()), - "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(0, 1), - ) - assertEquals( - listOf(0xDD95.toCodePoint()), - "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 2), - ) - assertEquals( - listOf(0xDD95.toCodePoint(), 'a'.toCodePoint(), 0xD83E.toCodePoint()), - "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 4), - ) - assertFailsWith(IllegalArgumentException::class) { - "a".forEachCodePoint(startIndex = 1, endIndex = 0) { } - } - assertFailsWith(IllegalArgumentException::class) { - "a".forEachCodePoint(startIndex = 1, endIndex = 2) { } - } - } - @Test fun forEachCodepointIndexed() { fun CharSequence.collectCodepoints(): List> = @@ -146,43 +107,4 @@ class CharSequenceExtensionsTest { "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(), ) } - - @Test - fun forEachCodepointIndexed_with_non_default_indexes() { - fun CharSequence.collectCodepoints(start: Int, end: Int): List> = - buildList { forEachCodePointIndexed(start, end) { index, codepoint -> add(index to codepoint) } } - - assertEquals( - listOf(0 to 'a'.toCodePoint()), - "ab".collectCodepoints(0, 1), - ) - assertEquals( - listOf(1 to 'b'.toCodePoint()), - "ab".collectCodepoints(1, 2), - ) - assertEquals( - listOf(1 to 0x1F995.toCodePoint()), - "a\uD83E\uDD95".collectCodepoints(1, 3), - ) - assertEquals( - listOf( - 1 to 0xDD95.toCodePoint(), - 2 to 'a'.toCodePoint(), - 3 to 0xD83E.toCodePoint(), - ), - "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(1, 4), - ) - assertEquals( - listOf( - 2 to 'a'.toCodePoint(), - ), - "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(2, 3), - ) - assertFailsWith(IllegalArgumentException::class) { - "a".forEachCodePointIndexed(startIndex = 1, endIndex = 0) { _, _ -> } - } - assertFailsWith(IllegalArgumentException::class) { - 
"a".forEachCodePointIndexed(startIndex = 1, endIndex = 2) { _, _ -> } - } - } } diff --git a/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt b/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt index 290cdeb..a945ee6 100644 --- a/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt +++ b/kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt @@ -53,12 +53,12 @@ fun CharSequence.codePointAt(index: Int): Int { * * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character * sequence. - * + * * If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high - * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is + * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is * returned. In all other cases this method behaves like [CharSequence.get] was called with an argument of `index - 1`. * - * If the value `index - 1` is out of bounds of this character sequence, this method throws an + * If the value `index - 1` is out of bounds of this character sequence, this method throws an * [IndexOutOfBoundsException]. */ fun CharSequence.codePointBefore(index: Int): Int { @@ -78,11 +78,11 @@ fun CharSequence.codePointBefore(index: Int): Int { /** * Returns the number of Unicode code points in the specified text range of this `CharSequence`. - * - * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the + * + * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the * length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as * one code point each. 
- * + * If `beginIndex` is negative, or `endIndex` is larger than the length of this string, or `beginIndex` is larger than * `endIndex`, this method throws an [IndexOutOfBoundsException]. */ @@ -108,10 +108,10 @@ fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int { } /** - * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points. - * + * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points. + * * Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each. - * + * * If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and * the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is * negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this @@ -153,3 +153,38 @@ fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int { return currentIndex + 1 } } + +/** + * Performs given [action] for each codepoint in the [CharSequence]. + * + * @see forEachCodePointIndexed + */ +inline fun CharSequence.forEachCodePoint( + action: (codePoint: Int) -> Unit, +) = forEachCodePointIndexed { _, codePoint -> action(codePoint) } + +/** + * Performs given [action] for each codepoint in the [CharSequence]. 
+ * Provides the start index for the given codepoint. + */ +inline fun CharSequence.forEachCodePointIndexed( + action: (index: Int, codePoint: Int) -> Unit, +) { + val str = this + var index = 0 + val endIndex = length + while (index < endIndex) { + val codePointStartIndex = index + val firstChar = str[index] + index++ + if (firstChar.isHighSurrogate() && index < endIndex) { + val nextChar = str[index] + if (nextChar.isLowSurrogate()) { + action(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar)) + index++ + continue + } + } + action(codePointStartIndex, firstChar.code) + } +} diff --git a/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt b/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt index 8d36622..a9f443c 100644 --- a/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt +++ b/kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt @@ -154,4 +154,53 @@ class CharSequenceExtensionsTest { "\uD83E\uDD95".offsetByCodePoints(index = 2, codePointOffset = -2) } } + + @Test + fun forEachCodepoint() { + fun CharSequence.collectCodepoints(): List<Int> = buildList { forEachCodePoint { add(it) } } + + assertEquals( + emptyList(), + "".collectCodepoints(), + ) + assertEquals( + listOf('a'.code), + "a".collectCodepoints(), + ) + assertEquals( + listOf('a'.code, 0xFFFF), + "a\uFFFF".collectCodepoints(), + ) + assertEquals( + listOf(0x1F995, 'a'.code, 0x1F996), + "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(), + ) + } + + @Test + fun forEachCodepointIndexed() { + fun CharSequence.collectCodepoints(): List<Pair<Int, Int>> = + buildList { forEachCodePointIndexed { index, codepoint -> add(index to codepoint) } } + + assertEquals( + emptyList(), + "".collectCodepoints(), + ) + assertEquals( + listOf(0 to 'a'.code), + "a".collectCodepoints(), + ) + assertEquals( + listOf(0 to 'a'.code, 1 to 0x1F995), + "a\uD83E\uDD95".collectCodepoints(), + ) + assertEquals( + listOf( + 0 to 0x1F995, + 2 to 'a'.code, + 3 to 0x1F996, 
+ ), + "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(), + ) + } }