Skip to content

Commit

Permalink
Add forEach extensions to kotlin-codepoints library
Browse files Browse the repository at this point in the history
  • Loading branch information
OptimumCode committed Jun 24, 2024
1 parent 03f0ae8 commit d483391
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 126 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
)
package de.cketti.codepoints.deluxe

import de.cketti.codepoints.CodePoints
import de.cketti.codepoints.codePointAt as intCodePointAt
import de.cketti.codepoints.codePointBefore as intCodePointBefore
import de.cketti.codepoints.forEachCodePoint as intForEachCodePoint
import de.cketti.codepoints.forEachCodePointIndexed as intForEachCodePointIndexed

/**
* Returns the Unicode code point at the specified index.
Expand Down Expand Up @@ -54,50 +55,18 @@ fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length):
}

/**
* Performs given [block] for each [CodePoint] in the [CharSequence]
* between [startIndex] (inclusive) and [endIndex] (exclusive).
* Performs given [action] for each [CodePoint] in the [CharSequence].
*
* @see forEachCodePointIndexed
*/
inline fun CharSequence.forEachCodePoint(
startIndex: Int = 0,
endIndex: Int = length,
block: (codePoint: CodePoint) -> Unit,
) = forEachCodePointIndexed(startIndex, endIndex) { _, codePoint -> block(codePoint) }
action: (codePoint: CodePoint) -> Unit,
) = intForEachCodePoint { action(it.toCodePoint()) }

/**
* Performs given [block] for each [CodePoint] in the [CharSequence]
* between [startIndex] (inclusive) and [endIndex] (exclusive).
* Performs given [action] for each [CodePoint] in the [CharSequence].
* Provides the start index for the given codepoint.
*
* @param startIndex index of the first codepoint in CharSequence to start with (defaults to `0`)
* @param endIndex index of the last codepoint in CharSequence to stop at (defaults to `length`)
*/
inline fun CharSequence.forEachCodePointIndexed(
startIndex: Int = 0,
endIndex: Int = length,
block: (index: Int, codePoint: CodePoint) -> Unit,
) {
require(startIndex <= endIndex) {
"startIndex ($startIndex) must be less than or equal to endIndex ($endIndex)"
}
require(endIndex <= length) {
"endIndex ($endIndex) must be less than or equal to char sequence's length ($length)"
}
val str = this
var index = startIndex
while (index < endIndex) {
val codePointStartIndex = index
val firstChar = str[index]
index++
if (firstChar.isHighSurrogate() && index < endIndex) {
val nextChar = str[index]
if (nextChar.isLowSurrogate()) {
block(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar).toCodePoint())
index++
continue
}
}
block(codePointStartIndex, firstChar.toCodePoint())
}
}
action: (index: Int, codePoint: CodePoint) -> Unit,
) = intForEachCodePointIndexed { index, codePoint -> action(index, codePoint.toCodePoint()) }
Original file line number Diff line number Diff line change
Expand Up @@ -81,45 +81,6 @@ class CharSequenceExtensionsTest {
)
}

@Test
fun forEachCodepoint_with_non_default_indexes() {
    // Gathers every code point reported for the Char range [startIndex, endIndex).
    fun CharSequence.collectCodepoints(
        startIndex: Int,
        endIndex: Int,
    ): List<CodePoint> {
        val collected = mutableListOf<CodePoint>()
        forEachCodePoint(startIndex, endIndex) { collected += it }
        return collected
    }

    // Two surrogate pairs with a single-Char character between them.
    val text = "\uD83E\uDD95a\uD83E\uDD96"

    // Ranges covering exactly one single-Char character yield just that character.
    assertEquals(expected = listOf('a'.toCodePoint()), actual = "ab".collectCodepoints(0, 1))
    assertEquals(expected = listOf('b'.toCodePoint()), actual = "ab".collectCodepoints(1, 2))
    assertEquals(expected = listOf('a'.toCodePoint()), actual = text.collectCodepoints(2, 3))

    // A range boundary that cuts through a surrogate pair yields the lone surrogate as its own code point.
    assertEquals(expected = listOf(0xD83E.toCodePoint()), actual = text.collectCodepoints(0, 1))
    assertEquals(expected = listOf(0xDD95.toCodePoint()), actual = text.collectCodepoints(1, 2))
    assertEquals(
        expected = listOf(0xDD95.toCodePoint(), 'a'.toCodePoint(), 0xD83E.toCodePoint()),
        actual = text.collectCodepoints(1, 4),
    )

    // Invalid ranges are rejected: first startIndex > endIndex, then endIndex > length.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePoint(startIndex = 1, endIndex = 0) { }
    }
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePoint(startIndex = 1, endIndex = 2) { }
    }
}

@Test
fun forEachCodepointIndexed() {
fun CharSequence.collectCodepoints(): List<Pair<Int, CodePoint>> =
Expand All @@ -146,43 +107,4 @@ class CharSequenceExtensionsTest {
"\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
)
}

@Test
fun forEachCodepointIndexed_with_non_default_indexes() {
    // Gathers (start index, code point) pairs reported for the Char range [start, end).
    fun CharSequence.collectCodepoints(start: Int, end: Int): List<Pair<Int, CodePoint>> {
        val collected = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed(start, end) { index, codepoint -> collected += Pair(index, codepoint) }
        return collected
    }

    // Two surrogate pairs with a single-Char character between them.
    val text = "\uD83E\uDD95a\uD83E\uDD96"

    // The reported index is the position in the whole sequence, not relative to the range start.
    assertEquals(expected = listOf(0 to 'a'.toCodePoint()), actual = "ab".collectCodepoints(0, 1))
    assertEquals(expected = listOf(1 to 'b'.toCodePoint()), actual = "ab".collectCodepoints(1, 2))

    // A surrogate pair fully inside the range is decoded into one supplementary code point.
    assertEquals(
        expected = listOf(1 to 0x1F995.toCodePoint()),
        actual = "a\uD83E\uDD95".collectCodepoints(1, 3),
    )

    // Pairs cut by the range boundaries are reported as lone surrogates.
    assertEquals(
        expected = listOf(
            1 to 0xDD95.toCodePoint(),
            2 to 'a'.toCodePoint(),
            3 to 0xD83E.toCodePoint(),
        ),
        actual = text.collectCodepoints(1, 4),
    )
    assertEquals(
        expected = listOf(2 to 'a'.toCodePoint()),
        actual = text.collectCodepoints(2, 3),
    )

    // Invalid ranges are rejected: first startIndex > endIndex, then endIndex > length.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePointIndexed(startIndex = 1, endIndex = 0) { _, _ -> }
    }
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePointIndexed(startIndex = 1, endIndex = 2) { _, _ -> }
    }
}
}
53 changes: 44 additions & 9 deletions kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ fun CharSequence.codePointAt(index: Int): Int {
*
* The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
* sequence.
*
*
* If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high
* surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
* surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
* returned. In all other cases this method behaves like [CharSequence.get] was called with an argument of `index - 1`.
*
* If the value `index - 1` is out of bounds of this character sequence, this method throws an
* If the value `index - 1` is out of bounds of this character sequence, this method throws an
* [IndexOutOfBoundsException].
*/
fun CharSequence.codePointBefore(index: Int): Int {
Expand All @@ -78,11 +78,11 @@ fun CharSequence.codePointBefore(index: Int): Int {

/**
* Returns the number of Unicode code points in the specified text range of this `CharSequence`.
*
* The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
*
* The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
* length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
* one code point each.
*
*
* If `beginIndex` is negative, or `endIndex` is larger than the length of this string, or `beginIndex` is larger than
* `endIndex`, this method throws an [IndexOutOfBoundsException].
*/
Expand All @@ -108,10 +108,10 @@ fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
}

/**
* Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
*
* Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
*
* Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each.
*
*
* If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and
* the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is
* negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this
Expand Down Expand Up @@ -153,3 +153,38 @@ fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
return currentIndex + 1
}
}

/**
 * Invokes [action] once for every code point of this [CharSequence], passing the code point as an [Int].
 *
 * Delegates to [forEachCodePointIndexed] and drops the index.
 *
 * @see forEachCodePointIndexed
 */
inline fun CharSequence.forEachCodePoint(
    action: (codePoint: Int) -> Unit,
) {
    forEachCodePointIndexed { _, value -> action(value) }
}

/**
 * Invokes [action] for every code point of this [CharSequence], walking from the first `Char` to the last.
 *
 * [action] receives the `Char` index at which the code point starts together with the code point value.
 * A high surrogate immediately followed by a low surrogate is combined into a single code point via
 * [CodePoints.toCodePoint]; every other `Char`, including an unpaired surrogate, is reported as its own
 * [Char.code].
 */
inline fun CharSequence.forEachCodePointIndexed(
    action: (index: Int, codePoint: Int) -> Unit,
) {
    // The length is captured once, before iteration starts.
    val limit = length
    var cursor = 0
    while (cursor < limit) {
        val start = cursor
        val lead = this[start]
        // Assume a single-Char code point until a complete surrogate pair is confirmed.
        var value = lead.code
        var next = start + 1
        if (lead.isHighSurrogate() && next < limit) {
            val trail = this[next]
            if (trail.isLowSurrogate()) {
                value = CodePoints.toCodePoint(lead, trail)
                next++
            }
        }
        action(start, value)
        cursor = next
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,53 @@ class CharSequenceExtensionsTest {
"\uD83E\uDD95".offsetByCodePoints(index = 2, codePointOffset = -2)
}
}

@Test
fun forEachCodepoint() {
    // Gathers every code point reported for the whole sequence.
    fun CharSequence.collectCodepoints(): List<Int> {
        val collected = mutableListOf<Int>()
        forEachCodePoint { collected += it }
        return collected
    }

    // Empty input: the action is never invoked.
    assertEquals(expected = emptyList(), actual = "".collectCodepoints())

    // Single-Char code points are reported with their Char code.
    assertEquals(expected = listOf('a'.code), actual = "a".collectCodepoints())
    assertEquals(expected = listOf('a'.code, 0xFFFF), actual = "a\uFFFF".collectCodepoints())

    // Each surrogate pair is decoded into one supplementary code point.
    assertEquals(
        expected = listOf(0x1F995, 'a'.code, 0x1F996),
        actual = "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
    )
}

@Test
fun forEachCodepointIndexed() {
    // Gathers (start index, code point) pairs reported for the whole sequence.
    fun CharSequence.collectCodepoints(): List<Pair<Int, Int>> {
        val collected = mutableListOf<Pair<Int, Int>>()
        forEachCodePointIndexed { index, codepoint -> collected += Pair(index, codepoint) }
        return collected
    }

    // Empty input: the action is never invoked.
    assertEquals(expected = emptyList(), actual = "".collectCodepoints())

    assertEquals(expected = listOf(0 to 'a'.code), actual = "a".collectCodepoints())

    // A surrogate pair is reported once, at the index of its high surrogate.
    assertEquals(
        expected = listOf(0 to 'a'.code, 1 to 0x1F995),
        actual = "a\uD83E\uDD95".collectCodepoints(),
    )

    // Indexes count Chars, so the character after a pair starts two positions later.
    assertEquals(
        expected = listOf(
            0 to 0x1F995,
            2 to 'a'.code,
            3 to 0x1F996,
        ),
        actual = "\uD83E\uDD95a\uD83E\uDD96".collectCodepoints(),
    )
}
}

0 comments on commit d483391

Please sign in to comment.