Skip to content

Commit

Permalink
Add forEach extension methods to iterate over code points
Browse files Browse the repository at this point in the history
  • Loading branch information
OptimumCode committed Jun 14, 2024
1 parent 20b40c0 commit 03f0ae8
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
)
package de.cketti.codepoints.deluxe

import de.cketti.codepoints.CodePoints
import de.cketti.codepoints.codePointAt as intCodePointAt
import de.cketti.codepoints.codePointBefore as intCodePointBefore

Expand Down Expand Up @@ -51,3 +52,52 @@ fun CharSequence.codePointSequence(): CodePointSequence {
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator =
    // Pure factory: the receiver and both indexes are passed to the iterator's constructor unchanged.
    CodePointIterator(this, startIndex, endIndex)

/**
 * Invokes [block] once for every [CodePoint] found in this [CharSequence]
 * from [startIndex] (inclusive) up to [endIndex] (exclusive).
 *
 * This is the index-less variant: it forwards both indexes unchanged to
 * [forEachCodePointIndexed] and drops the index that function reports.
 *
 * @see forEachCodePointIndexed
 */
inline fun CharSequence.forEachCodePoint(
    startIndex: Int = 0,
    endIndex: Int = length,
    block: (codePoint: CodePoint) -> Unit,
) {
    forEachCodePointIndexed(startIndex = startIndex, endIndex = endIndex) { _, current ->
        block(current)
    }
}

/**
 * Performs given [block] for each [CodePoint] in the [CharSequence]
 * between [startIndex] (inclusive) and [endIndex] (exclusive).
 * Provides the start index for the given code point.
 *
 * Both indexes are `Char` indexes into the [CharSequence], not code point counts.
 * A high surrogate is combined with the `Char` that follows it only when that `Char`
 * is a low surrogate located before [endIndex]. Any other surrogate (unpaired, or split
 * by the range boundaries) is passed to [block] as a code point of its own.
 *
 * @param startIndex index of the first `Char` to process (defaults to `0`)
 * @param endIndex index one past the last `Char` to process (defaults to `length`)
 * @param block receives the `Char` index at which the code point starts and the code point itself
 * @throws IllegalArgumentException if [startIndex] is negative, [startIndex] is greater than [endIndex],
 * or [endIndex] is greater than the length of the [CharSequence]
 */
inline fun CharSequence.forEachCodePointIndexed(
    startIndex: Int = 0,
    endIndex: Int = length,
    block: (index: Int, codePoint: CodePoint) -> Unit,
) {
    // Reject a negative start up front so every invalid range fails the same way,
    // instead of surfacing later as an index error from the char access (or not at all).
    require(startIndex >= 0) {
        "startIndex ($startIndex) must be greater than or equal to 0"
    }
    require(startIndex <= endIndex) {
        "startIndex ($startIndex) must be less than or equal to endIndex ($endIndex)"
    }
    require(endIndex <= length) {
        "endIndex ($endIndex) must be less than or equal to char sequence's length ($length)"
    }
    val str = this
    var index = startIndex
    while (index < endIndex) {
        val codePointStartIndex = index
        val firstChar = str[index]
        index++
        // Only look ahead while still inside the requested range, so a pair that
        // straddles endIndex is never combined.
        if (firstChar.isHighSurrogate() && index < endIndex) {
            val nextChar = str[index]
            if (nextChar.isLowSurrogate()) {
                block(codePointStartIndex, CodePoints.toCodePoint(firstChar, nextChar).toCodePoint())
                // Consume the low surrogate as well.
                index++
                continue
            }
        }
        // BMP character or a surrogate without a usable partner.
        block(codePointStartIndex, firstChar.toCodePoint())
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package de.cketti.codepoints.deluxe

import kotlin.test.assertEquals
import kotlin.test.Test
import kotlin.test.assertFailsWith

class CharSequenceExtensionsTest {
@Test
Expand Down Expand Up @@ -57,4 +58,131 @@ class CharSequenceExtensionsTest {
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(1))
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(3))
}

@Test
fun forEachCodepoint() {
    // Gathers every code point reported by forEachCodePoint when called with default indexes.
    fun CharSequence.visited(): List<CodePoint> {
        val seen = mutableListOf<CodePoint>()
        forEachCodePoint { codePoint -> seen += codePoint }
        return seen
    }

    // Empty input: the block is never invoked.
    assertEquals(emptyList<CodePoint>(), "".visited())

    // Single BMP character.
    assertEquals(listOf('a'.toCodePoint()), "a".visited())

    // Two BMP characters, the second being the highest BMP value.
    val expectedBmpPair = listOf('a'.toCodePoint(), 0xFFFF.toCodePoint())
    assertEquals(expectedBmpPair, "a\uFFFF".visited())

    // Surrogate pairs on both sides of a BMP character are each reported as one code point.
    val expectedMixed = listOf(0x1F995.toCodePoint(), 'a'.toCodePoint(), 0x1F996.toCodePoint())
    assertEquals(expectedMixed, "\uD83E\uDD95a\uD83E\uDD96".visited())
}

@Test
fun forEachCodepoint_with_non_default_indexes() {
    // Gathers the code points reported by forEachCodePoint for the given index range.
    fun CharSequence.visited(from: Int, to: Int): List<CodePoint> {
        val seen = mutableListOf<CodePoint>()
        forEachCodePoint(from, to) { codePoint -> seen += codePoint }
        return seen
    }

    val letters = "ab"
    val dinosaurs = "\uD83E\uDD95a\uD83E\uDD96"

    // Plain BMP input: each one-char window yields exactly that char.
    assertEquals(listOf('a'.toCodePoint()), letters.visited(0, 1))
    assertEquals(listOf('b'.toCodePoint()), letters.visited(1, 2))

    // Window covering only the BMP character between the two surrogate pairs.
    assertEquals(listOf('a'.toCodePoint()), dinosaurs.visited(2, 3))

    // Window ending between the halves of a pair: the high surrogate is reported alone.
    assertEquals(listOf(0xD83E.toCodePoint()), dinosaurs.visited(0, 1))

    // Window starting between the halves of a pair: the low surrogate is reported alone.
    assertEquals(listOf(0xDD95.toCodePoint()), dinosaurs.visited(1, 2))

    // Window splitting both pairs at once.
    assertEquals(
        listOf(0xDD95.toCodePoint(), 'a'.toCodePoint(), 0xD83E.toCodePoint()),
        dinosaurs.visited(1, 4),
    )

    // startIndex greater than endIndex is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePoint(startIndex = 1, endIndex = 0) { }
    }
    // endIndex beyond the length is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePoint(startIndex = 1, endIndex = 2) { }
    }
}

@Test
fun forEachCodepointIndexed() {
    // Gathers (start index, code point) pairs reported by forEachCodePointIndexed with default indexes.
    fun CharSequence.visited(): List<Pair<Int, CodePoint>> {
        val seen = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed { position, codePoint -> seen += position to codePoint }
        return seen
    }

    // Empty input: the block is never invoked.
    assertEquals(emptyList<Pair<Int, CodePoint>>(), "".visited())

    // Single BMP character starts at index 0.
    assertEquals(listOf(0 to 'a'.toCodePoint()), "a".visited())

    // A surrogate pair following a BMP character is reported at the index of its high surrogate.
    val expectedTrailingPair = listOf(0 to 'a'.toCodePoint(), 1 to 0x1F995.toCodePoint())
    assertEquals(expectedTrailingPair, "a\uD83E\uDD95".visited())

    // Reported indexes skip over the second char of each surrogate pair.
    val expectedMixed = listOf(
        0 to 0x1F995.toCodePoint(),
        2 to 'a'.toCodePoint(),
        3 to 0x1F996.toCodePoint(),
    )
    assertEquals(expectedMixed, "\uD83E\uDD95a\uD83E\uDD96".visited())
}

@Test
fun forEachCodepointIndexed_with_non_default_indexes() {
    // Gathers (start index, code point) pairs reported by forEachCodePointIndexed for the given range.
    fun CharSequence.visited(from: Int, to: Int): List<Pair<Int, CodePoint>> {
        val seen = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed(from, to) { position, codePoint -> seen += position to codePoint }
        return seen
    }

    val letters = "ab"
    val dinosaurs = "\uD83E\uDD95a\uD83E\uDD96"

    // Reported indexes are positions in the original sequence, not offsets from the range start.
    assertEquals(listOf(0 to 'a'.toCodePoint()), letters.visited(0, 1))
    assertEquals(listOf(1 to 'b'.toCodePoint()), letters.visited(1, 2))

    // A complete surrogate pair inside the range is combined.
    assertEquals(listOf(1 to 0x1F995.toCodePoint()), "a\uD83E\uDD95".visited(1, 3))

    // Range boundaries splitting both pairs: each lone surrogate is reported by itself.
    val expectedSplit = listOf(
        1 to 0xDD95.toCodePoint(),
        2 to 'a'.toCodePoint(),
        3 to 0xD83E.toCodePoint(),
    )
    assertEquals(expectedSplit, dinosaurs.visited(1, 4))

    // Range covering only the BMP character between the pairs.
    assertEquals(listOf(2 to 'a'.toCodePoint()), dinosaurs.visited(2, 3))

    // startIndex greater than endIndex is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePointIndexed(startIndex = 1, endIndex = 0) { _, _ -> }
    }
    // endIndex beyond the length is rejected.
    assertFailsWith<IllegalArgumentException> {
        "a".forEachCodePointIndexed(startIndex = 1, endIndex = 2) { _, _ -> }
    }
}
}

0 comments on commit 03f0ae8

Please sign in to comment.