diff --git a/gradle.properties b/gradle.properties index 470ff79f..16967811 100644 --- a/gradle.properties +++ b/gradle.properties @@ -8,9 +8,9 @@ android.nonTransitiveRClass=true kotlin.native.ignoreIncorrectDependencies=true kotlin.mpp.enableCInteropCommonization=true kotlin.mpp.applyDefaultHierarchyTemplate=false -# common, kotlinx, korlibs, okio, ktor2 +# dev, common, kotlinx, korlibs, okio, ktor2 # dev will include all modules in settings.gradle.kts but use kotlinx dep for engine -libBuildType=common +libBuildType=okio SONATYPE_HOST=CENTRAL_PORTAL diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 9d67cb8c..ec73e43b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -3,8 +3,8 @@ agp = "8.5.2" kotlin = "2.0.20" compileSdk = "34" minSdk = "21" -libraryVersion = "0.1.6" -ktor = "3.0.0-beta-2" +libraryVersion = "0.1.7" +ktor = "3.0.0-rc-1" ktor2 = "2.3.12" coroutines = "1.8.1" kotlinxDatetime = "0.6.1" @@ -17,7 +17,7 @@ dokka = "1.9.20" korlibs = "6.0.1" mavenPublish = "0.29.0" -stately-concurrent = "2.0.7" +stately = "2.1.0" jsoup = "1.18.1" [libraries] @@ -26,6 +26,12 @@ kotlinx-coroutines-core = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-c kotlinx-coroutines-test = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-test", version.ref = "coroutines" } ktor2-io = { module = "io.ktor:ktor-io", version.ref = "ktor2" } ktor2-http = { module = "io.ktor:ktor-http", version.ref = "ktor2" } +ktor2-client-core = { module = "io.ktor:ktor-client-core", version.ref = "ktor2" } +ktor2-client-darwin = { module = "io.ktor:ktor-client-darwin", version.ref = "ktor2" } +ktor2-client-cio = { module = "io.ktor:ktor-client-cio", version.ref = "ktor2" } +ktor2-client-okhttp = { module = "io.ktor:ktor-client-okhttp", version.ref = "ktor2" } +ktor2-client-js = { module = "io.ktor:ktor-client-js", version.ref = "ktor2" } +ktor2-client-win = { module = "io.ktor:ktor-client-winhttp", version.ref = "ktor2" } ktor-io = { module = "io.ktor:ktor-io", version.ref = "ktor" } ktor-http = { module = "io.ktor:ktor-http", version.ref = "ktor" } ktor-client-core = { module = "io.ktor:ktor-client-core", version.ref = "ktor" } @@ -40,7 +46,7 @@ kotlinx-io = { module = "org.jetbrains.kotlinx:kotlinx-io-core", version.ref = " codepoints = { module = "de.cketti.unicode:kotlin-codepoints-deluxe", version.ref = "codepoints" } korlibs-io = { module = "com.soywiz:korlibs-io", version.ref = "korlibs" } korlibs-io-network-core = { module = "com.soywiz:korlibs-io-network-core", version.ref = "korlibs" } -stately-concurrent = { module = "co.touchlab:stately-concurrent-collections", version.ref = "stately-concurrent" } +stately-concurrency = { module = "co.touchlab:stately-concurrency", version.ref = "stately" } jsoup = { module = "org.jsoup:jsoup", version.ref = "jsoup" } okio = { module = "com.squareup.okio:okio", version.ref = "okio" } okio-nodefilesystem = { module = "com.squareup.okio:okio-nodefilesystem", version.ref = "okio" } diff --git a/ksoup-network-ktor2/module.yaml b/ksoup-network-ktor2/module.yaml index 3be64cbe..02dc982e 100644 --- a/ksoup-network-ktor2/module.yaml +++ b/ksoup-network-ktor2/module.yaml @@ -14,19 +14,19 @@ repositories: dependencies: - $libs.kotlinx.coroutines.core - ../ksoup: compile-only - - $libs.ktor.client.core: exported + - $libs.ktor2.client.core: exported dependencies@jvmAndAndroid: - - $libs.ktor.client.okhttp + - $libs.ktor2.client.okhttp dependencies@apple: - - $libs.ktor.client.darwin + - $libs.ktor2.client.darwin dependencies@js: - - $libs.ktor.client.js + - $libs.ktor2.client.js dependencies@mingw: - - $libs.ktor.client.win + - $libs.ktor2.client.win dependencies@linux: - - $libs.ktor.client.cio \ No newline at end of file + - $libs.ktor2.client.cio \ No newline at end of file diff --git a/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt b/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt index 749cb85c..f28297ec 100644 --- a/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt +++ b/ksoup-network-ktor2/src/com/fleeksoft/ksoup/network/SourceExtNetwork.kt @@ -3,6 +3,6 @@ package com.fleeksoft.ksoup.network import com.fleeksoft.ksoup.io.SourceReader import com.fleeksoft.ksoup.io.from import io.ktor.client.statement.* -import io.ktor.utils.io.* +import io.ktor.util.* suspend fun HttpResponse.asSourceReader() = SourceReader.from(this.bodyAsChannel().toByteArray()) diff --git a/ksoup-test/module.yaml b/ksoup-test/module.yaml index 13af50ff..3198a7f1 100644 --- a/ksoup-test/module.yaml +++ b/ksoup-test/module.yaml @@ -17,7 +17,7 @@ test-dependencies: - $libs.codepoints - $libs.kotlinx.coroutines.test - $libs.kotlinx.datetime - - $libs.stately.concurrent + - $libs.stately.concurrency test-dependencies@jvmAndAndroid: - $libs.jsoup diff --git a/ksoup-test/test/com/fleeksoft/ksoup/parser/HtmlParserTest.kt b/ksoup-test/test/com/fleeksoft/ksoup/parser/HtmlParserTest.kt index 20289c25..ab1dec6f 100644 --- a/ksoup-test/test/com/fleeksoft/ksoup/parser/HtmlParserTest.kt +++ b/ksoup-test/test/com/fleeksoft/ksoup/parser/HtmlParserTest.kt @@ -2251,6 +2251,26 @@ class HtmlParserTest { assertEquals("", img.outerHtml()) } + @Test + fun tableInPInQuirksMode() { + var html = "

Hello table data

" + var doc: Document = Ksoup.parse(html) + assertEquals(Document.QuirksMode.quirks, doc.quirksMode()) + assertEquals( + "

Hello table data

", // quirks, allows table in p + TextUtil.normalizeSpaces(doc.body().html()) + ) + + // doctype set, no quirks + html = "

Hello table data

" + doc = Ksoup.parse(html) + assertEquals(Document.QuirksMode.noQuirks, doc.quirksMode()) + assertEquals( + "

Hello table data

", // no quirks, p gets closed + TextUtil.normalizeSpaces(doc.body().html()) + ) + } + companion object { private fun dupeAttributeData(): List> { return listOf( diff --git a/ksoup-test/test/com/fleeksoft/ksoup/select/SelectorTest.kt b/ksoup-test/test/com/fleeksoft/ksoup/select/SelectorTest.kt index f1d294a5..21b68c87 100644 --- a/ksoup-test/test/com/fleeksoft/ksoup/select/SelectorTest.kt +++ b/ksoup-test/test/com/fleeksoft/ksoup/select/SelectorTest.kt @@ -1,7 +1,6 @@ package com.fleeksoft.ksoup.select import com.fleeksoft.ksoup.Ksoup -import com.fleeksoft.ksoup.TestHelper import com.fleeksoft.ksoup.nodes.Document import com.fleeksoft.ksoup.nodes.Element import com.fleeksoft.ksoup.parser.Parser @@ -85,7 +84,7 @@ class SelectorTest { // Locale.setDefault(locale) val h = "
" + - "
" + "
" val doc = Ksoup.parse(h) val withTitle = doc.select("[title]") assertEquals(4, withTitle.size) @@ -1226,6 +1225,55 @@ class SelectorTest { assertSelectedOwnText(emptyAttr, "Three") } + @Test + fun divHasSpanPreceding() { + val html = "
abcdef
" + val q = "div:has(span + a)" + + val doc: Document = Ksoup.parse(html) + val els: Elements = doc.select(q) + assertEquals(1, els.size) + assertEquals("div", els.first()?.normalName()) + } + + @Test + fun divHasDivPreceding() { + val html = """ +
+
hello
+
there
+ +
+ """.trimIndent() + + val q = "div:has(>div + div)" + + val doc: Document = Ksoup.parse(html) + val els: Elements = doc.select(q) + assertEquals(1, els.size) + assertEquals("div", els.first()?.normalName()) + assertEquals("1", els.first()?.id()) + } + + @Test + fun nestedMultiHas() { + val html = + "" + + "" + + "" + + "
" + + "
hello
" + + "
world
" + + "
" + + "" + val document: Document = Ksoup.parse(html) + + val q = "div:has(> div:has(> span) + div:has(> span))" + val els: Elements = document.select(q) + assertEquals(1, els.size) + assertEquals("o", els[0].id()) + } + companion object { /** Test that the selected elements match exactly the specified IDs. */ fun assertSelectedIds( diff --git a/ksoup/module.yaml b/ksoup/module.yaml index 9c79b217..a134e7f0 100644 --- a/ksoup/module.yaml +++ b/ksoup/module.yaml @@ -13,7 +13,7 @@ repositories: dependencies: - ../ksoup-engine-common: exported - $libs.codepoints - - $libs.stately.concurrent + - $libs.stately.concurrency test-dependencies: - $kotlin-test \ No newline at end of file diff --git a/ksoup/src/com/fleeksoft/ksoup/parser/HtmlTreeBuilderState.kt b/ksoup/src/com/fleeksoft/ksoup/parser/HtmlTreeBuilderState.kt index 954e341b..e639615e 100644 --- a/ksoup/src/com/fleeksoft/ksoup/parser/HtmlTreeBuilderState.kt +++ b/ksoup/src/com/fleeksoft/ksoup/parser/HtmlTreeBuilderState.kt @@ -22,7 +22,6 @@ public enum class HtmlTreeBuilderState { tb.insertCommentNode(t.asComment()) } else if (t.isDoctype()) { // todo: parse error check on expected doctypes - // todo: quirk state check on doctype ids val d: Token.Doctype = t.asDoctype() val doctype = DocumentType( @@ -33,10 +32,16 @@ public enum class HtmlTreeBuilderState { doctype.setPubSysKey(d.pubSysKey) tb.document.appendChild(doctype) tb.onNodeInserted(doctype) - if (d.isForceQuirks) tb.document.quirksMode(Document.QuirksMode.quirks) + // todo: quirk state check on more doctype ids, if deemed useful (most are ancient legacy and presumably irrelevant) + if (d.isForceQuirks || doctype.name() != "html" || doctype.publicId().equals("HTML", ignoreCase = true)) tb.document.quirksMode( + Document.QuirksMode.quirks + ) tb.transition(BeforeHtml) } else { // todo: check not iframe srcdoc + + // todo: check not iframe srcdoc + tb.document.quirksMode(Document.QuirksMode.quirks) // missing doctype tb.transition(BeforeHtml) return tb.process(t) // re-process token } diff --git a/ksoup/src/com/fleeksoft/ksoup/select/StructuralEvaluator.kt b/ksoup/src/com/fleeksoft/ksoup/select/StructuralEvaluator.kt index 2d6351ac..4d257d99 100644 --- a/ksoup/src/com/fleeksoft/ksoup/select/StructuralEvaluator.kt +++ b/ksoup/src/com/fleeksoft/ksoup/select/StructuralEvaluator.kt @@ -1,5 +1,6 @@ package com.fleeksoft.ksoup.select +import com.fleeksoft.ksoup.internal.SoftPool import com.fleeksoft.ksoup.internal.StringUtil import com.fleeksoft.ksoup.nodes.Element import com.fleeksoft.ksoup.nodes.NodeIterator @@ -48,8 +49,7 @@ public abstract class StructuralEvaluator(public val evaluator: Evaluator) : Eva internal class Has(evaluator: Evaluator) : StructuralEvaluator(evaluator) { companion object { - private val nodeIterator: ThreadLocal> = - ThreadLocal { NodeIterator(Element("html"), Element::class) } + private val ElementIterPool: SoftPool> = SoftPool { NodeIterator(Element("html"), Element::class) } } private val checkSiblings = evalWantsSiblings(evaluator) // evaluating against siblings (or children) @@ -63,16 +63,18 @@ public abstract class StructuralEvaluator(public val evaluator: Evaluator) : Eva } sib = sib.nextElementSibling() } - } else { - // otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj - val it = nodeIterator.get() - it.restart(element) + } + // otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj + val it = ElementIterPool.borrow() + it.restart(element) + try { while (it.hasNext()) { val el = it.next() if (el === element) continue // don't match self, only descendants - if (evaluator.matches(element, el)) return true } + } finally { + ElementIterPool.release(it) } return false } diff --git a/publishToMaven.sh b/publishToMaven.sh index 2e22af6a..35fe2a1c 100755 --- a/publishToMaven.sh +++ b/publishToMaven.sh @@ -16,7 +16,7 @@ if [ "$1" == "--remote" ]; then fi # Default build types if none are passed -default_build_types=("kotlinx" "korlibs" "ktor2" "okio") +default_build_types=("common" "kotlinx" "korlibs" "ktor2" "okio") # If build types are passed, use them; otherwise, use the default list if [ "$#" -ge 1 ]; then diff --git a/settings.gradle.kts b/settings.gradle.kts index 4e9ddbc2..4aa2e4f6 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -24,24 +24,26 @@ dependencyResolutionManagement { val libBuildType = settings.providers.gradleProperty("libBuildType").get() include("ksoup-engine-common") -if (libBuildType == "korlibs" || libBuildType == "common") { +if (libBuildType == "korlibs" || libBuildType == "dev") { include("ksoup-engine-korlibs", "ksoup-network-korlibs") } -if (libBuildType == "kotlinx" || libBuildType == "common") { +if (libBuildType == "kotlinx" || libBuildType == "dev") { include("ksoup-engine-kotlinx", "ksoup-network") } -if (libBuildType == "okio" || libBuildType == "common") { +if (libBuildType == "okio" || libBuildType == "dev") { include("ksoup-engine-okio", "ksoup-network-ktor2") } -if (libBuildType == "ktor2" || libBuildType == "common") { +if (libBuildType == "ktor2" || libBuildType == "dev") { include("ksoup-engine-ktor2", "ksoup-network-ktor2") } -include("ksoup") -include("ksoup-test") +if (libBuildType != "common") { + include("ksoup") + include("ksoup-test") +} //include("sample:shared", "sample:desktop") //include("sample:android", "sample:ios") \ No newline at end of file