Skip to content

Commit

Permalink
Merge pull request #91 from fleeksoft/buffer-fixes
Browse files Browse the repository at this point in the history
Fix byte array read size, fix ktor2 body read and Bump versions
  • Loading branch information
itboy87 authored Oct 11, 2024
2 parents 59651a1 + 1c7c246 commit b6c9ece
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 19 deletions.
6 changes: 3 additions & 3 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
[versions]
agp = "8.5.2"
kotlin = "2.0.20"
kotlin = "2.0.21"
compileSdk = "34"
minSdk = "21"
libraryVersion = "0.1.9"
ktor = "3.0.0-rc-1"
ktor = "3.0.0"
ktor2 = "2.3.12"
coroutines = "1.9.0"
kotlinxDatetime = "0.6.1"
kotlinx-io = "0.5.4"
okio = "3.9.0"
okio = "3.9.1"
dokka = "1.9.20"
kotlinx-benchmark = "0.4.12"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,17 @@ internal class SourceReaderByteArray(bytes: ByteArray) : SourceReader {

/**
 * Copies bytes from the backing array into [bytes].
 *
 * @param bytes destination buffer.
 * @param offset index in [bytes] at which the first copied byte is stored.
 * @param length maximum number of bytes to copy.
 * @return the number of bytes copied, or -1 when nothing was copied because the source is exhausted.
 */
override fun read(bytes: ByteArray, offset: Int, length: Int): Int {
    val startPosition = currentPosition
    // The write index starts at `offset`, so the upper bound has to be offset-relative as well;
    // comparing against `length` alone would copy only `length - offset` bytes.
    val endIndex = offset + length
    var i = offset
    while (exhausted().not() && i < endIndex) {
        bytes[i] = source[currentPosition++]
        i++
    }
    // Report how many bytes were consumed, not the final write index.
    val totalRead = currentPosition - startPosition
    return if (totalRead == 0 && exhausted()) {
        -1
    } else {
        totalRead
    }
}

override fun readAllBytes(): ByteArray {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public suspend fun Ksoup.parseGetRequest(
val httpResponse = NetworkHelperKtor.instance.get(url, httpRequestBuilder = httpRequestBuilder)
// url can be changed after redirection
val finalUrl = httpResponse.request.url.toString()
return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl)
return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl)
}

/**
Expand All @@ -52,7 +52,7 @@ public suspend fun Ksoup.parseSubmitRequest(
)
// url can be changed after redirection
val finalUrl = httpResponse.request.url.toString()
return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl)
return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl)
}

/**
Expand All @@ -77,5 +77,5 @@ public suspend fun Ksoup.parsePostRequest(
)
// url can be changed after redirection
val finalUrl = httpResponse.request.url.toString()
return parse(sourceReader = httpResponse.asSourceReader(), parser = parser, baseUri = finalUrl)
return parse(html = httpResponse.bodyAsText(), parser = parser, baseUri = finalUrl)
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ package com.fleeksoft.ksoup.network
import com.fleeksoft.ksoup.io.SourceReader
import com.fleeksoft.ksoup.io.from
import io.ktor.client.statement.*
import io.ktor.util.*

/**
 * Reads the whole response body as text and wraps the re-encoded bytes in a [SourceReader].
 *
 * NOTE(review): the bytes handed to the reader are the UTF-8 encoding of the decoded text, not the raw
 * response bytes — confirm this is what downstream charset detection expects.
 */
suspend fun HttpResponse.asSourceReader() = SourceReader.from(this.bodyAsText().encodeToByteArray())
53 changes: 53 additions & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/meta/MetadataTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class MetadataTest {
<meta name="description" content="Test Description">
<link rel="canonical" href="https://example.com">
<link rel="icon" href="/favicon.ico">
<link href="/pikacon-32x32.png" rel="shortcut icon" type="image/png">
</head>
</html>
""".trimIndent()
Expand Down Expand Up @@ -61,6 +62,58 @@ class MetadataTest {
assertEquals("Test Description", metaData.description)
assertEquals("https://example.com", metaData.canonical)
assertEquals("https://example.com/favicon.ico", metaData.favicon)
assertEquals("https://example.com/pikacon-32x32.png", metaData.shortcutIcon)
}


/**
 * Parses a realistic `<head>` containing many `<link>`/`<meta>` tags (including several
 * "shortcut icon" links) with [Ksoup.parseMetaData] and checks the title, the Open Graph
 * title/image and the shortcut icon.
 *
 * NOTE(review): no `@Test` annotation is visible on this function, so it may never be executed — confirm.
 * NOTE(review): despite the name, the HTML string is passed directly; no reader is involved here.
 */
fun testParseMetaDataFromReader() {
val html = """
<html>
<head>
<link rel="profile" href="http://gmpg.org/xfn/11">
<link rel="alternate" href="https://animepahe.ru" hreflang="en-us">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="theme-color" content="#373a3c"><!-- Chrome, Firefox OS, Opera and Vivaldi -->
<meta name="msapplication-navbutton-color" content="#373a3c"><!-- Windows Phone -->
<meta name="apple-mobile-web-app-status-bar-style" content="#373a3c"><!-- iOS Safari -->
<meta http-equiv="x-dns-prefetch-control" content="on">
<link rel="preconnect" href="//i.animepahe.ru">
<link rel="preload" href="/app/fonts/QldONTRRphEb_-V7LB6xTA.woff2" as="font" type="font/woff2" crossorigin>
<link rel="preload" href="/app/css/bootstrap.min.css" as="style">
<link rel="preload" href="/app/css/fork-awesome.min.css" as="style">
<link rel="preload" href="/app/css/style.css" as="style">
<link rel="preload" href="/app/js/vendor/bootstrap.bundle.min.js" as="script">
<link rel="preload" href="/app/js/core.js" as="script">
<meta name="msapplication-TileImage" content="https://animepahe.ru/animepahe-270x270.png">
<title>animepahe :: okay-ish anime website</title>
<meta name="description" content="Watch or download anime shows in HD 720p/1080p.">
<meta name="keywords" content="Anime,Pahe,Mini,720p,HD,mp4,English,Subtitle,Hardsub">
<meta name="robots" content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1">
<meta property="og:site_name" content="animepahe">
<meta property="og:locale" content="en_US">
<meta property="og:image" content="https://animepahe.ru/animepahe-270x270.png">
<meta property="og:url" content="https://animepahe.ru">
<meta property="og:type" content="website">
<meta property="og:title" content="cloud anime encoding">
<meta property="og:description" content="Watch or download anime shows in HD 720p/1080p.">
<meta name="author" content="animepahe">
<link href="/apple-touch-icon.png" rel="apple-touch-icon-precomposed" type="image/png">
<link href="/apple-touch-icon.png" rel="shortcut icon">
<link href="/pikacon-32x32.png" rel="shortcut icon" type="image/png">
<link href="/pikacon.ico" rel="shortcut icon" type="image/x-icon">
<link rel="alternate" type="application/rss+xml" title="RSS 2.0" href="https://animepahe.ru/feed">
<link rel="stylesheet" href="/app/css/bootstrap.min.css">
<link rel="stylesheet" href="/app/css/fork-awesome.min.css">
<link rel="stylesheet" href="/app/css/style.css">
</head>
</html>
""".trimIndent()

// The base URI passed here ends with a trailing slash.
val metaData = Ksoup.parseMetaData(html, "https://animepahe.ru/")
assertEquals("animepahe :: okay-ish anime website", metaData.title)
assertEquals("cloud anime encoding", metaData.ogTitle)
assertEquals("https://animepahe.ru/animepahe-270x270.png", metaData.ogImage)
// NOTE(review): the base URI ends with "/" and the icon href starts with "/"; if the absolute URL is
// built by plain string concatenation the result would contain "//" — verify this expected value.
assertEquals("https://animepahe.ru/apple-touch-icon.png", metaData.shortcutIcon)
}

}
1 change: 1 addition & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DocumentTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class DocumentTest {
assertEquals("", doc.html())
val body = doc.body()
assertEquals("body", body.tagName())
assertNull(doc.headOrNull())
val head = doc.head()
assertEquals("head", head.tagName())
assertEquals(
Expand Down
26 changes: 18 additions & 8 deletions ksoup/src/com/fleeksoft/ksoup/Ksoup.kt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package com.fleeksoft.ksoup

import com.fleeksoft.ksoup.helper.DataUtil
import com.fleeksoft.ksoup.io.Charset
import com.fleeksoft.ksoup.io.FileSource
import com.fleeksoft.ksoup.io.SourceReader
import com.fleeksoft.ksoup.model.MetaData
import com.fleeksoft.ksoup.nodes.Document
import com.fleeksoft.ksoup.nodes.Element
import com.fleeksoft.ksoup.parser.Parser
import com.fleeksoft.ksoup.parser.StreamParser
import com.fleeksoft.ksoup.ported.toSourceFile
import com.fleeksoft.ksoup.safety.Cleaner
import com.fleeksoft.ksoup.safety.Safelist
Expand Down Expand Up @@ -190,9 +190,12 @@ public object Ksoup {
}

/**
 * Extracts page metadata from [element].
 *
 * When [element] is a [Document] that has a `head`, lookups are limited to that `head`; in every other
 * case [element] itself is searched. The base URI is always taken from [element].
 *
 * @param element the document or element to read metadata from.
 * @return the metadata found in the selected scope.
 */
fun parseMetaData(element: Element): MetaData {
    // Narrow the search to <head> when possible; fall back to the element itself otherwise.
    val scope: Element = (element as? Document)?.headOrNull() ?: element
    val title = scope.selectFirst("title")?.text()
    return parseMetaDataInternal(baseUri = element.baseUri(), title = title) { query ->
        scope.selectFirst(query)
    }
}

Expand All @@ -201,7 +204,7 @@ public object Ksoup {
baseUri: String = "",
interceptor: ((head: Element, metaData: MetaData) -> Unit)? = null
): MetaData {
val head = parse(html = html, baseUri = baseUri).head()
val head = parse(html = html, baseUri = baseUri).let { doc -> doc.headOrNull() ?: doc }

val title = head.selectFirst("title")?.text()
return parseMetaDataInternal(baseUri = baseUri, title = title) { query ->
Expand All @@ -214,9 +217,10 @@ public object Ksoup {
fun parseMetaData(
sourceReader: SourceReader,
baseUri: String = "",
interceptor: ((headStream: StreamParser, metaData: MetaData) -> Unit)? = null
charset: Charset? = null,
interceptor: ((head: Element, metaData: MetaData) -> Unit)? = null
): MetaData {
val head = DataUtil.streamParser(sourceReader = sourceReader, baseUri = baseUri, null, Parser.htmlParser())
val head = parse(sourceReader = sourceReader, baseUri = baseUri, charsetName = charset?.name).let { doc -> doc.headOrNull() ?: doc }
val title = head.selectFirst("title")?.text()
return parseMetaDataInternal(baseUri = baseUri, title = title) { query ->
head.selectFirst(query)
Expand Down Expand Up @@ -251,10 +255,15 @@ public object Ksoup {

// Fetch favicon
var faviconTag = selectFirst("link[rel~=icon]")?.attr("href")
if (faviconTag != null && !faviconTag.startsWith("http") && baseUri.isNotEmpty()) {
if (faviconTag != null && !faviconTag.startsWith("http", ignoreCase = true) && baseUri.isNotEmpty()) {
faviconTag = baseUri + faviconTag
}

var shortcutIcon = selectFirst("link[rel~=shortcut icon]")?.attr("href")
if (shortcutIcon != null && !shortcutIcon.startsWith("http", ignoreCase = true) && baseUri.isNotEmpty()) {
shortcutIcon = baseUri + shortcutIcon
}

// Create a MetaData object
return MetaData(
ogTitle = ogTitle,
Expand All @@ -273,7 +282,8 @@ public object Ksoup {
canonical = canonicalTag,
htmlTitle = title,
author = author,
favicon = faviconTag
favicon = faviconTag,
shortcutIcon = shortcutIcon,
)
}
}
3 changes: 2 additions & 1 deletion ksoup/src/com/fleeksoft/ksoup/model/MetaData.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ data class MetaData(
val canonical: String? = null,
val htmlTitle: String? = null,
val author: String? = null,
val favicon: String? = null
val favicon: String? = null,
val shortcutIcon: String? = null
)
18 changes: 18 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/nodes/Document.kt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ public class Document(private val namespace: String, private val location: Strin
return html.prependElement("head")
}

/**
 Find this document's {@code head} element, if it has one.
 <p>
 Only the direct element children of the element returned by {@code htmlEl()} are examined, and the first one
 named {@code head} wins. A missing {@code head} is not created by this method.
 <p>
 NOTE(review): {@code htmlEl()} is still invoked and may itself create the {@code html} element - confirm.
 @return the {@code head} element, or {@code null} if there is none.
 */
public fun headOrNull(): Element? =
    generateSequence(htmlEl().firstElementChild()) { child -> child.nextElementSibling() }
        .firstOrNull { child -> child.nameIs("head") }

/**
Get this document's {@code <body>} or {@code <frameset>} element.
<p>
Expand Down
2 changes: 1 addition & 1 deletion ksoup/src/com/fleeksoft/ksoup/nodes/Element.kt
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ public open class Element : Node {
): String {
var el: Element? = start
while (el != null) {
if (el.attributes != null && el.attributes!!.hasKey(key)) return el.attributes!![key]
if (el.attributes?.hasKey(key) == true) return el.attributes!![key]
el = el.parent()
}
return ""
Expand Down

0 comments on commit b6c9ece

Please sign in to comment.