diff --git a/Package.swift b/Package.swift index cc00745..f2a52e9 100644 --- a/Package.swift +++ b/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version:5.7 +// swift-tools-version:5.0 import PackageDescription @@ -8,12 +8,7 @@ let package = Package( .library(name: "SwiftSoup", targets: ["SwiftSoup"]) ], targets: [ - .target(name: "SwiftSoup", - path: "Sources", - exclude: [], - resources: [.copy("PrivacyInfo.xcprivacy")]), + .target(name: "SwiftSoup", path: "Sources"), .testTarget(name: "SwiftSoupTests", dependencies: ["SwiftSoup"]) ] ) - - diff --git a/README.md b/README.md index 5a0eb88..8de2990 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SwiftSoup

-[![StandWithPalestine](https://github.com/Safouene1/support-palestine-banner/blob/master/StandWithPalestine.svg)](https://bdsmovement.net/) + ![Platform OS X | iOS | tvOS | watchOS | Linux](https://img.shields.io/badge/platform-Linux%20%7C%20OS%20X%20%7C%20iOS%20%7C%20tvOS%20%7C%20watchOS-orange.svg) [![SPM compatible](https://img.shields.io/badge/SPM-compatible-4BC51D.svg?style=flat)](https://github.com/apple/swift-package-manager) ![🐧 linux: ready](https://img.shields.io/badge/%F0%9F%90%A7%20linux-ready-red.svg) @@ -12,9 +12,6 @@ [![License](https://img.shields.io/cocoapods/l/SwiftSoup.svg?style=flat)](http://cocoapods.org/pods/SwiftSoup) [![Twitter](https://img.shields.io/badge/twitter-@scinfu-blue.svg?style=flat)](http://twitter.com/scinfu) -[![ReadMeSupportPalestine](https://github.com/Safouene1/support-palestine-banner/blob/master/banner-support.svg)](https://bdsmovement.net/) - - `SwiftSoup` is a pure Swift library, cross-platform (macOS, iOS, tvOS, watchOS and Linux!), for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jQuery-like methods. `SwiftSoup` implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers do. * Scrape and parse HTML from a URL, file, or string @@ -214,46 +211,6 @@ do { } ``` -If you supply a whole HTML document, with a `` tag, the `clean(_: String, _: String, _: Whitelist)` method will just return the cleaned body HTML. -You can clean both `` and `` by providing a `Whitelist` for each tags. - -```swift -do { - let unsafe: String = """ - - - Hey - - - -

Hello, world!

- - - """ - - var headWhitelist: Whitelist = { - do { - let customWhitelist = Whitelist.none() - try customWhitelist - .addTags("meta", "style", "title") - return customWhitelist - } catch { - fatalError("Couldn't init head whitelist") - } - }() - - let unsafeDocument: Document = try SwiftSoup.parse(unsafe) - let safe: String = try SwiftSoup.Cleaner(headWhitelist: headWhitelist, bodyWhitelist: .relaxed()) - .clean(unsafeDocument) - .html() - // now: Hey

Hello, world!

-} catch Exception.Error(let type, let message) { - print(message) -} catch { - print("error") -} -``` - ### Discussion A cross-site scripting attack against your site can really ruin your day, not to mention your users'. Many sites avoid XSS attacks by not allowing HTML in user submitted content: they enforce plain text only, or use an alternative markup syntax like wiki-text or Markdown. These are seldom optimal solutions for the user, as they lower expressiveness, and force the user to learn a new syntax. diff --git a/Sources/Element.swift b/Sources/Element.swift index 2566d1a..630b991 100644 --- a/Sources/Element.swift +++ b/Sources/Element.swift @@ -1226,9 +1226,9 @@ open class Element: Node { // selfclosing includes unknown tags, isEmpty defines tags that are always empty if (childNodes.isEmpty && _tag.isSelfClosing()) { if (out.syntax() == OutputSettings.Syntax.html && _tag.isEmpty()) { - accum.append(" />") // for "always empty" tags. selfclosing is ignored but retained for xml/xhtml compatibility + accum.append(">") } else { - accum.append(" />") // in xml + accum.append(" />") // in html, in xml } } else { accum.append(">") diff --git a/Sources/HtmlTreeBuilderState.swift b/Sources/HtmlTreeBuilderState.swift index 2811c02..30bf24a 100644 --- a/Sources/HtmlTreeBuilderState.swift +++ b/Sources/HtmlTreeBuilderState.swift @@ -1530,17 +1530,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol { } private static func handleRcData(_ startTag: Token.StartTag, _ tb: HtmlTreeBuilder)throws { + try tb.insert(startTag) tb.tokeniser.transition(TokeniserState.Rcdata) tb.markInsertionMode() tb.transition(.Text) - try tb.insert(startTag) } private static func handleRawtext(_ startTag: Token.StartTag, _ tb: HtmlTreeBuilder)throws { + try tb.insert(startTag) tb.tokeniser.transition(TokeniserState.Rawtext) tb.markInsertionMode() tb.transition(.Text) - try tb.insert(startTag) } // lists of tags to search through. A little harder to read here, but causes less GC than dynamic varargs. diff --git a/Sources/NodeTraversor.swift b/Sources/NodeTraversor.swift index 9a037d7..02d3a02 100644 --- a/Sources/NodeTraversor.swift +++ b/Sources/NodeTraversor.swift @@ -23,29 +23,28 @@ class NodeTraversor { * Start a depth-first traverse of the root and all of its descendants. * @param root the root node point to traverse. */ - open func traverse(_ root: Node?) throws { + open func traverse(_ root: Node?)throws { var node: Node? = root var depth: Int = 0 while (node != nil) { - try visitor.head(node!, depth) - if (node!.childNodeSize() > 0) { - node = node!.childNode(0) - depth+=1 - } else { - while (node!.nextSibling() == nil && depth > 0) { - let parent = node!.getParentNode() - try visitor.tail(node!, depth) - node = parent - depth-=1 - } - let nextSib = node!.nextSibling() - try visitor.tail(node!, depth) - if (node === root) { - break - } - node = nextSib - } + try visitor.head(node!, depth) + if (node!.childNodeSize() > 0) { + node = node!.childNode(0) + depth+=1 + } else { + while (node!.nextSibling() == nil && depth > 0) { + try visitor.tail(node!, depth) + node = node!.getParentNode() + depth-=1 + } + try visitor.tail(node!, depth) + if (node === root) { + break + } + node = node!.nextSibling() + } } } + } diff --git a/Sources/NodeVisitor.swift b/Sources/NodeVisitor.swift index 3f20e43..4d6d7c2 100644 --- a/Sources/NodeVisitor.swift +++ b/Sources/NodeVisitor.swift @@ -18,7 +18,7 @@ import Foundation */ public protocol NodeVisitor { /** - * Callback for when a node is first visited. {@code head} cannot safely call {@code node.remove()}. + * Callback for when a node is first visited. * * @param node the node being visited. * @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node @@ -27,7 +27,7 @@ public protocol NodeVisitor { func head(_ node: Node, _ depth: Int)throws /** - * Callback for when a node is last visited, after all of its descendants have been visited. {@code tail} can safely call {@code node.remove()}. + * Callback for when a node is last visited, after all of its descendants have been visited. * * @param node the node being visited. * @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node diff --git a/Sources/PrivacyInfo.xcprivacy b/Sources/PrivacyInfo.xcprivacy deleted file mode 100644 index 393cde2..0000000 --- a/Sources/PrivacyInfo.xcprivacy +++ /dev/null @@ -1,16 +0,0 @@ - - - - - NSPrivacyTracking - - NSPrivacyTrackingDomains - - NSPrivacyCollectedDataTypes - - - NSPrivacyAccessedAPITypes - - - - diff --git a/Sources/TokeniserState.swift b/Sources/TokeniserState.swift index e55d79b..707248a 100644 --- a/Sources/TokeniserState.swift +++ b/Sources/TokeniserState.swift @@ -1097,8 +1097,7 @@ enum TokeniserState: TokeniserStateProtocol { break case TokeniserStateVars.eof: t.eofError(self) - // note: fall through to > case - fallthrough + // note: fall through to > case case ">": // catch invalid t.error(self) t.createDoctypePending() diff --git a/SwiftSoup.podspec b/SwiftSoup.podspec index 602db0f..85715da 100644 --- a/SwiftSoup.podspec +++ b/SwiftSoup.podspec @@ -8,7 +8,7 @@ Pod::Spec.new do |s| s.name = 'SwiftSoup' - s.version = '2.7.2' + s.version = '2.6.0' s.summary = 'Swift HTML Parser / Reader, XML , with best of DOM, CSS, and jquery' s.description = <<-DESC SwiftSoup is a Swift library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods. @@ -31,8 +31,4 @@ SwiftSoup is a Swift library for working with real-world HTML. It provides a ver #s.resource_bundles = { # 'SwiftSoup' => ['Assets/*.properties'] #} - s.ios.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']} - s.osx.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']} - s.watchos.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']} - s.tvos.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']} end diff --git a/SwiftSoup.xcodeproj/project.pbxproj b/SwiftSoup.xcodeproj/project.pbxproj index b8e6549..e64b3cc 100644 --- a/SwiftSoup.xcodeproj/project.pbxproj +++ b/SwiftSoup.xcodeproj/project.pbxproj @@ -15,10 +15,6 @@ 6710743227F580530048E7C1 /* Mutex.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6710743027F580530048E7C1 /* Mutex.swift */; }; 6710743327F580530048E7C1 /* Mutex.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6710743027F580530048E7C1 /* Mutex.swift */; }; 6710743427F580530048E7C1 /* Mutex.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6710743027F580530048E7C1 /* Mutex.swift */; }; - 722E96332B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; }; - 722E96342B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; }; - 722E96352B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; }; - 722E96362B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; }; 8C19C82F1DB7E5D200B8FC22 /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; }; 8C19C8311DB7E8CD00B8FC22 /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; }; 8C19C8331DB7ECB700B8FC22 /* ParseErrorList.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8321DB7ECB700B8FC22 /* ParseErrorList.swift */; }; @@ -302,7 +298,6 @@ /* Begin PBXFileReference section */ 6710742B27F57FA60048E7C1 /* UnfairLock.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UnfairLock.swift; sourceTree = ""; }; 6710743027F580530048E7C1 /* Mutex.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Mutex.swift; sourceTree = ""; }; - 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; lastKnownFileType = text.xml; path = PrivacyInfo.xcprivacy; sourceTree = ""; }; 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Tokeniser.swift; sourceTree = ""; }; 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParseError.swift; sourceTree = ""; }; 8C19C8321DB7ECB700B8FC22 /* ParseErrorList.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParseErrorList.swift; sourceTree = ""; }; @@ -615,7 +610,6 @@ BD3B5BAA1FBED934001FDB3B /* InfoMac.plist */, BD3B5BED1FC063BD001FDB3B /* InfotvOS.plist */, BD3B5C301FC06424001FDB3B /* InfoWatchOS.plist */, - 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */, ); path = Sources; sourceTree = ""; @@ -909,7 +903,6 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - 722E96332B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -937,7 +930,6 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - 722E96342B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -945,7 +937,6 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - 722E96352B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -953,7 +944,6 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - 722E96362B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */, ); runOnlyForDeploymentPostprocessing = 0; };