Skip to content

Commit

Permalink
2.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Depal1 committed Jun 30, 2024
1 parent e54e7a6 commit 6d808c0
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 109 deletions.
9 changes: 2 additions & 7 deletions Package.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// swift-tools-version:5.7
// swift-tools-version:5.0

import PackageDescription

Expand All @@ -8,12 +8,7 @@ let package = Package(
.library(name: "SwiftSoup", targets: ["SwiftSoup"])
],
targets: [
.target(name: "SwiftSoup",
path: "Sources",
exclude: [],
resources: [.copy("PrivacyInfo.xcprivacy")]),
.target(name: "SwiftSoup", path: "Sources"),
.testTarget(name: "SwiftSoupTests", dependencies: ["SwiftSoup"])
]
)


45 changes: 1 addition & 44 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<img src="https://raw.githubusercontent.com/scinfu/SwiftSoup/master/swiftsoup.png" alt="SwiftSoup" title="SwiftSoup">
</p>

[![StandWithPalestine](https://github.com/Safouene1/support-palestine-banner/blob/master/StandWithPalestine.svg)](https://bdsmovement.net/)

![Platform OS X | iOS | tvOS | watchOS | Linux](https://img.shields.io/badge/platform-Linux%20%7C%20OS%20X%20%7C%20iOS%20%7C%20tvOS%20%7C%20watchOS-orange.svg)
[![SPM compatible](https://img.shields.io/badge/SPM-compatible-4BC51D.svg?style=flat)](https://github.com/apple/swift-package-manager)
![🐧 linux: ready](https://img.shields.io/badge/%F0%9F%90%A7%20linux-ready-red.svg)
Expand All @@ -12,9 +12,6 @@
[![License](https://img.shields.io/cocoapods/l/SwiftSoup.svg?style=flat)](http://cocoapods.org/pods/SwiftSoup)
[![Twitter](https://img.shields.io/badge/[email protected]?style=flat)](http://twitter.com/scinfu)

[![ReadMeSupportPalestine](https://github.com/Safouene1/support-palestine-banner/blob/master/banner-support.svg)](https://bdsmovement.net/)


`SwiftSoup` is a pure Swift library, cross-platform (macOS, iOS, tvOS, watchOS and Linux!), for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jQuery-like methods.
`SwiftSoup` implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers do.
* Scrape and parse HTML from a URL, file, or string
Expand Down Expand Up @@ -214,46 +211,6 @@ do {
}
```

If you supply a whole HTML document with a `<head>` tag, the `clean(_: String, _: String, _: Whitelist)` method returns only the cleaned body HTML.
To clean both `<head>` and `<body>`, provide a separate `Whitelist` for each tag.

```swift
do {
let unsafe: String = """
<html>
<head>
<title>Hey</title>
<script>console.log('hi');</script>
</head>
<body>
<p>Hello, world!</p>
</body>
</html>
"""

var headWhitelist: Whitelist = {
do {
let customWhitelist = Whitelist.none()
try customWhitelist
.addTags("meta", "style", "title")
return customWhitelist
} catch {
fatalError("Couldn't init head whitelist")
}
}()

let unsafeDocument: Document = try SwiftSoup.parse(unsafe)
let safe: String = try SwiftSoup.Cleaner(headWhitelist: headWhitelist, bodyWhitelist: .relaxed())
.clean(unsafeDocument)
.html()
// now: <html><head><title>Hey</title></head><body><p>Hello, world!</p></body></html>
} catch Exception.Error(let type, let message) {
print(message)
} catch {
print("error")
}
```

### Discussion
A cross-site scripting attack against your site can really ruin your day, not to mention your users'. Many sites avoid XSS attacks by not allowing HTML in user submitted content: they enforce plain text only, or use an alternative markup syntax like wiki-text or Markdown. These are seldom optimal solutions for the user, as they lower expressiveness, and force the user to learn a new syntax.

Expand Down
4 changes: 2 additions & 2 deletions Sources/Element.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1226,9 +1226,9 @@ open class Element: Node {
// selfclosing includes unknown tags, isEmpty defines tags that are always empty
if (childNodes.isEmpty && _tag.isSelfClosing()) {
if (out.syntax() == OutputSettings.Syntax.html && _tag.isEmpty()) {
accum.append(" />") // <img /> for "always empty" tags. selfclosing is ignored but retained for xml/xhtml compatibility
accum.append(">")
} else {
accum.append(" />") // <img /> in xml
accum.append(" />") // <img> in html, <img /> in xml
}
} else {
accum.append(">")
Expand Down
4 changes: 2 additions & 2 deletions Sources/HtmlTreeBuilderState.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1530,17 +1530,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
}

private static func handleRcData(_ startTag: Token.StartTag, _ tb: HtmlTreeBuilder)throws {
try tb.insert(startTag)
tb.tokeniser.transition(TokeniserState.Rcdata)
tb.markInsertionMode()
tb.transition(.Text)
try tb.insert(startTag)
}

private static func handleRawtext(_ startTag: Token.StartTag, _ tb: HtmlTreeBuilder)throws {
try tb.insert(startTag)
tb.tokeniser.transition(TokeniserState.Rawtext)
tb.markInsertionMode()
tb.transition(.Text)
try tb.insert(startTag)
}

// lists of tags to search through. A little harder to read here, but causes less GC than dynamic varargs.
Expand Down
37 changes: 18 additions & 19 deletions Sources/NodeTraversor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,28 @@ class NodeTraversor {
* Start a depth-first traverse of the root and all of its descendants.
* @param root the root node point to traverse.
*/
open func traverse(_ root: Node?) throws {
open func traverse(_ root: Node?)throws {
var node: Node? = root
var depth: Int = 0

while (node != nil) {
try visitor.head(node!, depth)
if (node!.childNodeSize() > 0) {
node = node!.childNode(0)
depth+=1
} else {
while (node!.nextSibling() == nil && depth > 0) {
let parent = node!.getParentNode()
try visitor.tail(node!, depth)
node = parent
depth-=1
}
let nextSib = node!.nextSibling()
try visitor.tail(node!, depth)
if (node === root) {
break
}
node = nextSib
}
try visitor.head(node!, depth)
if (node!.childNodeSize() > 0) {
node = node!.childNode(0)
depth+=1
} else {
while (node!.nextSibling() == nil && depth > 0) {
try visitor.tail(node!, depth)
node = node!.getParentNode()
depth-=1
}
try visitor.tail(node!, depth)
if (node === root) {
break
}
node = node!.nextSibling()
}
}
}

}
4 changes: 2 additions & 2 deletions Sources/NodeVisitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import Foundation
*/
public protocol NodeVisitor {
/**
* Callback for when a node is first visited. {@code head} cannot safely call {@code node.remove()}.
* Callback for when a node is first visited.
*
* @param node the node being visited.
* @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node
Expand All @@ -27,7 +27,7 @@ public protocol NodeVisitor {
func head(_ node: Node, _ depth: Int)throws

/**
* Callback for when a node is last visited, after all of its descendants have been visited. {@code tail} can safely call {@code node.remove()}.
* Callback for when a node is last visited, after all of its descendants have been visited.
*
* @param node the node being visited.
* @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node
Expand Down
16 changes: 0 additions & 16 deletions Sources/PrivacyInfo.xcprivacy

This file was deleted.

3 changes: 1 addition & 2 deletions Sources/TokeniserState.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1097,8 +1097,7 @@ enum TokeniserState: TokeniserStateProtocol {
break
case TokeniserStateVars.eof:
t.eofError(self)
// note: fall through to > case
fallthrough
// note: fall through to > case
case ">": // catch invalid <!DOCTYPE>
t.error(self)
t.createDoctypePending()
Expand Down
6 changes: 1 addition & 5 deletions SwiftSoup.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

Pod::Spec.new do |s|
s.name = 'SwiftSoup'
s.version = '2.7.2'
s.version = '2.6.0'
s.summary = 'Swift HTML Parser / Reader, XML , with best of DOM, CSS, and jquery'
s.description = <<-DESC
SwiftSoup is a Swift library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
Expand All @@ -31,8 +31,4 @@ SwiftSoup is a Swift library for working with real-world HTML. It provides a ver
#s.resource_bundles = {
# 'SwiftSoup' => ['Assets/*.properties']
#}
s.ios.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']}
s.osx.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']}
s.watchos.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']}
s.tvos.resource_bundles = {'SwiftSoup' => ['Sources/PrivacyInfo.xcprivacy']}
end
10 changes: 0 additions & 10 deletions SwiftSoup.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@
6710743227F580530048E7C1 /* Mutex.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6710743027F580530048E7C1 /* Mutex.swift */; };
6710743327F580530048E7C1 /* Mutex.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6710743027F580530048E7C1 /* Mutex.swift */; };
6710743427F580530048E7C1 /* Mutex.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6710743027F580530048E7C1 /* Mutex.swift */; };
722E96332B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; };
722E96342B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; };
722E96352B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; };
722E96362B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */; };
8C19C82F1DB7E5D200B8FC22 /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
8C19C8311DB7E8CD00B8FC22 /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; };
8C19C8331DB7ECB700B8FC22 /* ParseErrorList.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8321DB7ECB700B8FC22 /* ParseErrorList.swift */; };
Expand Down Expand Up @@ -302,7 +298,6 @@
/* Begin PBXFileReference section */
6710742B27F57FA60048E7C1 /* UnfairLock.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UnfairLock.swift; sourceTree = "<group>"; };
6710743027F580530048E7C1 /* Mutex.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Mutex.swift; sourceTree = "<group>"; };
722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; lastKnownFileType = text.xml; path = PrivacyInfo.xcprivacy; sourceTree = "<group>"; };
8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Tokeniser.swift; sourceTree = "<group>"; };
8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParseError.swift; sourceTree = "<group>"; };
8C19C8321DB7ECB700B8FC22 /* ParseErrorList.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParseErrorList.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -615,7 +610,6 @@
BD3B5BAA1FBED934001FDB3B /* InfoMac.plist */,
BD3B5BED1FC063BD001FDB3B /* InfotvOS.plist */,
BD3B5C301FC06424001FDB3B /* InfoWatchOS.plist */,
722E96322B7DF76400936F48 /* PrivacyInfo.xcprivacy */,
);
path = Sources;
sourceTree = "<group>";
Expand Down Expand Up @@ -909,7 +903,6 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
722E96332B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down Expand Up @@ -937,23 +930,20 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
722E96342B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
BD3B5BE81FC063BD001FDB3B /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
722E96352B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
BD3B5C2B1FC06423001FDB3B /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
722E96362B7DF76400936F48 /* PrivacyInfo.xcprivacy in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down

0 comments on commit 6d808c0

Please sign in to comment.