diff --git a/Package.swift b/Package.swift index cc00745..6a8f0df 100644 --- a/Package.swift +++ b/Package.swift @@ -4,6 +4,7 @@ import PackageDescription let package = Package( name: "SwiftSoup", + platforms: [.macOS(.v10_15), .iOS(.v13), .watchOS(.v4)], products: [ .library(name: "SwiftSoup", targets: ["SwiftSoup"]) ], diff --git a/Sources/CharacterReader.swift b/Sources/CharacterReader.swift index 91a90f8..93adc5a 100644 --- a/Sources/CharacterReader.swift +++ b/Sources/CharacterReader.swift @@ -1,193 +1,119 @@ -// -// CharacterReader.swift -// SwiftSoup -// -// Created by Nabil Chatbi on 10/10/16. -// Copyright © 2016 Nabil Chatbi.. All rights reserved. -// - import Foundation -/** - CharacterReader consumes tokens off a string. To replace the old TokenQueue. - */ public final class CharacterReader { private static let empty = "" - public static let EOF: UnicodeScalar = "\u{FFFF}"//65535 - private let input: String.UnicodeScalarView - private var pos: String.UnicodeScalarView.Index - private var mark: String.UnicodeScalarView.Index - //private let stringCache: Array // holds reused strings in this doc, to lessen garbage - + public static let EOF: UnicodeScalar = "\u{FFFF}" // 65535 + private let input: String.UTF8View + private var pos: String.UTF8View.Index + private var mark: String.UTF8View.Index + public init(_ input: String) { - self.input = input.unicodeScalars - self.pos = input.startIndex - self.mark = input.startIndex + self.input = input.utf8 + self.pos = self.input.startIndex + self.mark = self.input.startIndex } - + public func getPos() -> Int { return input.distance(from: input.startIndex, to: pos) } - + public func isEmpty() -> Bool { return pos >= input.endIndex } - + public func current() -> UnicodeScalar { - return (pos >= input.endIndex) ? CharacterReader.EOF : input[pos] + guard pos < input.endIndex else { return CharacterReader.EOF } + return UnicodeScalar(input[pos]) } - + @discardableResult public func consume() -> UnicodeScalar { - guard pos < input.endIndex else { - return CharacterReader.EOF - } - let val = input[pos] + guard pos < input.endIndex else { return CharacterReader.EOF } + let val = UnicodeScalar(input[pos]) input.formIndex(after: &pos) return val } - + public func unconsume() { guard pos > input.startIndex else { return } input.formIndex(before: &pos) } - + public func advance() { guard pos < input.endIndex else { return } input.formIndex(after: &pos) } - + public func markPos() { mark = pos } - + public func rewindToMark() { pos = mark } - -// public func consumeAsString() -> String { -// guard pos < input.endIndex else { return "" } -// let str = String(input[pos]) -// input.formIndex(after: &pos) -// return str -// } - - /** - * Locate the next occurrence of a Unicode scalar - * - * - Parameter c: scan target - * - Returns: offset between current position and next instance of target. -1 if not found. - */ - public func nextIndexOf(_ c: UnicodeScalar) -> String.UnicodeScalarView.Index? { - // doesn't handle scanning for surrogates - return input[pos...].firstIndex(of: c) + + public func consumeAsString() -> String { + guard pos < input.endIndex else { return "" } + let scalar = UnicodeScalar(input[pos]) + input.formIndex(after: &pos) + return String(scalar) } - - /** - * Locate the next occurence of a target string - * - * - Parameter seq: scan target - * - Returns: index of next instance of target. nil if not found. - */ - public func nextIndexOf(_ seq: String) -> String.UnicodeScalarView.Index? { - // doesn't handle scanning for surrogates - var start = pos - let targetScalars = seq.unicodeScalars - guard let firstChar = targetScalars.first else { return pos } // search for "" -> current place - MATCH: while true { - // Match on first scalar - guard let firstCharIx = input[start...].firstIndex(of: firstChar) else { return nil } - var current = firstCharIx - // Then manually match subsequent scalars - for scalar in targetScalars.dropFirst() { - input.formIndex(after: ¤t) - guard current < input.endIndex else { return nil } - if input[current] != scalar { - start = input.index(after: firstCharIx) - continue MATCH - } + + public func consumeToAny(_ chars: Set) -> String { + let start = pos + + while pos < input.endIndex { + let utf8Byte = input[pos] + if chars.contains(utf8Byte) { + break } - // full match; current is at position of last matching character - return firstCharIx + input.formIndex(after: &pos) } + + return String(decoding: input[start.. String { - guard let targetIx = nextIndexOf(c) else { - return consumeToEnd() + private func unicodeScalar(at index: String.UTF8View.Index, in utf8View: String.UTF8View) -> UnicodeScalar? { + var iterator = utf8View[index...].makeIterator() + var utf8Decoder = UTF8() + var unicodeScalar: UnicodeScalar? + let decodingState = utf8Decoder.decode(&iterator) + + switch decodingState { + case .scalarValue(let scalar): + unicodeScalar = scalar + case .emptyInput, .error: + break // Handle decoding errors if needed } + + return unicodeScalar + } + + public func consumeTo(_ c: UnicodeScalar) -> String { + guard let targetIx = nextIndexOf(c) else { return consumeToEnd() } let consumed = cacheString(pos, targetIx) pos = targetIx return consumed } - + public func consumeTo(_ seq: String) -> String { - guard let targetIx = nextIndexOf(seq) else { - return consumeToEnd() - } + guard let targetIx = nextIndexOf(seq) else { return consumeToEnd() } let consumed = cacheString(pos, targetIx) pos = targetIx return consumed } - -// public func consumeToAny(_ chars: UnicodeScalar...) -> String { -// return consumeToAny(Set(chars)) -// } - -// public func consumeToAny(_ chars: Set) -> String { -// let endIndex = input.endIndex -// let start = pos -// while pos < endIndex { -// if chars.contains(input[pos]) { -// break -// } -// input.formIndex(after: &pos) -// } -// return cacheString(start, pos) -// } - - public func consumeToAny(_ chars: Set) -> String { - let start = pos - if let nextIndex = input[pos...].firstIndex(where: { chars.contains($0) }) { - pos = nextIndex - } else { - pos = input.endIndex - } - return cacheString(start, pos) - } - -// public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String { -// return consumeToAny(chars) -// } - public func consumeToAnySorted(_ chars: Set) -> String { - return consumeToAny(chars) - } - - static let dataTerminators: Set = Set([.Ampersand, .LessThan, TokeniserStateVars.nullScalr]) - // read to &, <, or null - public func consumeData() -> String { - return consumeToAny(CharacterReader.dataTerminators) - } - - static let tagNameTerminators: Set = Set([.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr]) - // read to '\t', '\n', '\r', '\f', ' ', '/', '>', or nullChar - public func consumeTagName() -> String { - return consumeToAny(CharacterReader.tagNameTerminators) - } - public func consumeToEnd() -> String { let consumed = cacheString(pos, input.endIndex) pos = input.endIndex return consumed } - + public func consumeLetterSequence() -> String { let start = pos - let endIndex = input.endIndex - while pos < endIndex { - let c = input[pos] - if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) { + while pos < input.endIndex { + let scalar = UnicodeScalar(input[pos]) + if CharacterSet.letters.contains(scalar) { input.formIndex(after: &pos) } else { break @@ -195,21 +121,20 @@ public final class CharacterReader { } return cacheString(start, pos) } - + public func consumeLetterThenDigitSequence() -> String { let start = pos - let endIndex = input.endIndex - while pos < endIndex { - let c = input[pos] - if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) { + while pos < input.endIndex { + let scalar = UnicodeScalar(input[pos]) + if CharacterSet.letters.contains(scalar) { input.formIndex(after: &pos) } else { break } } - while pos < endIndex { - let c = input[pos] - if (c >= "0" && c <= "9") { + while pos < input.endIndex { + let scalar = UnicodeScalar(input[pos]) + if CharacterSet.decimalDigits.contains(scalar) { input.formIndex(after: &pos) } else { break @@ -217,13 +142,12 @@ public final class CharacterReader { } return cacheString(start, pos) } - + public func consumeHexSequence() -> String { let start = pos - let endIndex = input.endIndex - while pos < endIndex { - let c = input[pos] - if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) { + while pos < input.endIndex { + let scalar = UnicodeScalar(input[pos]) + if CharacterSet(charactersIn: "0123456789ABCDEFabcdef").contains(scalar) { input.formIndex(after: &pos) } else { break @@ -231,13 +155,12 @@ public final class CharacterReader { } return cacheString(start, pos) } - + public func consumeDigitSequence() -> String { let start = pos - let endIndex = input.endIndex - while pos < endIndex { - let c = input[pos] - if (c >= "0" && c <= "9") { + while pos < input.endIndex { + let scalar = UnicodeScalar(input[pos]) + if CharacterSet.decimalDigits.contains(scalar) { input.formIndex(after: &pos) } else { break @@ -245,19 +168,18 @@ public final class CharacterReader { } return cacheString(start, pos) } - + public func matches(_ c: UnicodeScalar) -> Bool { - return !isEmpty() && input[pos] == c - + guard pos < input.endIndex else { return false } + return UnicodeScalar(input[pos]) == c } - + public func matches(_ seq: String, ignoreCase: Bool = false, consume: Bool = false) -> Bool { var current = pos let scalars = seq.unicodeScalars - let endIndex = input.endIndex for scalar in scalars { - guard current < endIndex else { return false } - let c = input[current] + guard current < input.endIndex else { return false } + let c = UnicodeScalar(input[current]) if ignoreCase { guard c.uppercase == scalar.uppercase else { return false } } else { @@ -270,68 +192,107 @@ public final class CharacterReader { } return true } - - public func matchesIgnoreCase(_ seq: String ) -> Bool { + + public func matchesIgnoreCase(_ seq: String) -> Bool { return matches(seq, ignoreCase: true) } - + public func matchesAny(_ seq: UnicodeScalar...) -> Bool { return matchesAny(seq) } public func matchesAny(_ seq: [UnicodeScalar]) -> Bool { guard pos < input.endIndex else { return false } - return seq.contains(input[pos]) + return seq.contains(UnicodeScalar(input[pos])) } - + public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool { return matchesAny(seq) } - + public func matchesLetter() -> Bool { guard pos < input.endIndex else { return false } - let c = input[pos] - return (c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters) + return CharacterSet.letters.contains(UnicodeScalar(input[pos])) } - + public func matchesDigit() -> Bool { guard pos < input.endIndex else { return false } - let c = input[pos] - return c >= "0" && c <= "9" + return CharacterSet.decimalDigits.contains(UnicodeScalar(input[pos])) } - + @discardableResult public func matchConsume(_ seq: String) -> Bool { return matches(seq, consume: true) } - + @discardableResult public func matchConsumeIgnoreCase(_ seq: String) -> Bool { return matches(seq, ignoreCase: true, consume: true) } - - public func containsIgnoreCase(_ seq: String ) -> Bool { - // used to check presence of , . only finds consistent case. + + public func containsIgnoreCase(_ seq: String) -> Bool { let loScan = seq.lowercased(with: Locale(identifier: "en")) let hiScan = seq.uppercased(with: Locale(identifier: "eng")) return nextIndexOf(loScan) != nil || nextIndexOf(hiScan) != nil } - + public func toString() -> String { - return String(input[pos...]) + return String(input[pos...]) ?? "" + } + + private func cacheString(_ start: String.UTF8View.Index, _ end: String.UTF8View.Index) -> String { + return String(decoding: input[start.. String.UTF8View.Index? { + return input[pos...].firstIndex { UnicodeScalar($0) == c } + } + + public func nextIndexOf(_ seq: String) -> String.UTF8View.Index? { + var start = pos + let targetUtf8 = seq.utf8 + + while true { + guard let firstCharIx = input[start...].firstIndex(of: targetUtf8.first!) else { return nil } + + var current = firstCharIx + var matched = true + for utf8Byte in targetUtf8 { + guard current < input.endIndex else { return nil } + if input[current] != utf8Byte { + matched = false + break + } + input.formIndex(after: ¤t) + } + + if matched { + return firstCharIx + } else { + start = input.index(after: firstCharIx) + } + } } - /** - * Originally intended as a caching mechanism for strings, but caching doesn't - * seem to improve performance. Now just a stub. - */ - private func cacheString(_ start: String.UnicodeScalarView.Index, _ end: String.UnicodeScalarView.Index) -> String { - return String(input[start.. String { + return consumeToAny(CharacterReader.dataTerminators) + } + + static let tagNameTerminators = Set([.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) + + public func consumeTagName() -> String { + return consumeToAny(CharacterReader.tagNameTerminators) + } + + public func consumeToAnySorted(_ chars: Set) -> String { + return consumeToAny(chars) } } extension CharacterReader: CustomDebugStringConvertible { public var debugDescription: String { - return toString() + return toString() } } diff --git a/Sources/Entities.swift b/Sources/Entities.swift index 8e91531..84fbe4e 100644 --- a/Sources/Entities.swift +++ b/Sources/Entities.swift @@ -51,7 +51,7 @@ public class Entities { return left.value != right.value } - private static let codeDelims: Set = Set([",", ";"]) + private static let codeDelims = Set([",", ";"].flatMap { $0.utf8 }) init(string: String, size: Int, id: Int) { diff --git a/Sources/Tokeniser.swift b/Sources/Tokeniser.swift index 2fb5b59..ff40cbe 100644 --- a/Sources/Tokeniser.swift +++ b/Sources/Tokeniser.swift @@ -36,7 +36,7 @@ final class Tokeniser { self.errors = errors } - func read()throws->Token { + func read() throws -> Token { if (!selfClosingFlagAcknowledged) { error("Self closing flag not acknowledged") selfClosingFlagAcknowledged = true diff --git a/Sources/TokeniserState.swift b/Sources/TokeniserState.swift index d58a651..6307aae 100644 --- a/Sources/TokeniserState.swift +++ b/Sources/TokeniserState.swift @@ -15,14 +15,15 @@ protocol TokeniserStateProtocol { public class TokeniserStateVars { public static let nullScalr: UnicodeScalar = "\u{0000}" - static let attributeSingleValueChars = Set(["'", UnicodeScalar.Ampersand, nullScalr]) - static let attributeDoubleValueChars = Set(["\"", UnicodeScalar.Ampersand, nullScalr]) - static let attributeNameChars = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", "/", "=", ">", nullScalr, "\"", "'", UnicodeScalar.LessThan]) - static let attributeValueUnquoted = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", UnicodeScalar.Ampersand, ">", nullScalr, "\"", "'", UnicodeScalar.LessThan, "=", "`"]) + static let attributeSingleValueChars = Set(["'", UnicodeScalar.Ampersand, nullScalr].flatMap { $0.utf8 }) + static let attributeDoubleValueChars = Set(["\"", UnicodeScalar.Ampersand, nullScalr].flatMap { $0.utf8 }) + static let attributeNameChars = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", "/", "=", ">", nullScalr, "\"", "'", UnicodeScalar.LessThan].flatMap { $0.utf8 }) + static let attributeValueUnquoted = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", UnicodeScalar.Ampersand, ">", nullScalr, "\"", "'", UnicodeScalar.LessThan, "=", "`"].flatMap { $0.utf8 }) - static let dataDefaultStopChars: Set = [UnicodeScalar.Ampersand, UnicodeScalar.LessThan, TokeniserStateVars.nullScalr] - static let commentDefaultStopChars: Set = ["-", TokeniserStateVars.nullScalr] - static let readDataDefaultStopChars: Set = [UnicodeScalar.LessThan, TokeniserStateVars.nullScalr] + static let dataDefaultStopChars = Set([UnicodeScalar.Ampersand, UnicodeScalar.LessThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) + static let scriptDataDefaultStopChars = Set(["-", UnicodeScalar.LessThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) + static let commentDefaultStopChars = Set(["-", TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) + static let readDataDefaultStopChars = Set([UnicodeScalar.LessThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) static let replacementChar: UnicodeScalar = Tokeniser.replacementChar @@ -422,7 +423,7 @@ enum TokeniserState: TokeniserStateProtocol { t.emit(TokeniserStateVars.replacementChar) break default: - let data = r.consumeToAny(TokeniserStateVars.dataDefaultStopChars) + let data = r.consumeToAny(TokeniserStateVars.scriptDataDefaultStopChars) t.emit(data) } break @@ -533,7 +534,7 @@ enum TokeniserState: TokeniserStateProtocol { t.transition(.Data) break default: - let data = r.consumeToAny(TokeniserStateVars.dataDefaultStopChars) + let data = r.consumeToAny(TokeniserStateVars.scriptDataDefaultStopChars) t.emit(data) } break diff --git a/Tests/SwiftSoupTests/AttributeParseTest.swift b/Tests/SwiftSoupTests/AttributeParseTest.swift index 0d75c55..058fb9c 100644 --- a/Tests/SwiftSoupTests/AttributeParseTest.swift +++ b/Tests/SwiftSoupTests/AttributeParseTest.swift @@ -95,11 +95,11 @@ class AttributeParseTest: XCTestCase { XCTAssertEqual(html, try el.outerHtml()) } - func testdropsSlashFromAttributeName()throws { + func testretainsSlashFromAttributeName()throws { let html: String = "" var doc: Document = try SwiftSoup.parse(html) XCTAssertTrue(try doc.select("img[onerror]").size() != 0, "SelfClosingStartTag ignores last character") - XCTAssertEqual("", try doc.body()!.html()) + XCTAssertEqual("", try doc.body()!.html()) doc = try SwiftSoup.parse(html, "", Parser.xmlParser()) XCTAssertEqual("", try doc.html()) @@ -115,7 +115,7 @@ class AttributeParseTest: XCTestCase { ("teststrictAttributeUnescapes", teststrictAttributeUnescapes), ("testmoreAttributeUnescapes", testmoreAttributeUnescapes), ("testparsesBooleanAttributes", testparsesBooleanAttributes), - ("testdropsSlashFromAttributeName", testdropsSlashFromAttributeName) + ("testretainsSlashFromAttributeName", testretainsSlashFromAttributeName) ] }() diff --git a/Tests/SwiftSoupTests/CharacterReaderTest.swift b/Tests/SwiftSoupTests/CharacterReaderTest.swift index f762312..06ca479 100644 --- a/Tests/SwiftSoupTests/CharacterReaderTest.swift +++ b/Tests/SwiftSoupTests/CharacterReaderTest.swift @@ -138,14 +138,14 @@ class CharacterReaderTest: XCTestCase { } func testConsumeToAny() { - let r = CharacterReader("One &bar; qux") - XCTAssertEqual("One ", r.consumeToAny("&", ";")) + let r = CharacterReader("One 二 &bar; qux 三") + XCTAssertEqual("One 二 ", r.consumeToAny(Set(["&", ";"].flatMap { $0.utf8 }))) XCTAssertTrue(r.matches("&")) XCTAssertTrue(r.matches("&bar;")) XCTAssertEqual("&", r.consume()) - XCTAssertEqual("bar", r.consumeToAny("&", ";")) + XCTAssertEqual("bar", r.consumeToAny(Set(["&", ";"].flatMap { $0.utf8 }))) XCTAssertEqual(";", r.consume()) - XCTAssertEqual(" qux", r.consumeToAny("&", ";")) + XCTAssertEqual(" qux 三", r.consumeToAny(Set(["&", ";"].flatMap { $0.utf8 }))) } func testConsumeLetterSequence() { @@ -208,7 +208,7 @@ class CharacterReaderTest: XCTestCase { //let scan = [" ", "\n", "\t"] let r = CharacterReader("One\nTwo\tThree") XCTAssertFalse(r.matchesAny(" ", "\n", "\t")) - XCTAssertEqual("One", r.consumeToAny(" ", "\n", "\t")) + XCTAssertEqual("One", r.consumeToAny(Set([" ", "\n", "\t"].flatMap { $0.utf8 }))) XCTAssertTrue(r.matchesAny(" ", "\n", "\t")) XCTAssertEqual("\n", r.consume()) XCTAssertFalse(r.matchesAny(" ", "\n", "\t")) @@ -251,6 +251,51 @@ class CharacterReaderTest: XCTestCase { // // XCTAssertTrue(r.rangeEquals(18, 5, "CHOKE")) // XCTAssertFalse(r.rangeEquals(18, 5, "CHIKE")) + } + + func testJavaScriptParsingHangRegression() throws { + let expectation = XCTestExpectation(description: "SwiftSoup parse should complete") + + DispatchQueue.global().async { + do { + let html = """ + + + """ + _ = try SwiftSoup.parse(html) + expectation.fulfill() // Fulfill the expectation if parse completes + } catch { + XCTFail("Parsing failed with error: \(error)") + expectation.fulfill() // Fulfill the expectation to not block the waiter in case of error + } + } + + // Wait for the expectation with a timeout of 3 seconds + let result = XCTWaiter().wait(for: [expectation], timeout: 3.0) + + switch result { + case .completed: + // Parse completed within the timeout, the test passes + break + case .timedOut: + // Parse did not complete within the timeout, the test fails + XCTFail("Parsing took too long; hang detected") + default: + break + } + } + + func testURLCrashRegression() throws { + let html = """ + + + + + """ + _ = try SwiftSoup.parse(html) } static var allTests = { @@ -274,8 +319,10 @@ class CharacterReaderTest: XCTestCase { ("testContainsIgnoreCase", testContainsIgnoreCase), ("testMatchesAny", testMatchesAny), ("testCachesStrings", testCachesStrings), - ("testRangeEquals", testRangeEquals) - ] + ("testRangeEquals", testRangeEquals), + ("testJavaScriptParsingHangRegression", testJavaScriptParsingHangRegression), + ("testURLCrashRegression", testURLCrashRegression), + ] }() } diff --git a/Tests/SwiftSoupTests/CleanerTest.swift b/Tests/SwiftSoupTests/CleanerTest.swift index 9b97735..28e754d 100644 --- a/Tests/SwiftSoupTests/CleanerTest.swift +++ b/Tests/SwiftSoupTests/CleanerTest.swift @@ -26,7 +26,7 @@ class CleanerTest: XCTestCase { // XCTAssertEqual(" \n", dropped) let preserved = try SwiftSoup.clean(html, Whitelist.basicWithImages().addProtocols("img", "src", "cid", "data")) - XCTAssertEqual(" \n", preserved) + XCTAssertEqual(" \n", preserved) } func testSimpleBehaviourTest()throws { @@ -53,7 +53,7 @@ class CleanerTest: XCTestCase { func testBasicWithImagesTest()throws { let h = "

Image

" let cleanHtml = try SwiftSoup.clean(h, Whitelist.basicWithImages()) - XCTAssertEqual("

\"Image\"

", TextUtil.stripNewlines(cleanHtml!)) + XCTAssertEqual("

\"Image\"

", TextUtil.stripNewlines(cleanHtml!)) } func testRelaxed()throws { @@ -113,7 +113,7 @@ class CleanerTest: XCTestCase { func testDropImageScript()throws { let h = "" let cleanHtml = try SwiftSoup.clean(h, Whitelist.relaxed()) - XCTAssertEqual("", cleanHtml) + XCTAssertEqual("", cleanHtml) } func testCleanJavascriptHref()throws { @@ -153,7 +153,7 @@ class CleanerTest: XCTestCase { func testtestHandlesEmptyAttributes()throws { let h = "\"\"" let cleanHtml = try SwiftSoup.clean(h, Whitelist.basicWithImages()) - XCTAssertEqual("\"\"", cleanHtml) + XCTAssertEqual("\"\"", cleanHtml) } func testIsValid()throws { @@ -170,13 +170,13 @@ class CleanerTest: XCTestCase { func testResolvesRelativeLinks()throws { let html = "Link" let clean = try SwiftSoup.clean(html, "http://example.com/", Whitelist.basicWithImages()) - XCTAssertEqual("Link\n", clean) + XCTAssertEqual("Link\n", clean) } func testPreservesRelativeLinksIfConfigured()throws { let html = "Link " let clean = try SwiftSoup.clean(html, "http://example.com/", Whitelist.basicWithImages().preserveRelativeLinks(true)) - XCTAssertEqual("Link\n \n", clean) + XCTAssertEqual("Link\n \n", clean) } func testDropsUnresolvableRelativeLinks()throws { diff --git a/Tests/SwiftSoupTests/DocumentTest.swift b/Tests/SwiftSoupTests/DocumentTest.swift index 00c34ad..9c5e3ec 100644 --- a/Tests/SwiftSoupTests/DocumentTest.swift +++ b/Tests/SwiftSoupTests/DocumentTest.swift @@ -169,7 +169,7 @@ class DocumentTest: XCTestCase { "\n" + " \n" + " \n" + - " "\"><>&\"\n" + + " "\" /><>&\"\n" + " bar\n" + " \n" + "", try! doc.html()) @@ -242,7 +242,7 @@ class DocumentTest: XCTestCase { print("") } - let htmlCharsetUTF8: String = "\n" + " \n" + " \n" + " \n" + " \n" + "" + let htmlCharsetUTF8: String = "\n" + " \n" + " \n" + " \n" + " \n" + "" XCTAssertEqual(htmlCharsetUTF8, try! doc.outerHtml()) let selectedElement: Element = try! doc.select("meta[charset]").first()! @@ -259,7 +259,7 @@ class DocumentTest: XCTestCase { let htmlCharsetISO = "\n" + " \n" + - " \n" + + " \n" + " \n" + " \n" + "" @@ -280,7 +280,7 @@ class DocumentTest: XCTestCase { let htmlCharsetUTF8 = "\n" + " \n" + - " \n" + + " \n" + " \n" + " \n" + "" @@ -303,8 +303,8 @@ class DocumentTest: XCTestCase { let htmlCharset = "\n" + " \n" + - " \n" + - " \n" + + " \n" + + " \n" + " \n" + " \n" + "" @@ -335,7 +335,7 @@ class DocumentTest: XCTestCase { let htmlCharsetUTF8 = "\n" + " \n" + - " \n" + + " \n" + " \n" + " \n" + "" diff --git a/Tests/SwiftSoupTests/ElementTest.swift b/Tests/SwiftSoupTests/ElementTest.swift index 353b8ea..55c37d9 100644 --- a/Tests/SwiftSoupTests/ElementTest.swift +++ b/Tests/SwiftSoupTests/ElementTest.swift @@ -287,7 +287,7 @@ class ElementTest: XCTestCase { func testOuterHtml()throws { let doc = try SwiftSoup.parse("

Hello

there") - XCTAssertEqual("

Hello

there

", + XCTAssertEqual("

Hello

there

", try TextUtil.stripNewlines(doc.outerHtml())) } @@ -582,10 +582,10 @@ class ElementTest: XCTestCase { func testpParentlessToString()throws { let doc: Document = try SwiftSoup.parse("") let img: Element = try doc.select("img").first()! - XCTAssertEqual("", try img.outerHtml()) + XCTAssertEqual("", try img.outerHtml()) try img.remove() // lost its parent - XCTAssertEqual("", try img.outerHtml()) + XCTAssertEqual("", try img.outerHtml()) } func testClone()throws { diff --git a/Tests/SwiftSoupTests/HtmlParserTest.swift b/Tests/SwiftSoupTests/HtmlParserTest.swift index ccf1d69..5ffb6b1 100644 --- a/Tests/SwiftSoupTests/HtmlParserTest.swift +++ b/Tests/SwiftSoupTests/HtmlParserTest.swift @@ -19,11 +19,11 @@ class HtmlParserTest: XCTestCase { let thisClass = type(of: self) let linuxCount = thisClass.allTests.count let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) - XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") + XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests: \(thisClass.defaultTestSuite.tests.filter { test in !thisClass.allTests.contains(where: { $0.0 == String(test.name.split(separator: " ")[1].dropLast(1)) }) })") #endif } - func testParsesSimpleDocument()throws { + func testParsesSimpleDocument() throws { let html: String = "First!

First post!

" let doc: Document = try SwiftSoup.parse(html) // need a better way to verify these: @@ -238,7 +238,7 @@ class HtmlParserTest: XCTestCase { func testHandlesWhatWgExpensesTableExample()throws { // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#examples-0 let doc = try SwiftSoup.parse("
2008 2007 2006
Research and development $ 1,109 $ 782 $ 712
Percentage of net sales 3.4% 3.3% 3.7%
Selling, general, and administrative $ 3,761 $ 2,963 $ 2,433
Percentage of net sales 11.6% 12.3% 12.6%
") - XCTAssertEqual("
2008 2007 2006
Research and development $ 1,109 $ 782 $ 712
Percentage of net sales 3.4% 3.3% 3.7%
Selling, general, and administrative $ 3,761 $ 2,963 $ 2,433
Percentage of net sales 11.6% 12.3% 12.6%
", try TextUtil.stripNewlines(doc.body()!.html())) + XCTAssertEqual("
2008 2007 2006
Research and development $ 1,109 $ 782 $ 712
Percentage of net sales 3.4% 3.3% 3.7%
Selling, general, and administrative $ 3,761 $ 2,963 $ 2,433
Percentage of net sales 11.6% 12.3% 12.6%
", try TextUtil.stripNewlines(doc.body()!.html())) } func testHandlesTbodyTable()throws { @@ -347,25 +347,25 @@ class HtmlParserTest: XCTestCase { // if a known tag, allow self closing outside of spec, but force an end tag. unknown tags can be self closing. let h = "
One
hr text
hr text two", try TextUtil.stripNewlines(doc.body()!.html())) + XCTAssertEqual("
One
hr text
hr text two", try TextUtil.stripNewlines(doc.body()!.html())) } func testHandlesKnownEmptyNoFrames() throws { let h = "<meta name=foo></head><body>One</body></html>"; let doc = try SwiftSoup.parse(h); - XCTAssertEqual("<html><head><noframes>One", try TextUtil.stripNewlines(doc.html())); + XCTAssertEqual("One", try TextUtil.stripNewlines(doc.html())); } func testHandlesKnownEmptyStyle() throws { let h = "One", try TextUtil.stripNewlines(doc.html())); + XCTAssertEqual("One", try TextUtil.stripNewlines(doc.html())); } func testHandlesKnownEmptyTitle() throws { let h = "<meta name=foo></head><body>One</body></html>"; let doc = try SwiftSoup.parse(h); - XCTAssertEqual("<html><head><title>One", try TextUtil.stripNewlines(doc.html())); + XCTAssertEqual("One", try TextUtil.stripNewlines(doc.html())); } func testHandlesSolidusAtAttributeEnd()throws { @@ -405,7 +405,7 @@ class HtmlParserTest: XCTestCase { func testHandlesFrames()throws { let h = "" let doc = try SwiftSoup.parse(h) - XCTAssertEqual("", + XCTAssertEqual("", try TextUtil.stripNewlines(doc.html())) // no body auto vivification } @@ -413,7 +413,7 @@ class HtmlParserTest: XCTestCase { func testIgnoresContentAfterFrameset()throws { let h = "One
" let doc = try SwiftSoup.parse(h) - XCTAssertEqual("One", try TextUtil.stripNewlines(doc.html())) + XCTAssertEqual("One", try TextUtil.stripNewlines(doc.html())) // no body, no table. No crash! } @@ -437,7 +437,7 @@ class HtmlParserTest: XCTestCase { func testNormalisesDocument()throws { let h = "OneTwoThreeFourFive Six Seven " let doc = try SwiftSoup.parse(h) - XCTAssertEqual("OneTwoThreeFourFive Six Seven ", + XCTAssertEqual("OneTwoThreeFourFive Six Seven ", try TextUtil.stripNewlines(doc.html())) } @@ -681,7 +681,10 @@ class HtmlParserTest: XCTestCase { ("testcommentBeforeHtml", testcommentBeforeHtml), ("testemptyTdTag", testemptyTdTag), ("testhandlesSolidusInA", testhandlesSolidusInA), - ("testhandlesSpanInTbody", testhandlesSpanInTbody) + ("testhandlesSpanInTbody", testhandlesSpanInTbody), + ("testHandlesKnownEmptyNoFrames", testHandlesKnownEmptyNoFrames), + ("testHandlesKnownEmptyStyle", testHandlesKnownEmptyStyle), + ("testHandlesKnownEmptyTitle", testHandlesKnownEmptyTitle), ] }() diff --git a/Tests/SwiftSoupTests/XmlTreeBuilderTest.swift b/Tests/SwiftSoupTests/XmlTreeBuilderTest.swift index 20f2345..3eec3dd 100644 --- a/Tests/SwiftSoupTests/XmlTreeBuilderTest.swift +++ b/Tests/SwiftSoupTests/XmlTreeBuilderTest.swift @@ -83,7 +83,7 @@ class XmlTreeBuilderTest: XCTestCase { // html will force "
one
" to logically "
One
". // XML should be stay "
one
-- don't recognise tag. let htmlDoc = try SwiftSoup.parse("
one
") - XCTAssertEqual("
one\n
", try htmlDoc.body()?.html()) + XCTAssertEqual("
one\n
", try htmlDoc.body()?.html()) let xmlDoc = try SwiftSoup.parse("
one
", "", Parser.xmlParser()) XCTAssertEqual("
one
", try xmlDoc.html())