diff --git a/src/builtins/BuiltinString.cpp b/src/builtins/BuiltinString.cpp
index 2e742ddb5..7e7c51dab 100644
--- a/src/builtins/BuiltinString.cpp
+++ b/src/builtins/BuiltinString.cpp
@@ -1575,6 +1575,36 @@ static Value builtinStringIncludes(ExecutionState& state, Value thisValue, size_
     return Value(true);
 }
 
+// https://tc39.es/ecma262/multipage/text-processing.html#sec-string.prototype.iswellformed
+static Value builtinStringIsWellFormed(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
+{
+    // Let O be ? RequireObjectCoercible(this value).
+    // Let S be ? ToString(O).
+    RESOLVE_THIS_BINDING_TO_STRING(S, String, isWellFormed);
+    // Return IsStringWellFormedUnicode(S)
+    return Value(S->isWellFormed());
+}
+
+// https://tc39.es/ecma262/multipage/text-processing.html#sec-string.prototype.towellformed
+static Value builtinStringToWellFormed(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
+{
+    // Let O be ? RequireObjectCoercible(this value).
+    // Let S be ? ToString(O).
+    RESOLVE_THIS_BINDING_TO_STRING(S, String, toWellFormed);
+    // Let strLen be the length of S.
+    // Let k be 0.
+    // Let result be the empty String.
+    // Repeat, while k < strLen,
+    //   Let cp be CodePointAt(S, k).
+    //   If cp.[[IsUnpairedSurrogate]] is true, then
+    //     Set result to the string-concatenation of result and 0xFFFD (REPLACEMENT CHARACTER).
+    //   Else,
+    //     Set result to the string-concatenation of result and UTF16EncodeCodePoint(cp.[[CodePoint]]).
+    //   Set k to k + cp.[[CodeUnitCount]].
+    // Return result.
+    return S->toWellFormed();
+}
+
 static Value builtinStringIteratorNext(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
 {
     if (!thisValue.isObject() || !thisValue.asObject()->isStringIteratorObject()) {
@@ -1745,6 +1775,14 @@ void GlobalObject::installString(ExecutionState& state)
                                                ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->at, builtinStringAt, 1, NativeFunctionInfo::Strict)),
                                                                         (ObjectPropertyDescriptor::PresentAttribute)(ObjectPropertyDescriptor::WritablePresent | ObjectPropertyDescriptor::ConfigurablePresent)));
 
+    m_stringPrototype->directDefineOwnProperty(state, ObjectPropertyName(strings->isWellFormed),
+                                               ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->isWellFormed, builtinStringIsWellFormed, 0, NativeFunctionInfo::Strict)),
+                                                                        (ObjectPropertyDescriptor::PresentAttribute)(ObjectPropertyDescriptor::WritablePresent | ObjectPropertyDescriptor::ConfigurablePresent)));
+
+    m_stringPrototype->directDefineOwnProperty(state, ObjectPropertyName(strings->toWellFormed),
+                                               ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->toWellFormed, builtinStringToWellFormed, 0, NativeFunctionInfo::Strict)),
+                                                                        (ObjectPropertyDescriptor::PresentAttribute)(ObjectPropertyDescriptor::WritablePresent | ObjectPropertyDescriptor::ConfigurablePresent)));
+
 #define DEFINE_STRING_ADDITIONAL_HTML_FUNCTION(fnName, argLength) \
     m_stringPrototype->directDefineOwnProperty(state, ObjectPropertyName(strings->fnName), \
                                                ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->fnName, builtinString##fnName, argLength, NativeFunctionInfo::Strict)), \
diff --git a/src/runtime/StaticStrings.h b/src/runtime/StaticStrings.h
index c0697c0e6..35d6418a6 100644
--- a/src/runtime/StaticStrings.h
+++ b/src/runtime/StaticStrings.h
@@ -303,6 +303,7 @@ namespace Escargot {
     F(isPrototypeOf) \
     F(isSafeInteger) \
     F(isSealed) \
     F(isView) \
+    F(isWellFormed) \
     F(italics) \
     F(iterator) \
@@ -468,6 +469,7 @@ namespace Escargot {
     F(toTimeString) \
     F(toUTCString) \
     F(toUpperCase) \
+    F(toWellFormed) \
     F(transfer) \
     F(trim) \
     F(trimEnd) \
diff --git a/src/runtime/String.cpp b/src/runtime/String.cpp
index 44ca0cda9..c065e79d0 100644
--- a/src/runtime/String.cpp
+++ b/src/runtime/String.cpp
@@ -359,6 +359,34 @@ size_t utf32ToUtf16(char32_t i, char16_t* u)
     }
 }
 
+// Examines the UTF-16 sequence starting at utf16[0] and advances `utf16` past it:
+// two code units for a lead surrogate followed by a trail surrogate, otherwise one.
+// Returns false when the consumed code unit is an unpaired surrogate, true otherwise.
+// utf16[0] is read unconditionally, so callers must ensure utf16 < bufferEnd.
+bool isWellFormed(const char16_t*& utf16, const char16_t* bufferEnd)
+{
+    if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff) {
+        if (utf16 + 1 < bufferEnd) {
+            if (utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
+                utf16 += 2;
+                return true;
+            } else {
+                utf16 += 1;
+                return false;
+            }
+        } else {
+            utf16 += 1;
+            return false;
+        }
+    } else if (utf16[0] >= 0xdc00 && utf16[0] <= 0xdfff) {
+        utf16 += 1;
+        return false;
+    } else {
+        utf16 += 1;
+        return true;
+    }
+}
+
 bool StringBufferAccessData::equals16Bit(const char16_t* c1, const char* c2, size_t len)
 {
     while (len > 0) {
@@ -844,6 +872,51 @@ uint32_t String::tryToUseAsIndexProperty() const
     return tryToUseAsIndex32();
 }
 
+// Returns true when the string contains no unpaired surrogate code unit.
+bool String::isWellFormed() const
+{
+    auto bad = bufferAccessData();
+    if (bad.has8BitContent) {
+        // 8-bit code units are all below 0xd800, so no surrogate can occur
+        return true;
+    }
+    auto utf16 = bad.bufferAs16Bit;
+    auto end = bad.bufferAs16Bit + bad.length;
+    while (utf16 < end) {
+        if (!::Escargot::isWellFormed(utf16, end)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Returns this string when it is already well-formed; otherwise returns a new
+// UTF16String in which each unpaired surrogate code unit is replaced by U+FFFD.
+String* String::toWellFormed() const
+{
+    if (isWellFormed()) {
+        // nothing to replace; `this` is const-qualified here, the return type is not
+        return const_cast<String*>(this);
+    }
+    // isWellFormed() is true for 8-bit content, so the buffer is 16-bit from here on
+    auto bad = bufferAccessData();
+    auto utf16 = bad.bufferAs16Bit;
+    auto end = bad.bufferAs16Bit + bad.length;
+    UTF16StringData result;
+    while (utf16 < end) {
+        auto start = utf16;
+        if (::Escargot::isWellFormed(utf16, end)) {
+            while (start < utf16) {
+                result.pushBack(*start);
+                start++;
+            }
+        } else {
+            result.pushBack(0xfffd);
+        }
+    }
+    return new UTF16String(std::move(result));
+}
+
 size_t String::find(String* str, size_t pos) const
 {
     const size_t srcStrLen = str->length();
diff --git a/src/runtime/String.h b/src/runtime/String.h
index c95c9dcd0..735520409 100644
--- a/src/runtime/String.h
+++ b/src/runtime/String.h
@@ -62,6 +62,7 @@ ASCIIStringData utf16StringToASCIIString(const char16_t* buf, const size_t len);
 ASCIIStringDataNonGCStd dtoa(double number);
 size_t utf32ToUtf8(char32_t uc, char* UTF8);
 size_t utf32ToUtf16(char32_t i, char16_t* u);
+bool isWellFormed(const char16_t*& utf16, const char16_t* bufferEnd);
 
 // these functions only care ascii range(0~127)
 bool islower(char32_t ch);
@@ -534,6 +535,9 @@ class String : public PointerValue {
         return has8BitContent();
     }
 
+    bool isWellFormed() const;
+    String* toWellFormed() const;
+
     template <typename Any>
     const Any* characters() const
     {
diff --git a/tools/test/test262/excludelist.orig.xml b/tools/test/test262/excludelist.orig.xml
index 86cf97867..ccf9f13df 100644
--- a/tools/test/test262/excludelist.orig.xml
+++ b/tools/test/test262/excludelist.orig.xml
@@ -1189,24 +1189,8 @@
 TODO
 TODO
 TODO
-TODO
-TODO
-TODO
-TODO
-TODO
-TODO
-TODO
-TODO
 TODO
 TODO
-TODO
-TODO
-TODO
-TODO
-TODO
-TODO
-TODO
-TODO
 TODO
 TODO
 TODO