Skip to content

Commit

Permalink
Fixes processing unicode escape
Browse files Browse the repository at this point in the history
  • Loading branch information
GOB52 committed Feb 18, 2023
1 parent c2d9dc5 commit dac6b22
Show file tree
Hide file tree
Showing 11 changed files with 282 additions and 215 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Log output when internal errors occur and added detection mechanism.
### Added a mechanism to help determine keys and retrieve values
if elseif elseif elseif elseif elseif elseif elseif elseif elseif elseif elseif elseif ... OMG!
Added a helper for retrieving values and a delegation handler for processing each JSON object.
see also [test_element.cpp](test/test_element.cpp)
see also [test_element.cpp](test/test_element.cpp), [test_basic.cpp](test/test_basic.cpp)

### Unit test support with GoogleTest
Even small test cases are useful.
Expand Down
2 changes: 1 addition & 1 deletion library.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"type": "git",
"url": "https://github.com/GOB52/gob_json.git"
},
"version": "0.0.3",
"version": "0.0.4",
"build": {
"libArchive": false
},
Expand Down
2 changes: 1 addition & 1 deletion library.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name=gob_json
version=0.0.3
version=0.0.4
author=GOB
maintainer=GOB
sentence=Library for parsing potentially huge json streams on devices with scarce memory .
Expand Down
2 changes: 1 addition & 1 deletion platformio.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ default_envs = native_11, native_14, native_17, native_20
;default_env = m5s_11, m5s_14, m5s_17, m5s_20

[env]
build_flags = -DUNIT_TEST -Wall -Wextra -Werror=format
build_flags = -DUNIT_TEST -Wall -Wextra -Wreturn-local-addr -Werror=format -Werror=return-local-addr
-D GOB_JSON_PARSER_BUFFER_MAX_LENGTH=512
-D GOB_JSON_PARSER_KEY_MAX_LENGTH=64
-D GOB_JSON_PARSER_STACK_MAX_DEPTH=16
Expand Down
79 changes: 45 additions & 34 deletions src/gob_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,15 @@ void StreamingParser::parse(const char ch)
startValue(c);
break;
case State::START_ESCAPE:
// GOB_JSON_LOGV("startEscape");
processEscapeCharacters(c);
break;
case State::UNICODE:
// GOB_JSON_LOGV("unicode:[%c]:0x%02x", c,c);
processUnicodeCharacter(c);
break;
case State::UNICODE_SURROGATE:
// GOB_JSON_LOGV("surrogate:[%c]:0x%02x", c,c);
unicodeEscapeBuffer[unicodeEscapeBufferPos] = c;
unicodeEscapeBufferPos++;
if (unicodeEscapeBufferPos == 2) {
Expand Down Expand Up @@ -389,28 +392,18 @@ void StreamingParser::processUnicodeCharacter(char c) {

if (unicodeBufferPos == 4) {
int codepoint = getHexArrayAsDecimal(unicodeBuffer, unicodeBufferPos);
endUnicodeCharacter(codepoint);
return;
/*if (codepoint >= 0xD800 && codepoint < 0xDC00) {
unicodeHighSurrogate = codepoint;
unicodeBufferPos = 0;
state = State::UNICODE_SURROGATE;
} else if (codepoint >= 0xDC00 && codepoint <= 0xDFFF) {
if (unicodeHighSurrogate == -1) {
// throw new ParsingError($this->_line_number,
// $this->_char_number,
// "Missing high surrogate for Unicode low surrogate.");
}
int combinedCodePoint = ((unicodeHighSurrogate - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000;
endUnicodeCharacter(combinedCodePoint);
} else if (unicodeHighSurrogate != -1) {
// throw new ParsingError($this->_line_number,
// $this->_char_number,
// "Invalid low surrogate following Unicode high surrogate.");
endUnicodeCharacter(codepoint);
} else {
endUnicodeCharacter(codepoint);
}*/
if(state != State::UNICODE_SURROGATE)
{
if (codepoint >= 0xD800 && codepoint < 0xDC00) {
unicodeHighSurrogate = codepoint;
unicodeBufferPos = 0;
state = State::UNICODE_SURROGATE;
}
else
{
endUnicodeCharacter(codepoint);
}
}
}
}
bool StreamingParser::isHexCharacter(char c) {
Expand Down Expand Up @@ -601,19 +594,37 @@ void StreamingParser::startNumber(char c) {
increaseBufferPointer();
}

void StreamingParser::endUnicodeCharacter(int codepoint) {
if (codepoint < 0x80){
buffer[bufferPos] = (char) (codepoint);
} else if (codepoint <= 0x800){
buffer[bufferPos] = (char) ((codepoint >> 6) | 0b11000000);
increaseBufferPointer();
buffer[bufferPos] = (char) ((codepoint & 0b00111111) | 0b10000000);
} else if (codepoint == 0x2019){
buffer[bufferPos] = '\''; // \u2019 ’
} else {
buffer[bufferPos] = ' ';
void StreamingParser::endUnicodeCharacter(uint32_t cp) {
// UTF-32 to UTF-8
constexpr uint8_t mask = 0xBF;
constexpr uint8_t mark_bit = 0x80;
constexpr uint8_t markTable[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

uint8_t length{3};
uint8_t buf[8]{};

// GOB_JSON_LOGV("cp:%x", cp);

if(unicodeHighSurrogate != -1)
{
uint32_t high = unicodeHighSurrogate;
uint32_t low = cp;
cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
// GOB_JSON_LOGV("SP:high:%x low:%x => %x", high, low, cp);
}

if(cp < 0x80) { length = 1; }
else if(cp < 0x800) { length = 2; }
else if(cp < 0x10000) { length = 3; }
else if(cp < 0x110000) { length = 4; }
for(uint8_t i = length; i > 0 ; --i)
{
buf[i-1] = static_cast<uint8_t>( (i==1) ? (cp | markTable[length]) : ((cp | mark_bit) & mask) );
cp >>= 6;
}
increaseBufferPointer();
auto p = buf;
while(length--) { buffer[bufferPos] = *p++; increaseBufferPointer(); }

unicodeBufferPos = 0;
unicodeHighSurrogate = -1;
state = State::IN_STRING;
Expand Down
8 changes: 4 additions & 4 deletions src/gob_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ namespace json {
#endif

#ifndef GOB_JSON_PARSER_BUFFER_MAX_LENGTH
# pragma message "Buffer length as default"
# pragma message "[gob_json] Buffer length as default"
# define GOB_JSON_PARSER_BUFFER_MAX_LENGTH (256)
#else
# pragma message "Defined buffer length=" GOB_JSON_STRINGIFY(GOB_JSON_PARSER_BUFFER_MAX_LENGTH)
# pragma message "[gob_json] Defined buffer length=" GOB_JSON_STRINGIFY(GOB_JSON_PARSER_BUFFER_MAX_LENGTH)
#endif

/*!
Expand Down Expand Up @@ -95,7 +95,7 @@ class StreamingParser
void endTrue();
void endDocument();
void endUnicodeSurrogateInterstitial();
void endUnicodeCharacter(int codepoint);
void endUnicodeCharacter(uint32_t codepoint);

void increaseBufferPointer();
void processEscapeCharacters(char c);
Expand Down Expand Up @@ -154,7 +154,7 @@ class StreamingParser
int unicodeEscapeBufferPos{0};;
char unicodeBuffer[10];
int unicodeBufferPos{0};
int unicodeHighSurrogate{0};
int unicodeHighSurrogate{-1};

size_t characterCounter{0};
int curCh{}; // for error information.
Expand Down
8 changes: 4 additions & 4 deletions src/gob_json_element_path.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ namespace goblib { namespace json {
#endif

#ifndef GOB_JSON_PARSER_KEY_MAX_LENGTH
# pragma message "Key length as default"
# pragma message "[gob_json] Key length as default"
# define GOB_JSON_PARSER_KEY_MAX_LENGTH (32)
#else
# pragma message "Defined key length=" GOB_JSON_STRINGIFY(GOB_JSON_PARSER_KEY_MAX_LENGTH)
# pragma message "[gob_json] Defined key length=" GOB_JSON_STRINGIFY(GOB_JSON_PARSER_KEY_MAX_LENGTH)
#endif

// For ElementPath::selectors and StreamingParser::stack
#ifndef GOB_JSON_PARSER_STACK_MAX_DEPTH
# pragma message "Stack max depth as default"
# pragma message "[gob_json] Stack max depth as default"
# define GOB_JSON_PARSER_STACK_MAX_DEPTH (20)
#else
# pragma message "Defined stack max depth=" GOB_JSON_STRINGIFY(GOB_JSON_PARSER_STACK_MAX_DEPTH)
# pragma message "[gob_json] Defined stack max depth=" GOB_JSON_STRINGIFY(GOB_JSON_PARSER_STACK_MAX_DEPTH)
#endif

/*!
Expand Down
2 changes: 1 addition & 1 deletion src/gob_json_version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#define GOB_JSON_VERSION_MAJOR 0
#define GOB_JSON_VERSION_MINOR 0
#define GOB_JSON_VERSION_PATCH 3
#define GOB_JSON_VERSION_PATCH 4

#define GOB_JSON_VERSION_STRINGIFY_AGAIN(x) #x
#define GOB_JSON_VERSION_STRINGIFY(x) GOB_JSON_VERSION_STRINGIFY_AGAIN(x)
Expand Down
12 changes: 9 additions & 3 deletions src/internal/gob_json_log.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@
# include <esp32-hal-log.h>
# ifndef GOB_JSON_LOG_LEVEL
# if defined(LOG_LOCAL_LEVEL)
# pragma message "[JSON]] Using LOG_LOCAL_LEVEL"
# pragma message "[gob_json Using LOG_LOCAL_LEVEL"
# define GOB_JSON_LOG_LEVEL (LOG_LOCAL_LEVEL)
# elif defined(CORE_DEBUG_LEVEL)
# pragma message "[JSON] Using CORE_DEBUG_LEVEL"
# pragma message "[gob_json] Using CORE_DEBUG_LEVEL"
# define GOB_JSON_LOG_LEVEL (CORE_DEBUG_LEVEL)
# else
# pragma message "[gob_json] Using loglevel 3"
# define GOB_JSON_LOG_LEVEL (3))
# endif
# else
# pragma message "[JSON] Using defined log level"
# pragma message "[gob_json] Using defined log level"
# endif
/*! @brief Error */
# define GOB_JSON_LOGE(format, ...) do { if(GOB_JSON_LOG_LEVEL >= ESP_LOG_ERROR) { log_printf(ARDUHAL_LOG_FORMAT(E, format), ##__VA_ARGS__); } } while(0)
Expand All @@ -40,7 +43,10 @@

# include <cstdio>
# ifndef GOB_JSON_LOG_LEVEL
# pragma message "[gob_json] Using loglevel 3"
# define GOB_JSON_LOG_LEVEL (3)
# else
# pragma message "[gob_json] Using defined log level"
# endif

# define GOB_JSON_LOG(fmt, ...) do { printf("%s:%d ", __FILE__, __LINE__); printf(fmt, ##__VA_ARGS__); putchar('\n'); }while(0)
Expand Down
Loading

0 comments on commit dac6b22

Please sign in to comment.