-
Notifications
You must be signed in to change notification settings - Fork 1
/
percentEncode.h
135 lines (117 loc) · 4.76 KB
/
percentEncode.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// https://www.w3schools.com/tags/ref_urlencode.asp
// https://www.fon.hum.uva.nl/praat/manual/Special_symbols.html
// Sanitises a NUL-terminated byte string and returns the result as a String.
//
// NOTE: despite the name, the visible code emits no "%XX" escapes. It does this:
//   * bytes 0x20..0x7F are copied unchanged;
//   * 0xC2 followed by 0xA0..0xBF, and 0xC3 followed by 0x80..0xBF, are copied
//     unchanged as a two-byte pair;
//   * the sequence 0xEF 0xBB 0xBF (UTF-8 byte order mark) produces no output;
//   * a fixed set of single bytes (0xC9, 0xE1, 0xE4, ...) is replaced by the
//     accented-character literal listed in the switch below
//     (NOTE(review): presumably these are Latin-1 input bytes - confirm);
//   * everything else, including control bytes below 0x20, becomes "?".
//
// @param plaintext  NUL-terminated input; a null pointer yields an empty String.
// @return           the sanitised copy of the input.
String percentEncode(const char* plaintext) {
  String result{};
  if (plaintext == nullptr) {
    return result;
  }

  // Work on unsigned bytes so that the comparisons and case labels below behave
  // the same whether plain `char` is signed or unsigned on the target.
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(plaintext);
  uint32_t pos{ 0 };

  while (bytes[pos] != 0) {
    const uint8_t current = bytes[pos];
    // Number of input bytes this iteration uses up. It never includes the
    // terminating NUL, so the loop condition always gets to see the terminator.
    uint32_t consumed{ 1 };

    if (current >= 0x20 && current <= 0x7F) {
      // Cast is required: appending a uint8_t would append its decimal value.
      result.concat(static_cast<char>(current));
    } else {
      switch (current) {
        case 0xC2:
        case 0xC3:
          {
            // Accepted range of the second byte depends on the lead byte.
            const uint8_t lowestTrail = (current == 0xC2) ? 0xA0 : 0x80;
            const uint8_t trail = bytes[pos + 1];
            if (trail >= lowestTrail && trail <= 0xBF) {
              result.concat(static_cast<char>(current));
              result.concat(static_cast<char>(trail));
              consumed = 2;
            } else if (trail == 0) {
              // Lead byte is the last byte of the input: drop only that byte.
              result.concat("?");
              log_e("Truncated utf8 sequence at end of input. Dropped 1 byte.");
            } else {
              result.concat("?");
              log_e("Invalid 16-bit utf8 sequence. Dropped 2 bytes.");
              consumed = 2;
            }
          }
          break;
        case 0xC9:
          result.concat("É");
          break;
        case 0xE1:
          result.concat("á");
          break;
        case 0xE4:
          result.concat("ä");
          break;
        case 0xE7:
          result.concat("ç");
          break;
        case 0xE8:
          result.concat("è");
          break;
        case 0xE9:
          result.concat("é");
          break;
        case 0xEA:
          result.concat("ê");
          break;
        case 0xEB:
          result.concat("ë");
          break;
        case 0xED:
          result.concat("í");
          break;
        // WIP
        case 0xEF:  // Byte Order Mark -> https://en.wikipedia.org/wiki/Byte_order_mark - see UTF-8 on that page - seen on 'SUBLIME pure jazz'
          {
            const uint8_t second = bytes[pos + 1];
            if (second != 0xBB) {
              result.concat("?");
              log_e("Invalid byte sequence. Dropped 2 bytes.");
              // Do not swallow the terminator if the input ends here.
              consumed = (second != 0) ? 2 : 1;
              break;
            }
            // Safe to look at the third byte: the second one was 0xBB, not NUL.
            const uint8_t third = bytes[pos + 2];
            if (third != 0xBF) {
              result.concat("?");
              log_e("Invalid byte sequence. Dropped 3 bytes.");
              consumed = (third != 0) ? 3 : 2;
              break;
            }
            /* 0xEF,0xBB,0xBF is a BOM and codes for no output */
            ESP_LOGD(TAG, "Byte Order Mark skipped");
            consumed = 3;
          }
          break;
        case 0xF3:
          result.concat("ó");
          break;
        case 0xF6:
          result.concat("ö");
          break;
        case 0xFC:
          result.concat("ü");
          break;
        default:
          result.concat("?");
          log_w("ERROR: Unhandled char 0x%x", current);
      }
    }
    pos += consumed;
  }

  ESP_LOGD(TAG, "Input str: %s", plaintext);
  ESP_LOGD(TAG, "Returning html encoded str: %s", result.c_str());
  return result;
}