-
Notifications
You must be signed in to change notification settings - Fork 0
/
strv_extensions.h
425 lines (330 loc) · 9.92 KB
/
strv_extensions.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
#ifndef RE_STRV_EXTENSION_H
#define RE_STRV_EXTENSION_H
/* Unusual functions related to strv. */
/*-----------------------------------------------------------------------*/
/* strv */
/*-----------------------------------------------------------------------*/
/* ASCII to integer.
 * Parses an optional leading '-' followed by decimal digits and stops at the first
 * character that is not a digit. The result is 0 when no digit is found.
 * No overflow detection is performed.
 */
STRV_API int strv_atoi(strv sv);
/* Returns the first token of 'sv': leading characters found in 'delims' are skipped,
 * then the token extends up to the next character found in 'delims' (or the end of 'sv').
 * Returns a strv with zero size once there is no next token.
 */
STRV_API strv strv_tok(strv sv, strv delims);
/* Returns the line of 'sv' that contains the character at index 'pos'.
 * The line ending ("\n", "\r" or "\r\n") is kept in the returned line.
 * An empty 'sv' is returned unchanged.
 */
STRV_API strv strv_line_at(strv sv, size_t pos);
/* Returns the first line of 'sv' (line ending included) and removes that line from 'sv'. */
STRV_API strv strv_pop_line(strv* sv);
/* Given 'source' and a position 'pos', returns the line at 'pos' together with up to
 * 'extra_lines_required' lines before it and up to 'extra_lines_required' lines after it.
 * 'pos' must be lower than or equal to the size of 'source' (asserted with STRV_ASSERT).
 * - previous_line_count receives the number of lines that were extracted before the current one.
 * - next_line_count receives the number of lines that were extracted after the current one.
 */
STRV_API strv strv_get_surrounding_lines(strv source, size_t pos, size_t extra_lines_required, size_t* previous_line_count, size_t* next_line_count);
/* Should match the W3C email regex:
 *   /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$/
 * NOTE: the implementation additionally requires at least one '.' after the '@'.
 */
STRV_API bool strv_is_email(strv sv);
/*-----------------------------------------------------------------------*/
/* strv_splitter */
/*-----------------------------------------------------------------------*/
/* State used to walk through the tokens of a string view, one token per call
 * to strv_splitter_get_next.
 */
typedef struct strv_splitter strv_splitter;
struct strv_splitter {
/* Part of the input that has not been consumed yet. */
strv str;
/* Delimiters handed over to strv_tok when looking for the next token. */
strv delims;
};
/* Initializes 's' so that it splits 'sv' using the delimiters in 'delims'. */
STRV_API void strv_splitter_init(strv_splitter* s, strv sv, strv delims);
/* Same as strv_splitter_init, with the delimiters given as a 'const char*' (converted with strv_make_from_str). */
STRV_API void strv_splitter_init_str(strv_splitter* s, strv sv, const char* delims);
/* Returns, by value, a splitter over 'sv' using the delimiters in 'delims'. */
STRV_API strv_splitter strv_splitter_make(strv sv, strv delims);
/* Same as strv_splitter_make, with the delimiters given as a 'const char*' (converted with strv_make_from_str). */
STRV_API strv_splitter strv_splitter_make_str(strv sv, const char* delims);
/* Stores the next non-empty token in 'res' and returns true.
 * Returns false, leaving 'res' untouched, when no token is left.
 */
STRV_API bool strv_splitter_get_next(strv_splitter* s, strv* res);
#endif /* RE_STRV_EXTENSION_H */
#ifdef STRV_IMPLEMENTATION
static bool char_is_alphanum(char c);
/* Returns true if match regex (\.[a-zA-Z0-9-]+)+
 * i.e. one or more groups made of a '.' followed by alphanumeric characters or '-'.
 */
static bool parse_email_ending(const char** cursor, const char* end);
/* Advances the cursor over the characters allowed before the '@' (despite its name,
 * this consumes the beginning of the address, not its end).
 */
static void eat_email_suffix(const char** cursor, const char* end);
/* Returns true if match regex [a-zA-Z0-9-]+ */
static bool parse_email_subdomain(const char** cursor, const char* end);
/* ASCII to integer.
 * Parses an optional leading '-' followed by decimal digits and stops at the first
 * character that is not a digit, or at the end of the view.
 * Returns 0 when the view is empty or holds no digit.
 * No overflow detection is performed.
 */
STRV_API int
strv_atoi(strv sv)
{
    const char* str = sv.data;
    const char* end = sv.data + sv.size;
    int result = 0;
    bool is_negative = false;

    /* The view may be empty and is not necessarily null-terminated:
     * the bounds must be checked before every dereference.
     */
    if (str < end && *str == '-') {
        is_negative = true;
        ++str;
    }

    while (str < end && *str >= '0' && *str <= '9') {
        result = (result * 10) + (*str - '0');
        ++str;
    }

    return is_negative ? -result : result;
}
/* Extracts the first token of 'sv'.
 * Leading characters found in 'delims' are skipped, then the token runs up to the next
 * character found in 'delims' or up to the end of 'sv'.
 * When there is no token, the result has a size of zero and points to the beginning of 'sv'.
 */
STRV_API strv
strv_tok(strv sv, strv delims)
{
    strv token = strv_make_from(sv.data, 0);

    if (sv.size == 0) {
        return token;
    }

    const char* cursor = sv.data;
    const char* const limit = sv.data + sv.size;

    /* Skip the delimiters placed in front of the token. */
    for (; cursor != limit; ++cursor) {
        if (!strv_contains_char(delims, cursor[0])) {
            break;
        }
    }

    /* Only delimiters were found. */
    if (cursor == limit) {
        return token;
    }

    /* The token starts here and grows until a delimiter or the end is reached. */
    token.data = cursor;
    for (; cursor != limit; ++cursor) {
        if (strv_contains_char(delims, cursor[0])) {
            break;
        }
        token.size += 1;
    }

    return token;
}
/* Returns the line of 'sv' that contains the character at index 'pos'.
 * The line ending ("\n", "\r" or "\r\n") is kept in the returned line.
 * - An empty 'sv' is returned unchanged.
 * - 'pos' equal to sv.size designates the end of the text; a greater 'pos' is clamped
 *   to sv.size so that the view is never indexed out of bounds.
 */
STRV_API strv
strv_line_at(strv sv, size_t pos)
{
    if (sv.size == 0)
    {
        return sv;
    }

    /* sv.data[sv.size] is not part of the view and must never be read. */
    if (pos > sv.size)
    {
        pos = sv.size;
    }

    size_t left_index = pos;
    size_t right_index = pos;

    /* If we are on \n and the previous char is \r, we place ourself on the \r instead.
     * Otherwise the \r will be considered as a previous new line even though it's
     * within the same line.
     */
    if (pos > 0
        && pos < sv.size
        && sv.data[pos] == '\n'
        && sv.data[pos - 1] == '\r')
    {
        left_index -= 1;
    }

    /* Look for the beginning of the string or the end of the previous line. */
    while (left_index > 0)
    {
        if (sv.data[left_index - 1] == '\n' || sv.data[left_index - 1] == '\r')
        {
            break;
        }
        left_index -= 1;
    }

    /* Go to the next line ending if we are not already on one. */
    while (right_index < sv.size
        && sv.data[right_index] != '\n'
        && sv.data[right_index] != '\r')
    {
        right_index += 1;
    }

    /* We stopped right at the \r or \n or at the end of the text.
     * If we are on a \r or \n we eat it because we want it in the result.
     */
    if (right_index < sv.size
        && (sv.data[right_index] == '\r' || sv.data[right_index] == '\n'))
    {
        right_index += 1;
    }

    /* If we are on a \n and the previous char was a \r we eat it as well,
     * because "\r\n" is a single line ending.
     */
    if (right_index < sv.size
        && sv.data[right_index - 1] == '\r'
        && sv.data[right_index] == '\n')
    {
        right_index += 1;
    }

    return strv_substr_from(sv, left_index, right_index - left_index);
}
/* Returns the first line of '*sv' (as computed by strv_line_at at index 0) and
 * removes it from '*sv' by moving its beginning forward and shrinking its size.
 * '*sv' is left untouched when the returned line is empty.
 */
STRV_API strv
strv_pop_line(strv* sv)
{
    strv first_line = strv_line_at(*sv, 0);

    if (first_line.size == 0)
    {
        return first_line;
    }

    sv->data = sv->data + first_line.size;
    sv->size = sv->size - first_line.size;

    return first_line;
}
/* Returns the line of 'sv' located at 'pos' together with up to 'extra_lines_required'
 * lines before it and up to 'extra_lines_required' lines after it.
 * 'pos' must be lower than or equal to sv.size (checked with STRV_ASSERT).
 * - previous_line_count receives the number of lines extracted before the current one.
 * - next_line_count receives the number of lines extracted after the current one.
 */
STRV_API strv
strv_get_surrounding_lines(strv sv, size_t pos, size_t extra_lines_required, size_t* previous_line_count, size_t* next_line_count)
{
    STRV_ASSERT(pos <= sv.size);

    strv line = strv_line_at(sv, pos);
    strv tmp;

    size_t left_pos = line.data - sv.data;
    size_t left_line_count = 0;

    /* Get lines on the left. */
    while (left_pos > 0
        && left_line_count < extra_lines_required)
    {
        /* 'left_pos - 1' is the last character of the previous line. */
        tmp = strv_line_at(sv, left_pos - 1);
        left_pos -= tmp.size;
        left_line_count += 1;
    }

    size_t right_pos = line.data - sv.data + line.size;
    size_t right_line_count = 0;

    /* Get lines on the right. */
    while (right_pos < sv.size
        && right_line_count < extra_lines_required)
    {
        /* 'right_pos' is the first character of the next line.
         * Asking for 'right_pos + 1' would jump over a one-character line (an empty
         * line made of a lone line ending) and could index one past the last character.
         */
        tmp = strv_line_at(sv, right_pos);
        right_pos += tmp.size;
        right_line_count += 1;
    }

    *previous_line_count = left_line_count;
    *next_line_count = right_line_count;

    return strv_substr_from(sv, left_pos, right_pos - left_pos);
}
/* Returns true if 'sv' looks like an email address: a non-empty local part, an '@',
 * a first domain label, then one or more ".label" groups up to the end of 'sv'.
 * At least one '.' is required after the '@'.
 */
STRV_API bool
strv_is_email(strv sv)
{
    const char* cursor = sv.data;
    const char* end = sv.data + sv.size;

    /* Advance over the local part; this stops on the first character it cannot contain. */
    eat_email_suffix(&cursor, end);

    if (cursor == sv.data) return false; /* the local part must not be empty */
    if (cursor >= end) return false;     /* nothing after the local part */
    if (*cursor != '@') return false;    /* must contain '@' */

    ++cursor; /* skip '@' */

    strv view = strv_make_from(cursor, end - cursor);
    size_t first_dot = strv_find_char(view, '.');

    if (first_dot == STRV_NPOS) return false; /* does not contain any dot */

    const char* first_dot_ptr = cursor + first_dot;

    /* The first label must be non-empty and must run right up to the first dot. */
    if (!parse_email_subdomain(&cursor, first_dot_ptr)) return false;
    if (cursor != first_dot_ptr) return false;

    if (!parse_email_ending(&cursor, end)) return false;

    if (cursor < end) return false; /* must reach the end */

    return true;
}
/* Returns true if 'c' is an ASCII letter or an ASCII decimal digit ([a-zA-Z0-9]). */
static bool
char_is_alphanum(char c)
{
    return (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9');
}
/* Parses one or more ".label" groups, where a label is a non-empty run of
 * alphanumeric characters or '-' (regex: (\.[a-zA-Z0-9-]+)+ ).
 * On success '*cursor' is moved to 'end' and true is returned.
 * Returns false, leaving '*cursor' untouched, when a group does not start with '.',
 * when a '.' is followed by an empty label (e.g. a trailing '.'), or when there is
 * nothing to parse.
 */
static bool
parse_email_ending(const char** cursor, const char* end)
{
    const char* current = *cursor;
    bool match = false;

    while (current < end)
    {
        if (*current != '.') return false; /* every group must start with '.' */

        ++current; /* skip '.' */

        const char* label_start = current;

        while (current < end
            && (char_is_alphanum(*current)
                || strv_contains_char(strv_make_from_str("-"), *current)))
        {
            ++current;
        }

        /* A '.' must be followed by at least one label character. */
        if (current == label_start) return false;

        match = true;
    }

    *cursor = current;

    return match;
}
static void
eat_email_suffix(const char** cursor, const char* end)
{
const char* current = *cursor;
while (current < end)
{
int valid = !!(char_is_alphanum(*current)
|| strv_contains_char(strv_make_from_str(".!#$%&’*+/=?^_`{|}~-"), *current))
;
if (!valid) {
break;
}
++current;
}
*cursor = current;
}
/* Consumes a run of alphanumeric characters or '-' starting at '*cursor' and
 * bounded by 'end'. '*cursor' is moved past the run.
 * Returns true if at least one character was consumed.
 */
static bool
parse_email_subdomain(const char** cursor, const char* end)
{
    const char* const first = *cursor;
    const char* it = first;

    for (; it < end; ++it)
    {
        if (char_is_alphanum(*it)) {
            continue;
        }
        if (strv_contains_char(strv_make_from_str("-"), *it)) {
            continue;
        }
        /* First character that does not belong to the label. */
        break;
    }

    *cursor = it;

    return it != first;
}
/*-----------------------------------------------------------------------*/
/* strv_splitter */
/*-----------------------------------------------------------------------*/
/* Sets up 's' to split 'sv' with the delimiters in 'delims'. */
STRV_API void
strv_splitter_init(strv_splitter* s, strv sv, strv delims)
{
    strv_splitter fresh;

    fresh.str = sv;
    fresh.delims = delims;

    *s = fresh;
}
/* Sets up 's' to split 'sv'; the delimiters are given as a 'const char*' and
 * converted with strv_make_from_str.
 */
STRV_API void
strv_splitter_init_str(strv_splitter* s, strv sv, const char* delims)
{
    strv_splitter_init(s, sv, strv_make_from_str(delims));
}
/* Builds and returns, by value, a splitter over 'sv' with the delimiters in 'delims'. */
STRV_API strv_splitter
strv_splitter_make(strv sv, strv delims)
{
    strv_splitter splitter;

    splitter.str = sv;
    splitter.delims = delims;

    return splitter;
}
/* Builds and returns, by value, a splitter over 'sv'; the delimiters are given as a
 * 'const char*' and converted with strv_make_from_str.
 */
STRV_API strv_splitter
strv_splitter_make_str(strv sv, const char* delims)
{
    strv_splitter splitter;

    splitter.str = sv;
    splitter.delims = strv_make_from_str(delims);

    return splitter;
}
/* Extracts the next token of the splitter.
 * The remaining input is moved past the token and past the single character that
 * follows it, if any. When a non-empty token was found it is stored in '*res' and
 * true is returned; otherwise '*res' is left untouched and false is returned.
 */
STRV_API bool
strv_splitter_get_next(strv_splitter* s, strv* res)
{
    if (s->str.size == 0)
    {
        return false;
    }

    strv token = strv_tok(s->str, s->delims);

    /* Everything up to the end of the token is now consumed. */
    size_t consumed = (size_t)(token.data - s->str.data) + token.size;
    s->str.size -= consumed;
    s->str.data = token.data + token.size;

    /* If there are remaining chars this means a delimiter has been found, in which case we skip it. */
    if (s->str.size != 0)
    {
        s->str.data += 1;
        s->str.size -= 1;
    }

    if (token.size == 0)
    {
        return false;
    }

    *res = token;
    return true;
}
#endif /* STRV_IMPLEMENTATION */