Skip to content

Commit

Permalink
Refactor myescapedstrcpy to mysubstr, with correct escaping detection…
Browse files Browse the repository at this point in the history
…, unicode detection. Update test_substring test
  • Loading branch information
ppomes committed Sep 9, 2024
1 parent d0aaa41 commit c05a16f
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 22 deletions.
63 changes: 50 additions & 13 deletions main/myanon.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <string.h>
#include <getopt.h>
#include <sys/time.h>
#include <stdint.h>

#include "config.h"
#include "uthash.h"
Expand Down Expand Up @@ -77,31 +78,66 @@ char *mystrcpy(char *dest, const char *src, size_t size)
return dest;
}

char *myescapedstrcpy(char *dest, const char *src, size_t size)
/*
 * Tell whether byte c is a backslash, the byte that starts an escape
 * sequence in the text being scanned.
 * Returns 1 for a backslash and 0 for every other byte.
 */
static int is_escape_char(char c)
{
    if (c != '\\')
    {
        return 0;
    }
    return 1;
}

/*
 * Tell whether c is a UTF-8 continuation byte, i.e. its two most
 * significant bits are "10" (values 0x80..0xBF).
 * Returns 1 when it is, 0 otherwise.
 */
static inline int is_utf8_continuation(unsigned char c) {
    /* Dropping the six payload bits leaves only the two marker bits. */
    return (c >> 6) == 0x2;
}

/*
 * Give the total number of bytes of the UTF-8 sequence whose first byte
 * is c, judged from the lead-byte bit pattern only:
 *   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 * Returns 0 when c cannot start a sequence (a continuation byte
 * 10xxxxxx, or any byte from 0xF8 upwards).
 */
static size_t utf8_char_length(unsigned char c) {
    size_t length = 0; /* stays 0 for an invalid start byte */

    if (c < 0x80) {
        length = 1;
    } else if (c >= 0xC0 && c < 0xE0) {
        length = 2;
    } else if (c >= 0xE0 && c < 0xF0) {
        length = 3;
    } else if (c >= 0xF0 && c < 0xF8) {
        length = 4;
    }

    return length;
}

/*
 * Copy the first num_chars characters of src into dest.
 *
 * A "character" is either
 *   - one UTF-8 sequence (1 to 4 bytes, sized by utf8_char_length()), or
 *   - a backslash escape: the backslash plus the complete character that
 *     follows it, counted together as a single character.
 *
 * dest      destination buffer; it is zero-filled first, so the result is
 *           always NUL-terminated when dst_size > 0
 * src       NUL-terminated source string (NULL is treated as empty)
 * dst_size  total size of dest in bytes, terminator included
 * num_chars maximum number of characters (not bytes) to copy
 *
 * Copying stops early, without ever writing a partial character, when:
 *   - the next character does not fit in the remaining space of dest,
 *   - src ends with a dangling backslash,
 *   - an invalid UTF-8 start byte or a truncated sequence is met.
 *
 * Returns dest. Nothing is written when dest is NULL or dst_size is 0.
 */
char *mysubstr(char *dest, const char *src, size_t dst_size, size_t num_chars)
{
    size_t srccount = 0;
    size_t dstcount = 0;
    size_t copied_chars = 0;
    size_t srclen;
    size_t capacity;

    /* dst_size - 1 below would wrap around for an empty buffer */
    if (dest == NULL || dst_size == 0)
    {
        return dest;
    }

    memset(dest, 0, dst_size);

    if (src == NULL)
    {
        return dest;
    }

    /* computed once: the source length does not change while copying */
    srclen = strlen(src);
    /* the last byte of dest is reserved for the NUL terminator */
    capacity = dst_size - 1;

    while (srccount < srclen && copied_chars < num_chars)
    {
        size_t prefix_length = 0; /* 1 when the character is escaped */
        size_t char_length;
        size_t unit_length;

        if (is_escape_char(src[srccount]))
        {
            if (srccount + 1 >= srclen)
            {
                /* dangling backslash at end of input: never emit it alone */
                break;
            }
            prefix_length = 1;
        }

        char_length = utf8_char_length((unsigned char)src[srccount + prefix_length]);
        unit_length = prefix_length + char_length;

        if (char_length == 0 || unit_length > srclen - srccount)
        {
            /* invalid start byte, or sequence cut short by end of input */
            break;
        }

        if (unit_length > capacity - dstcount)
        {
            /* the whole unit must fit, otherwise the terminator would be
             * overwritten or a character would be split */
            break;
        }

        memcpy(dest + dstcount, src + srccount, unit_length);
        dstcount += unit_length;
        srccount += unit_length;
        copied_chars++;
    }

    return dest;
}

Expand Down Expand Up @@ -230,8 +266,9 @@ anonymized_res_st anonymize_token(bool quoted, anon_base_st *config, char *token
break;
case AM_SUBSTRING:
res_st.len = MIN(worktokenlen, config->len);
DEBUG_MSG("%d, %d, %d", worktokenlen, config->len, res_st.len)
myescapedstrcpy((char *)&(res_st.data[0]), worktoken, res_st.len + 1);
mysubstr((char *)&(res_st.data[0]), worktoken, sizeof(res_st.data), res_st.len);
res_st.len = strlen(&(res_st.data[0]));
DEBUG_MSG("%d, %d, %d, %s\n", worktokenlen, config->len, res_st.len, res_st.data);
break;

#ifdef HAVE_PYTHON
Expand Down
2 changes: 1 addition & 1 deletion main/myanon.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ EXTERN unsigned long anon_time;

/* some safe malloc/strcpy wrappers */
void *mymalloc(size_t size);
char *myescapedstrcpy(char *dest, const char *src, size_t size);
char *mystrcpy(char *dest, const char *src, size_t size);
char *mysubstr(char *dst, const char *src, size_t dst_size, size_t num_chars);

/* function to anonymize a single field 'token' whose length is 'tokenlen'
 * anonymization config for this field is *config */
Expand Down
8 changes: 4 additions & 4 deletions tests/test_substring.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
-- MySQL dump 10.13 Distrib 8.0.37, for Linux (x86_64)
-- MySQL dump 10.13 Distrib 8.0.39, for Linux (x86_64)
--
-- Host: localhost Database: test_substring
-- ------------------------------------------------------
-- Server version 8.0.37-0ubuntu0.24.04.1
-- Server version 8.0.39-0ubuntu0.24.04.2

/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
Expand Down Expand Up @@ -33,7 +33,7 @@ CREATE TABLE `minimal_table` (

LOCK TABLES `minimal_table` WRITE;
/*!40000 ALTER TABLE `minimal_table` DISABLE KEYS */;
INSERT INTO `minimal_table` VALUES ('minimaal zo veel'),('🥳'),(' test escape');
INSERT INTO `minimal_table` VALUES ('minimaal zo veel'),('🥳🥳🥳🥳🥳🥳🥳🥳🥳🥳'),(' test with tab'),('\n\n'),('\0\0\0\0\0\0\0\0');
/*!40000 ALTER TABLE `minimal_table` ENABLE KEYS */;
UNLOCK TABLES;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
Expand All @@ -46,4 +46,4 @@ UNLOCK TABLES;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;

-- Dump completed on 2024-08-27 14:58:38
-- Dump completed on 2024-09-08 20:19:25
8 changes: 4 additions & 4 deletions tests/test_substring_anon.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
-- MySQL dump 10.13 Distrib 8.0.37, for Linux (x86_64)
-- MySQL dump 10.13 Distrib 8.0.39, for Linux (x86_64)
--
-- Host: localhost Database: test_substring
-- ------------------------------------------------------
-- Server version 8.0.37-0ubuntu0.24.04.1
-- Server version 8.0.39-0ubuntu0.24.04.2

/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
Expand Down Expand Up @@ -33,7 +33,7 @@ CREATE TABLE `minimal_table` (

LOCK TABLES `minimal_table` WRITE;
/*!40000 ALTER TABLE `minimal_table` DISABLE KEYS */;
INSERT INTO `minimal_table` VALUES ('minim'),('🥳'),(' test');
INSERT INTO `minimal_table` VALUES ('minim'),('🥳🥳🥳🥳🥳'),(' test'),('\n\n'),('\0\0\0\0\0');
/*!40000 ALTER TABLE `minimal_table` ENABLE KEYS */;
UNLOCK TABLES;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
Expand All @@ -46,4 +46,4 @@ UNLOCK TABLES;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;

-- Dump completed on 2024-08-27 14:58:38
-- Dump completed on 2024-09-08 20:19:25

0 comments on commit c05a16f

Please sign in to comment.