回滚：暂时移除对 UTF-8 文本的支持

undefined-ux · Dec 11, 2023 · de9825a · de9825a
1 parent 32461df
commit de9825a
Show file tree

Hide file tree

Showing 8 changed files with 95 additions and 96 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,11 @@
 .vs/
+.idea/
 x64/
 cmake-build-debug/
-*.pdb.idea/
 cmake-build-release/
 cmake-build-debug-mingw/
+*.pdb
+
+lcui-quick-start.zip
+package-lock.json
+-g/
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.27)
 
 PROJECT(JsonParser C)
 set(CMAKE_C_STANDARD 17)
+
+find_package(Iconv REQUIRED)
+
 add_library(jsonParserLib
         core/Json.h
         core/parser/parser.h
@@ -19,7 +22,7 @@ add_library(jsonParserLib
         core/utils/utf2gbk/UTF2GBK.c
         core/utils/utf2gbk/UTF2GBK.h
 )
-target_link_libraries(jsonParserLib PRIVATE iconv)
+target_link_libraries(jsonParserLib PRIVATE Iconv::Iconv)
 
 add_executable(json
         cli/main.c

diff --git a/cli/main.c b/cli/main.c
@@ -6,109 +6,100 @@
 #include "Json.h"
 
 
-// 结构体用于存储命令行参数的值
+// 结构体用于存储命令行参数的值
 struct CommandLineArgs {
-	FILE* input;         // 输入流
-	FILE* output;        // 输出流
-	int compress;        // 是否压缩
-	int format;          // 是否格式化
-    int utf8Text;         // 是否为utf-8 文本 是则需资源回收时删除中间文件
-    char* convertCacheFilePath; // 为utf-8文本时转换为gbk格式时生成的临时文件
-    char* outputFilePath; // 需要输出文件路径， 当为utf-8文本时用于转换回utf-8文本
-
+    FILE* input; // 输入流
+    FILE* output; // 输出流
+    int compress; // 是否压缩
+    int format; // 是否格式化
+    // int utf8Text; // 是否为utf-8 文本 是则需资源回收时删除中间文件
+    // char* convertCacheFilePath; // 为utf-8文本时转换为gbk格式时生成的临时文件
+    // char* outputFilePath; // 需要输出文件路径， 当为utf-8文本时用于转换回utf-8文本
 };
 
-// 函数声明
+// 函数声明
 struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]);
 
 int main(const int argc, char* argv[]) {
-	// setlocale(LC_ALL, ""); // 设置本地化环境以支持宽字符
+    // setlocale(LC_ALL, ""); // 设置本地化环境以支持宽字符
     const struct CommandLineArgs args = parseCommandLineArgs(argc, argv);
-    // 设置输入输出流
-    // 默认为标准输入输出
+    // 设置输入输出流
+    // 默认为标准输入输出
     setInputStream(args.input);
-	setOutputStream(args.output);
-	
-    //解析Json
+    setOutputStream(args.output);
+
+    //解析Json
     const struct JsonVal* json = parseValue();
-    if (args.compress) {
-        printJsonVal(json);
-    }
-    else if (args.format) {
-        printfJsonVal(json, 0);
-    }
+    if (args.compress) { printJsonVal(json); }
+    else if (args.format) { printfJsonVal(json, 0); }
 
     //destoryJsonVal(json);
-    if (args.input != stdin) {
-        fclose(args.input);
-    }
+    if (args.input != stdin) { fclose(args.input); }
     if (args.output != stdout) {
         fclose(args.output);
-        if(args.utf8Text) {
-            convertGbkToUtf8(args.output);
-        }
+        // if (args.utf8Text) { convertGbkToUtf8(args.output); }
     }
-	return 0;
+    return 0;
 }
 
-// 函数定义：解析命令行参数
+// 函数定义：解析命令行参数
 struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
     struct CommandLineArgs args;
 
-    // 初始化结构体成员
+    // 初始化结构体成员
     args.input = stdin;
     args.output = stdout;
     args.compress = 0;
     args.format = 1;
-    args.utf8Text = 0;
-    args.convertCacheFilePath = "";
-    // 标记是否已经出现了--format或--compress
+    // args.utf8Text = 0;
+    // args.convertCacheFilePath = "__cache.json";
+    // 标记是否已经出现了--format或--compress
     int formatSeen = 0;
     int compressSeen = 0;
-    // 解析命令行参数
+    // 解析命令行参数
     for (int i = 1; i < argc; ++i) {
         if (strcmp(argv[i], "--output") == 0 || strcmp(argv[i], "-of") == 0) {
-            // 指定输出流
+            // 指定输出流
             if (i + 1 < argc) {
                 printf("Output: %s\n", argv[i + 1]);
                 args.output = fopen(argv[i + 1], "w");
-                args.outputFilePath = argv[i+1];
+                // args.outputFilePath = argv[i + 1];
                 if (args.output == NULL) {
                     perror("Error opening output file");
                     exit(EXIT_FAILURE);
                 }
-                i++; // 跳过下一个参数，因为它是文件路径
+                i++; // 跳过下一个参数，因为它是文件路径
             }
             else {
                 fprintf(stderr, "Error: --output option requires a file path.\n");
                 exit(EXIT_FAILURE);
             }
         }
         else if (strcmp(argv[i], "--input") == 0 || strcmp(argv[i], "-if") == 0) {
-            // 指定输入流
+            // 指定输入流
             if (i + 1 < argc) {
-                FILE* f = fopen(argv[i + 1], "r");
-                args.convertCacheFilePath = "__cache.json";
-                if(isUtf8(f)) {
-                    printf("INFO: Is UTF-8 Text\n");
-                    args.input = convertUtf8ToGbk(f,args.convertCacheFilePath);
-                    args.utf8Text = 1;
-                }else {
-                    args.input = fopen(argv[i + 1], "r");
-                }
+                args.input = fopen(argv[i + 1], "r");
+                // FILE* f = fopen(argv[i + 1], "r");
+
+                // if (isUtf8(f)) {
+                //     printf("INFO: Is UTF-8 Text\n");
+                //     args.input = convertUtf8ToGbk(f);
+                //     args.utf8Text = 1;
+                // }
+                // else { args.input = fopen(argv[i + 1], "r"); }
                 if (args.input == NULL) {
                     perror("Error opening input file");
                     exit(EXIT_FAILURE);
                 }
-                i++; // 跳过下一个参数，因为它是文件路径
+                i++; // 跳过下一个参数，因为它是文件路径
             }
             else {
                 fprintf(stderr, "Error: --input option requires a file path.\n");
                 exit(EXIT_FAILURE);
             }
         }
         else if (strcmp(argv[i], "--compress") == 0 || strcmp(argv[i], "-c") == 0) {
-            // 压缩格式输出Json
+            // 压缩格式输出Json
             if (formatSeen) {
                 fprintf(stderr, "Error: --compress and --format cannot be used together.\n");
                 exit(EXIT_FAILURE);
@@ -118,7 +109,7 @@ struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
             compressSeen = 1;
         }
         else if (strcmp(argv[i], "--format") == 0 || strcmp(argv[i], "-f") == 0) {
-            // 格式化输出Json
+            // 格式化输出Json
             if (compressSeen) {
                 fprintf(stderr, "Error: --compress and --format cannot be used together.\n");
                 exit(EXIT_FAILURE);
@@ -127,23 +118,23 @@ struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
             formatSeen = 1;
         }
         else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) {
-            printf("用法：json [选项]...\n");
-            printf("从输入中解析和格式化JSON数据，可选择压缩或格式化输出。\n\n");
+            printf("用法：json [选项]...\n");
+            printf("从输入中解析和格式化JSON数据，可选择压缩或格式化输出。\n\n");
 
-            printf("长选项的强制性参数对于短选项也是强制性的。\n");
-            printf("    -if, --input 指定输入文件（默认为标准输入）\n");
-            printf("    -of, --output 指定输出文件（默认为标准输出）\n");
-            printf("    -f, --format 使用树形缩进输出格式化的JSON\n");
-            printf("    -c, --compress 输出压缩的JSON\n");
-            printf("    -h, --help 显示此帮助并退出\n\n");
+            printf("长选项的强制性参数对于短选项也是强制性的。\n");
+            printf("    -if, --input 指定输入文件（默认为标准输入）\n");
+            printf("    -of, --output 指定输出文件（默认为标准输出）\n");
+            printf("    -f, --format 使用树形缩进输出格式化的JSON\n");
+            printf("    -c, --compress 输出压缩的JSON\n");
+            printf("    -h, --help 显示此帮助并退出\n\n");
 
-            printf("示例：\n");
+            printf("示例：\n");
             printf("    json -if input.json -of output.json -f\n");
             printf("    json --input=input.json --output=output.json --compress\n\n");
 
-            printf("如果未指定输入或输出文件，则程序将默认使用标准输入或标准输出。\n\n");
+            printf("如果未指定输入或输出文件，则程序将默认使用标准输入或标准输出。\n\n");
 
-            printf("注意：--compress 和 --format 选项不能同时使用。\n");
+            printf("注意：--compress 和 --format 选项不能同时使用。\n");
             exit(0);
         }
         else {
@@ -153,4 +144,4 @@ struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
     }
 
     return args;
-}
+}
diff --git a/core/parser/parser.c b/core/parser/parser.c
@@ -30,7 +30,7 @@ struct JsonVal* parseValue() {
 		ungetc(c, f);
 		return parseNumber();
 	}
-	if (c == '"' || c == '\'') return parseString();
+	if (c == '"' || c == '\'') return parseString(c);
 	fprintf(
 		stderr,
 		"Unexcepted token %c at %llu",
@@ -39,12 +39,12 @@ struct JsonVal* parseValue() {
 	exit(1);
 }
 
-struct JsonString* parseStringToStr() {
+struct JsonString* parseStringToStr(char token) {
 	char c;
 	struct JsonString* str = JsonString_New();
 	const size_t pos = ftell(f) / sizeof(char);
 
-	while ((c = fgetc(f)) && c != EOF && c != '"' && c != '\'') {
+	while ((c = fgetc(f)) && c != EOF && c != token) {
 		if (c == '\\') {
 			c = fgetc(f);
 			if (c == EOF) {
@@ -75,15 +75,15 @@ struct JsonString* parseStringToStr() {
 		else { JsonStringPushBackChar(c, str); }
 	}
 
-	if (c != '"' && c != '\'') {
-		fprintf(stderr, "Expected character \" or ', but got EOF.\tString value parse begin with %llu\n", pos);
+	if (c != token) {
+		fprintf(stderr, "Expected character %c, but got EOF.\tString value parse begin with %llu\n", token, pos);
 		exit(1);
 	}
 	return str;
 }
 
-struct JsonVal* parseString() {
-	struct JsonString* str = parseStringToStr();
+struct JsonVal* parseString(char token) {
+	struct JsonString* str = parseStringToStr(token);
 
 	struct JsonVal* res = malloc(sizeof(struct JsonVal));
 	if (res == NULL) {
@@ -234,7 +234,7 @@ struct JsonVal* parseObject() {
 
 	while ((c = fgetc(f)) != EOF && c != '}') {
 		if (c == ' ' || c == '\n' || c == '\r' || c == ',') continue;
-		if (c == '"' || c == '\'') keyVal = parseStringToStr();
+		if (c == '"' || c == '\'') keyVal = parseStringToStr(c);
 
 		else if (c == ':') {
 			const struct JsonVal* Val = parseValue();

diff --git a/core/parser/parser.h b/core/parser/parser.h
@@ -3,7 +3,7 @@
 
 
 void setInputStream(FILE* stream);
-struct JsonVal* parseString();
+struct JsonVal* parseString(char token);
 struct JsonVal* parseNumber();
 struct JsonVal* parseBool();
 struct JsonVal* parseNull();

diff --git a/core/utils/utf2gbk/UTF2GBK.c b/core/utils/utf2gbk/UTF2GBK.c
@@ -1,17 +1,21 @@
+#define _POSIX_C_SOURCE 200112L
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <iconv.h>
+#include <unistd.h>
+
+#define BUFFER_SIZE 4096
 
-FILE* convertUtf8ToGbk(FILE* input, const char* outputFileName) {
+FILE* convertUtf8ToGbk(FILE* input) {
     iconv_t cd;
+    FILE* output = tmpfile();
     cd = iconv_open("GBK", "UTF-8");
     if (cd == (iconv_t)-1) {
         perror("iconv_open");
         exit(EXIT_FAILURE);
     }
-
-    FILE* output = fopen(outputFileName, "w");
     if (output == NULL) {
         perror("fopen");
         exit(EXIT_FAILURE);
@@ -39,9 +43,9 @@ FILE* convertUtf8ToGbk(FILE* input, const char* outputFileName) {
 
     iconv_close(cd);
     fclose(input);
-    fclose(output);
+    rewind(output);
 
-    return fopen(outputFileName, "r");
+    return output;
 }
 
 void convertGbkToUtf8(FILE* file) {
@@ -92,9 +96,9 @@ void convertGbkToUtf8(FILE* file) {
     iconv_close(cd);
 
     // Reopen the original file for writing
-    file = fopen("converted_file.txt", "w");
+    file = freopen("converted_file.txt", "w", stdout);
     if (file == NULL) {
-        perror("fopen");
+        perror("freopen");
         exit(EXIT_FAILURE);
     }
 
@@ -109,26 +113,21 @@ void convertGbkToUtf8(FILE* file) {
 
 
 int isUtf8(FILE* file) {
-    rewind(file); // 将文件指针定位到文件开头
-
+    // 获取当前文件指针位置
+    long originalPosition = ftell(file);
+    int res = 0;
     // 读取文件的前三个字节
     char bom[3];
     size_t bytesRead = fread(bom, 1, 3, file);
-
-    // 如果文件小于3个字节，返回0
-    if (bytesRead < 3) {
-        rewind(file);
-        return 0;
-    }
+    fseek(file, originalPosition, SEEK_SET);
+    fwrite(bom, 1, bytesRead, file);
 
     // 判断是否为UTF-8 without BOM编码
     if (bom[0] == (char)0xEF && bom[1] == (char)0xBB && bom[2] == (char)0xBF) {
         // 文件包含BOM，不是UTF-8 without BOM编码
-        rewind(file);
-        return 0;
+        res = 0;
     } else {
-        // 文件不包含BOM，可能是UTF-8 without BOM编码
-        rewind(file);
-        return 1;
+        res = 1;
     }
-}
+    return  res;
+}
diff --git a/core/utils/utf2gbk/UTF2GBK.h b/core/utils/utf2gbk/UTF2GBK.h
@@ -4,6 +4,6 @@
 #pragma once
 #include <stdio.h>
 
-FILE* convertUtf8ToGbk(FILE* input, const char* outputFileName);
+FILE* convertUtf8ToGbk(FILE* input);
 void convertGbkToUtf8(FILE* input);
 int isUtf8(FILE* file);