Skip to content

Commit

Permalink
回滚 暂时移除对UTF-8文本支持
Browse files Browse the repository at this point in the history
  • Loading branch information
undefined-ux committed Dec 11, 2023
1 parent 32461df commit de9825a
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 96 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
.vs/
.idea/
x64/
cmake-build-debug/
*.pdb.idea/
cmake-build-release/
cmake-build-debug-mingw/
*.pdb

lcui-quick-start.zip
package-lock.json
-g/
1 change: 1 addition & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.27)

PROJECT(JsonParser C)
set(CMAKE_C_STANDARD 17)

find_package(Iconv REQUIRED)

add_library(jsonParserLib
core/Json.h
core/parser/parser.h
Expand All @@ -19,7 +22,7 @@ add_library(jsonParserLib
core/utils/utf2gbk/UTF2GBK.c
core/utils/utf2gbk/UTF2GBK.h
)
target_link_libraries(jsonParserLib PRIVATE iconv)
target_link_libraries(jsonParserLib PRIVATE Iconv::Iconv)

add_executable(json
cli/main.c
Expand Down
117 changes: 54 additions & 63 deletions cli/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,109 +6,100 @@
#include "Json.h"


// Struct that stores the values of the command-line arguments
// Struct that stores the values of the command-line arguments
// NOTE(review): this span comes from a commit diff view of cli/main.c, so the
// member list appears twice (removed lines first, then added lines) without
// +/- markers -- confirm against the repository which set is current.
struct CommandLineArgs {
FILE* input; // input stream
FILE* output; // output stream
int compress; // whether to compress the output
int format; // whether to format the output
int utf8Text; // whether the input is UTF-8 text; if so, the intermediate file must be deleted during cleanup
char* convertCacheFilePath; // temporary file generated when UTF-8 text is converted to GBK
char* outputFilePath; // path of the requested output file; used to convert back to UTF-8 when the input was UTF-8 text

FILE* input; // input stream
FILE* output; // output stream
int compress; // whether to compress the output
int format; // whether to format the output
// int utf8Text; // whether the input is UTF-8 text; if so, the intermediate file must be deleted during cleanup
// char* convertCacheFilePath; // temporary file generated when UTF-8 text is converted to GBK
// char* outputFilePath; // path of the requested output file; used to convert back to UTF-8 when the input was UTF-8 text
};

// Function declaration
// Function declaration
// Builds a CommandLineArgs value from argc/argv; defined further down in this file.
struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]);

// Program entry point: parses the command line, hands the chosen streams to
// setInputStream/setOutputStream, parses one value with parseValue(), prints
// it with printJsonVal (when args.compress is set) or printfJsonVal (when
// args.format is set), then closes any stream that is not stdin/stdout.
// NOTE(review): this span comes from a commit diff view of cli/main.c, so
// removed and added lines appear together without +/- markers -- confirm
// against the repository which lines belong to the current revision.
int main(const int argc, char* argv[]) {
// setlocale(LC_ALL, ""); // set the locale so that wide characters are supported
// setlocale(LC_ALL, ""); // set the locale so that wide characters are supported
const struct CommandLineArgs args = parseCommandLineArgs(argc, argv);
// Set the input and output streams
// They default to standard input and standard output
// Set the input and output streams
// They default to standard input and standard output
setInputStream(args.input);
setOutputStream(args.output);
// Parse the JSON
setOutputStream(args.output);

// Parse the JSON
const struct JsonVal* json = parseValue();
if (args.compress) {
printJsonVal(json);
}
else if (args.format) {
printfJsonVal(json, 0);
}
if (args.compress) { printJsonVal(json); }
else if (args.format) { printfJsonVal(json, 0); }

// NOTE(review): the cleanup call below is commented out, so `json` is never released here.
//destoryJsonVal(json);
if (args.input != stdin) {
fclose(args.input);
}
if (args.input != stdin) { fclose(args.input); }
if (args.output != stdout) {
fclose(args.output);
// NOTE(review): args.output has already been passed to fclose on the line
// above, so handing it to convertGbkToUtf8 uses a closed stream -- confirm
// this call is on the removed side of the diff.
if(args.utf8Text) {
convertGbkToUtf8(args.output);
}
// if (args.utf8Text) { convertGbkToUtf8(args.output); }
}
return 0;
return 0;
}

// 函数定义:解析命令行参数
// 函数定义:解析命令行参数
struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
struct CommandLineArgs args;

// 初始化结构体成员
// 初始化结构体成员
args.input = stdin;
args.output = stdout;
args.compress = 0;
args.format = 1;
args.utf8Text = 0;
args.convertCacheFilePath = "";
// 标记是否已经出现了--format或--compress
// args.utf8Text = 0;
// args.convertCacheFilePath = "__cache.json";
// 标记是否已经出现了--format或--compress
int formatSeen = 0;
int compressSeen = 0;
// 解析命令行参数
// 解析命令行参数
for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "--output") == 0 || strcmp(argv[i], "-of") == 0) {
// 指定输出流
// 指定输出流
if (i + 1 < argc) {
printf("Output: %s\n", argv[i + 1]);
args.output = fopen(argv[i + 1], "w");
args.outputFilePath = argv[i+1];
// args.outputFilePath = argv[i + 1];
if (args.output == NULL) {
perror("Error opening output file");
exit(EXIT_FAILURE);
}
i++; // 跳过下一个参数,因为它是文件路径
i++; // 跳过下一个参数,因为它是文件路径
}
else {
fprintf(stderr, "Error: --output option requires a file path.\n");
exit(EXIT_FAILURE);
}
}
else if (strcmp(argv[i], "--input") == 0 || strcmp(argv[i], "-if") == 0) {
// 指定输入流
// 指定输入流
if (i + 1 < argc) {
FILE* f = fopen(argv[i + 1], "r");
args.convertCacheFilePath = "__cache.json";
if(isUtf8(f)) {
printf("INFO: Is UTF-8 Text\n");
args.input = convertUtf8ToGbk(f,args.convertCacheFilePath);
args.utf8Text = 1;
}else {
args.input = fopen(argv[i + 1], "r");
}
args.input = fopen(argv[i + 1], "r");
// FILE* f = fopen(argv[i + 1], "r");

// if (isUtf8(f)) {
// printf("INFO: Is UTF-8 Text\n");
// args.input = convertUtf8ToGbk(f);
// args.utf8Text = 1;
// }
// else { args.input = fopen(argv[i + 1], "r"); }
if (args.input == NULL) {
perror("Error opening input file");
exit(EXIT_FAILURE);
}
i++; // 跳过下一个参数,因为它是文件路径
i++; // 跳过下一个参数,因为它是文件路径
}
else {
fprintf(stderr, "Error: --input option requires a file path.\n");
exit(EXIT_FAILURE);
}
}
else if (strcmp(argv[i], "--compress") == 0 || strcmp(argv[i], "-c") == 0) {
// 压缩格式输出Json
// 压缩格式输出Json
if (formatSeen) {
fprintf(stderr, "Error: --compress and --format cannot be used together.\n");
exit(EXIT_FAILURE);
Expand All @@ -118,7 +109,7 @@ struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
compressSeen = 1;
}
else if (strcmp(argv[i], "--format") == 0 || strcmp(argv[i], "-f") == 0) {
// 格式化输出Json
// 格式化输出Json
if (compressSeen) {
fprintf(stderr, "Error: --compress and --format cannot be used together.\n");
exit(EXIT_FAILURE);
Expand All @@ -127,23 +118,23 @@ struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
formatSeen = 1;
}
else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) {
printf("用法:json [选项]...\n");
printf("从输入中解析和格式化JSON数据,可选择压缩或格式化输出。\n\n");
printf("用法:json [选项]...\n");
printf("从输入中解析和格式化JSON数据,可选择压缩或格式化输出。\n\n");

printf("长选项的强制性参数对于短选项也是强制性的。\n");
printf(" -if, --input 指定输入文件(默认为标准输入)\n");
printf(" -of, --output 指定输出文件(默认为标准输出)\n");
printf(" -f, --format 使用树形缩进输出格式化的JSON\n");
printf(" -c, --compress 输出压缩的JSON\n");
printf(" -h, --help 显示此帮助并退出\n\n");
printf("长选项的强制性参数对于短选项也是强制性的。\n");
printf(" -if, --input 指定输入文件(默认为标准输入)\n");
printf(" -of, --output 指定输出文件(默认为标准输出)\n");
printf(" -f, --format 使用树形缩进输出格式化的JSON\n");
printf(" -c, --compress 输出压缩的JSON\n");
printf(" -h, --help 显示此帮助并退出\n\n");

printf("示例:\n");
printf("示例:\n");
printf(" json -if input.json -of output.json -f\n");
printf(" json --input=input.json --output=output.json --compress\n\n");

printf("如果未指定输入或输出文件,则程序将默认使用标准输入或标准输出。\n\n");
printf("如果未指定输入或输出文件,则程序将默认使用标准输入或标准输出。\n\n");

printf("注意:--compress 和 --format 选项不能同时使用。\n");
printf("注意:--compress 和 --format 选项不能同时使用。\n");
exit(0);
}
else {
Expand All @@ -153,4 +144,4 @@ struct CommandLineArgs parseCommandLineArgs(int argc, char* argv[]) {
}

return args;
}
}
16 changes: 8 additions & 8 deletions core/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ struct JsonVal* parseValue() {
ungetc(c, f);
return parseNumber();
}
if (c == '"' || c == '\'') return parseString();
if (c == '"' || c == '\'') return parseString(c);
fprintf(
stderr,
"Unexcepted token %c at %llu",
Expand All @@ -39,12 +39,12 @@ struct JsonVal* parseValue() {
exit(1);
}

struct JsonString* parseStringToStr() {
struct JsonString* parseStringToStr(char token) {
char c;
struct JsonString* str = JsonString_New();
const size_t pos = ftell(f) / sizeof(char);

while ((c = fgetc(f)) && c != EOF && c != '"' && c != '\'') {
while ((c = fgetc(f)) && c != EOF && c != token) {
if (c == '\\') {
c = fgetc(f);
if (c == EOF) {
Expand Down Expand Up @@ -75,15 +75,15 @@ struct JsonString* parseStringToStr() {
else { JsonStringPushBackChar(c, str); }
}

if (c != '"' && c != '\'') {
fprintf(stderr, "Expected character \" or ', but got EOF.\tString value parse begin with %llu\n", pos);
if (c != token) {
fprintf(stderr, "Expected character %c, but got EOF.\tString value parse begin with %llu\n", token, pos);
exit(1);
}
return str;
}

struct JsonVal* parseString() {
struct JsonString* str = parseStringToStr();
struct JsonVal* parseString(char token) {
struct JsonString* str = parseStringToStr(token);

struct JsonVal* res = malloc(sizeof(struct JsonVal));
if (res == NULL) {
Expand Down Expand Up @@ -234,7 +234,7 @@ struct JsonVal* parseObject() {

while ((c = fgetc(f)) != EOF && c != '}') {
if (c == ' ' || c == '\n' || c == '\r' || c == ',') continue;
if (c == '"' || c == '\'') keyVal = parseStringToStr();
if (c == '"' || c == '\'') keyVal = parseStringToStr(c);

else if (c == ':') {
const struct JsonVal* Val = parseValue();
Expand Down
2 changes: 1 addition & 1 deletion core/parser/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


void setInputStream(FILE* stream);
struct JsonVal* parseString();
struct JsonVal* parseString(char token);
struct JsonVal* parseNumber();
struct JsonVal* parseBool();
struct JsonVal* parseNull();
Expand Down
41 changes: 20 additions & 21 deletions core/utils/utf2gbk/UTF2GBK.c
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
#define _POSIX_C_SOURCE 200112L

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include <unistd.h>

#define BUFFER_SIZE 4096

FILE* convertUtf8ToGbk(FILE* input, const char* outputFileName) {
FILE* convertUtf8ToGbk(FILE* input) {
iconv_t cd;
FILE* output = tmpfile();
cd = iconv_open("GBK", "UTF-8");
if (cd == (iconv_t)-1) {
perror("iconv_open");
exit(EXIT_FAILURE);
}

FILE* output = fopen(outputFileName, "w");
if (output == NULL) {
perror("fopen");
exit(EXIT_FAILURE);
Expand Down Expand Up @@ -39,9 +43,9 @@ FILE* convertUtf8ToGbk(FILE* input, const char* outputFileName) {

iconv_close(cd);
fclose(input);
fclose(output);
rewind(output);

return fopen(outputFileName, "r");
return output;
}

void convertGbkToUtf8(FILE* file) {
Expand Down Expand Up @@ -92,9 +96,9 @@ void convertGbkToUtf8(FILE* file) {
iconv_close(cd);

// Reopen the original file for writing
file = fopen("converted_file.txt", "w");
file = freopen("converted_file.txt", "w", stdout);
if (file == NULL) {
perror("fopen");
perror("freopen");
exit(EXIT_FAILURE);
}

Expand All @@ -109,26 +113,21 @@ void convertGbkToUtf8(FILE* file) {


// Reads up to three bytes from `file` and compares them with the UTF-8 byte
// order mark EF BB BF. The result is 0 when the mark is present and 1 when it
// is absent.
// NOTE(review): this span comes from a commit diff view of UTF2GBK.c, so two
// revisions are interleaved without +/- markers: one rewinds the stream and
// returns early, the other saves/restores the position and returns `res`.
// Confirm against the repository which lines are current.
int isUtf8(FILE* file) {
rewind(file); // move the file position to the beginning of the file

// Remember the current file position
long originalPosition = ftell(file);
int res = 0;
// Read the first three bytes of the file
char bom[3];
size_t bytesRead = fread(bom, 1, 3, file);

// If the file is shorter than 3 bytes, return 0
if (bytesRead < 3) {
rewind(file);
return 0;
}
fseek(file, originalPosition, SEEK_SET);
// NOTE(review): this writes the bytes that were just read back into `file`.
// The call site visible in cli/main.c opens the input with fopen(..., "r"),
// so the write presumably fails on that stream -- confirm whether it is
// intended. Without the short-read guard above, bom[] may also be compared
// below while only partly filled.
fwrite(bom, 1, bytesRead, file);

// Decide whether this is UTF-8 without a BOM
if (bom[0] == (char)0xEF && bom[1] == (char)0xBB && bom[2] == (char)0xBF) {
// The file contains a BOM, so it is not "UTF-8 without BOM"
rewind(file);
return 0;
res = 0;
} else {
// The file has no BOM, so it may be "UTF-8 without BOM"
rewind(file);
return 1;
res = 1;
}
}
return res;
}
2 changes: 1 addition & 1 deletion core/utils/utf2gbk/UTF2GBK.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
#pragma once
#include <stdio.h>

// NOTE(review): taken from a commit diff view, so both the removed and the
// added declaration of convertUtf8ToGbk are listed -- confirm which is current.
// Converts `input` through an iconv descriptor opened with
// iconv_open("GBK", "UTF-8"), writing to the file named by `outputFileName`,
// and returns a stream for reading the converted data.
FILE* convertUtf8ToGbk(FILE* input, const char* outputFileName);
// Same conversion, but the result is written to a tmpfile() stream that is
// rewound and returned.
FILE* convertUtf8ToGbk(FILE* input);
// NOTE(review): presumably the reverse conversion (GBK to UTF-8), judging by
// the name; most of the body is not visible here -- confirm.
void convertGbkToUtf8(FILE* input);
// Returns 0 when the first three bytes read from `file` are EF BB BF.
int isUtf8(FILE* file);

0 comments on commit de9825a

Please sign in to comment.