diff --git a/.github/ISSUE_TEMPLATE/workflows/book.yml b/.github/ISSUE_TEMPLATE/workflows/book.yml new file mode 100644 index 0000000..55e548d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/workflows/book.yml @@ -0,0 +1,32 @@ +name: Book + +on: + push: + branches: + - release + paths: + - 'docs/**' + +jobs: + deploy_en: + name: Deploy book on gh-pages + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install mdBook + uses: peaceiris/actions-mdbook@v1 + - name: Render book + run: | + mdbook build -d gh-pages docs/zh-CN + mkdir gh-pages + mv docs/zh-CN/gh-pages gh-pages/zh-CN + - name: Deploy + uses: peaceiris/actions-gh-pages@v2.5.1 + with: + emptyCommits: true + keepFiles: false + env: + ACTIONS_DEPLOY_KEY: ${{ secrets.ACTIONS_DEPLOY_KEY }} + PUBLISH_BRANCH: gh-pages + PUBLISH_DIR: gh-pages \ No newline at end of file diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml new file mode 100644 index 0000000..ff3f0c9 --- /dev/null +++ b/.github/workflows/book.yml @@ -0,0 +1,34 @@ +name: Book + +on: + push: + branches: + - release + paths: + - 'docs/**' + +jobs: + deploy_en: + name: Deploy book on gh-pages + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install mdBook + uses: peaceiris/actions-mdbook@v1 + - name: Render book + run: | + mdbook build -d gh-pages docs/zh-CN + mkdir gh-pages + mv docs/zh-CN/gh-pages gh-pages/zh-CN + mv docs/index.html gh-pages + + - name: Deploy + uses: peaceiris/actions-gh-pages@v2.5.1 + with: + emptyCommits: true + keepFiles: false + env: + ACTIONS_DEPLOY_KEY: ${{ secrets.ACTIONS_DEPLOY_KEY }} + PUBLISH_BRANCH: gh-pages + PUBLISH_DIR: gh-pages diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..88bbf62 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,39 @@ + + +on: + push: + tags: + - 'v*' + +name: release + +jobs: + build: + name: Upload Release Asset + runs-on: 
ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Build project # This would actually build your project, using zip for an example artifact + run: | + cargo build --release + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.ACTIONS_DEPLOY_KEY }} + with: + tag_name: ${{ github.ref }} + release_name: Release ${{ github.ref }} + draft: false + prerelease: false + - name: Upload Release Asset + id: upload-release-asset + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.ACTIONS_DEPLOY_KEY }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} # This pulls from the CREATE RELEASE step above, referencing it's ID to get its outputs object, which include a `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps + asset_path: ./target/release/chubaodb + asset_name: chubaodb-linux + asset_content_type: application/zip diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..3c13d1b --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/README.md b/README.md index 7261a86..772d538 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ As a scalable non-relational structured data infrastructure, ChubaoDB has severa * rich indexes for efficient search + ## External Interface collection, document, field diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..ddf77d6 --- /dev/null +++ b/docs/index.html @@ -0,0 +1 @@ + 中文文档 \ No newline at end of file diff --git 
a/docs/zh-CN/.gitignore b/docs/zh-CN/.gitignore new file mode 100644 index 0000000..7585238 --- /dev/null +++ b/docs/zh-CN/.gitignore @@ -0,0 +1 @@ +book diff --git a/docs/zh-CN/book.toml b/docs/zh-CN/book.toml new file mode 100644 index 0000000..db0559d --- /dev/null +++ b/docs/zh-CN/book.toml @@ -0,0 +1,6 @@ +[book] +authors = ["AnsjSun"] +language = "cn" +multilingual = false +src = "src" +title = "chubaodb 白皮书" diff --git a/docs/zh-CN/src/SUMMARY.md b/docs/zh-CN/src/SUMMARY.md new file mode 100644 index 0000000..3757e45 --- /dev/null +++ b/docs/zh-CN/src/SUMMARY.md @@ -0,0 +1,16 @@ +# Summary + +- [介绍](./introduction.md) +- [编译与安装](./install.md) + - [配置文件](./config.md) + - [集群模式](./cluster.md) +- [元数据管理](./master.md) + - [库表管理](./collection.md) +- [数据操作](./document.md) + - [CRUD](./crud.md) + - [搜索](./search.md) + - [聚合](./aggregation.md) + - [向量](./vector.md) +- [设计](./design.md) + - [架构](./architecture.md) + - [计划](./plan.md) diff --git a/docs/zh-CN/src/aggregation.md b/docs/zh-CN/src/aggregation.md new file mode 100644 index 0000000..a2c89c1 --- /dev/null +++ b/docs/zh-CN/src/aggregation.md @@ -0,0 +1,127 @@ +# 聚合那些事儿 + +下面我们来介绍一下chubaodb的聚合功能,在这之前,请确保你已经通过[库表管理](./collection.md)创建了表。 + +我们先插入一些测试数据吧,先创建5个人 + +```` +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "张三", + "age": 20, + "birthday": "2000-02-02", + "description": "zhangsan can use java good at php, but python not good at", + "skills": ["java", "php", "python"] +} +' "http://127.0.0.1:8080/put/person/1" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "李四", + "age": 30, + "birthday": "1990-02-02", + "description": "lisi can use java ,only java", + "skills": ["java"] +} +' "http://127.0.0.1:8080/put/person/2" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "王五", + "age": 20, + "birthday": "2000-03-20", + "description": "wangwu is c++ rust good at!", + "skills": ["c++", "rust"] +} +' "http://127.0.0.1:8080/put/person/3" + +curl -H 
"Content-Type: application/json" -XPOST -d' +{ + "name": "牛六", + "age": 35, + "birthday": "1985-12-02", + "description": "niuliu age too old", + "skills": ["java", "golang", "python", "rust"] +} +' "http://127.0.0.1:8080/put/person/4" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "赵七", + "age": 18, + "birthday": "2002-03-12", + "description": "is a web user, he can use ruby and php", + "skills": ["php", "ruby"] +} +' "http://127.0.0.1:8080/put/person/5" +```` + + + +插入完成后,我们通过search接口可以查看到这五个人 `http://127.0.0.1:8080/search/person` + + + +我们先了解下聚合的真谛。 我们从一句大家都熟悉的sql 入手吧 + +`select age,count(age) from person group by age` + +这个聚合函数可以看成两个部分。分别是 `group by ` 分组,和 `count(*)` 指标两个部分。所以聚合分两种,一个是分组,一个是统计。 + +ok,现在让我们看看在`chubaodb`中这两部分是如何完成的。我们通过一个例子入手 + +`http://127.0.0.1:8080/agg/person?group=term(age)&fun=stats(age)` + +* agg 是方法的操作 +* group= 是聚合方法。例子中我们用的term 方式进行分组。 +* fun = 是指标方法。例子中我们统计 max ,min 等 + +![image-20200715135803824](image/image-20200715135803824.png) + + + +好的恭喜你已经看到结果了。默认是按照value中的count 进行排序 + + + +### 目前group 支持的方法有 + +`term(name)` 按照字段进行聚合 + +`date(name,format)` example: date(birthday, yyyy-MM) 按照时间格式进行聚合 + +`range(name, range_str)` example:*range(age,-0,0-20,20-30,30-40,40-)* + + + +### Fun 支持的方法有 + +`hits(size)`:每个分组中返回size个文档,无排序功能 + +`stats(name)`: 每个分组中,count, max, min, sum, missing 的个数 + +* fun 可以为空,如果为空则默认为count方式 + + + +现在让我们举一些例子: + +### 按照年龄分组,分别显示每个年龄中的人 + + ![image-20200715140655966](image/image-20200715140655966.png) + +### 按照技能分组,显示每个技能的人数 + +`http://127.0.0.1:8080/agg/person?group=term(skills)` + +![image-20200715140812082](image/image-20200715140812082.png) + +### 按照技能分组,显示每个技能的人数,按照技能名称排序 + +`http://127.0.0.1:8080/agg/person?group=term(skills)&sort=key:asc` + +![image-20200715140902639](image/image-20200715140902639.png) + +### 按照年龄和技能分组查看人数 + +![image-20200715141018443](image/image-20200715141018443.png) \ No newline at end of file diff --git a/docs/zh-CN/src/architecture.md b/docs/zh-CN/src/architecture.md new file 
mode 100644 index 0000000..c503cff --- /dev/null +++ b/docs/zh-CN/src/architecture.md @@ -0,0 +1 @@ +# 关于架构 diff --git a/docs/zh-CN/src/cluster.md b/docs/zh-CN/src/cluster.md new file mode 100644 index 0000000..373f8f6 --- /dev/null +++ b/docs/zh-CN/src/cluster.md @@ -0,0 +1,3 @@ +# 集群模式 + +编造ing...... diff --git a/docs/zh-CN/src/collection.md b/docs/zh-CN/src/collection.md new file mode 100644 index 0000000..30ccbe7 --- /dev/null +++ b/docs/zh-CN/src/collection.md @@ -0,0 +1,60 @@ +# 库表管理 + +下面我们会对`创建表`,`查询表`,`删除表` 进行演示。目前表结构一旦创建不支持修改。 +打开master的管理地址 http://127.0.0.1:7070 + +### 创建表 + +通过 `collectionCreate`接口进行创建 + +![image-20200715112907536](image/image-20200715112907536.png) + +* Name 是表名称, +* partitionNum 是这个表分多少个分片。分片多会提高插入的并发能力,但是会降低搜索效率,并非越多或者越少越好 +* partitionReplicaNum 是每个分片多少个副本。建议要么1,要么3+ 。在传统分布式系统环境,可以设置为3,单机版只能设置1。partitionReplicaNum 必须小于等于你的机器个数 +* Fields 是这个表里面的字段。我们提供了 `int`, `float`, `string`, `text`, `vector`, `date` 几种字段格式,注意 text 和string的区别是:text是全文检索,比如 `中国银行` 搜索`中国`是会被召回的, `string`的话必须输入完整的内容才能匹配。 + + + +下面我们创建一个人物表,包含 `ID`, `姓名`, `年龄` ,`生日` , `简介` ,`技能` 几个字段, + +```` +mutation{ + collectionCreate( + name:"person", + partitionNum:1, + partitionReplicaNum:1 + fields:{ + int:[{name:"age", value:true }] + string:[{name:"name" }, {name:"skills", value:true, array:true}] + date:[{name:"birthday", value:true }] + text:[{name:"description"}] + } + + ) +} +```` + +![image-20200715115437856](image/image-20200715115437856.png) + + + +出现如下结构意味着创建表成功了。每种类型有自己的参数大家可以参阅iql的文档。 + + + +### 查询表 + +![image-20200715115617185](image/image-20200715115617185.png) + + + + + +### 删除表 + +![image-20200715115703796](image/image-20200715115703796.png) + + + +ok 你已经具备了元数据管理的基本技能。 \ No newline at end of file diff --git a/docs/zh-CN/src/config.md b/docs/zh-CN/src/config.md new file mode 100644 index 0000000..f650080 --- /dev/null +++ b/docs/zh-CN/src/config.md @@ -0,0 +1,156 @@ +# 配置文件 + 
+前面我们已经学会了如何以最基本的方式启动chubaodb。下面我们来学一种非常高级的用法,通过配置文件和启动参数来启动chubaodb。在学这个之前我们可能需要简单了解下chubaodb的架构,chubaodb分了三个角色`master`,`pserver`,`router`. + +* master 是集群管理,元数据管理的模块。表结构,数据均衡,failover 等 +* pserver 是数据存储及计算节点。 +* router 是提供了restful的数据查询组件。router 是无状态的。 + +下面是一个简单的config例子。也是默认config的参数 + +```` +[global] +# the name will validate join cluster by same name +name = "chubaodb" +# your server ip for connect, you can use -i in rags to set it +# ip = "127.0.0.1" +# log path , If you are in a production environment, You'd better set absolute paths +log = "logs/" +# default log type for any model +log_level = "info" +# log file size for rolling +log_limit_bytes = 128000000 +# number of reserved log files +log_file_count = 10 +# Whether to use distributed storage. If it's true, there's only one +shared_disk = false + +[router] +# port for server +http_port = 8080 + +[ps] +#set zone num, default is 0 +zone = "default" +# you data save to disk path ,If you are in a production environment, You'd better set absolute paths +data = "data/" +# port for server +rpc_port = 9090 +# how often to refresh the index +flush_sleep_sec = 3 + [ps.raft] + heartbeat_port = 10030 + replicate_port = 10031 + # how size of num for memory + log_max_num = 200000 + # how size of num for memory + log_min_num = 100000 + # how size of num for memory + log_file_size_mb = 128 + # Three without a heartbeat , follower to begin consecutive elections + heartbeate_ms = 500 + + +[[masters]] +# master ip for service +ip = "127.0.0.1" +# port for server +http_port = 7070 +# master data path for meta +data = "data/meta/" +```` + +可以发现我们把 三个模块写到了同一个配置文件。同时各个节点通过参数来选择启动的模块。实现了一个配置文件走天下的易用功能。 + +可以通过 `./chubaodb --help` 获得参数说明 + +```` +(base) ➜ release git:(async-std) ✗ ./chubaodb --help +chubaodb 0.1.0 +hello index world + +USAGE: + chubaodb [SUBCOMMAND] + +FLAGS: + -h, --help Prints help information + -V, --version Prints version information + +SUBCOMMANDS: + all + help Prints this message or the help of the given 
subcommand(s) + master + ps + router +```` + +启动可以通过 `./chubaodb all --help ` , `./chubaodb ps --help ` .....来获取更多。参数说明 + +* 比如我们想把三个模块在一个进程中启动 那么就是. `./chubaodb all -c ../../config/config.toml` . + +```` +./chubaodb all -c ../../config/config.toml +load config by path: ../../config/config.toml +2020-07-15 11:05:39 - INFO - chubaodb::util::config(189) - log init ok +2020-07-15 11:05:39 - INFO - chubaodb(149) - All ChubaoDB servers were started successfully! +2020-07-15 11:05:39 - INFO - chubaodb::router::server(29) - router is listening on http://0.0.0.0:8080 +2020-07-15 11:05:39 - INFO - actix_server::builder(262) - Starting 8 workers +2020-07-15 11:05:39 - INFO - chubaodb::util::http_client(21) - send get for url:http://127.0.0.1:7070/my_ip +2020-07-15 11:05:39 - INFO - surf::middleware::logger::native(119) - sending request +2020-07-15 11:05:39 - INFO - actix_server::builder(276) - Starting "actix-web-service-0.0.0.0:8080" service on 0.0.0.0:8080 +2020-07-15 11:05:39 - WARN - isahc::handler(209) - request completed with error [id=AtomicCell { value: 0 }]: ConnectFailed: failed to connect to the server +2020-07-15 11:05:39 - ERROR - chubaodb::pserver::server(41) - got ip from master has err:Error(InternalErr, "ConnectFailed: failed to connect to the server") +2020-07-15 11:05:39 - INFO - chubaodb::master::server(43) - master listening on http://0.0.0.0:7070 +2020-07-15 11:05:39 - INFO - actix_server::builder(262) - Starting 8 workers +2020-07-15 11:05:39 - INFO - actix_server::builder(276) - Starting "actix-web-service-0.0.0.0:7070" service on 0.0.0.0:7070 +2020-07-15 11:05:40 - INFO - chubaodb::util::http_client(21) - send get for url:http://127.0.0.1:7070/my_ip +2020-07-15 11:05:40 - INFO - surf::middleware::logger::native(119) - sending request +2020-07-15 11:05:40 - INFO - chubaodb::master::server(440) - success_response [Object({"ip": String("127.0.0.1")})] +2020-07-15 11:05:40 - INFO - surf::middleware::logger::native(119) - request completed +2020-07-15 
11:05:40 - INFO - chubaodb::pserver::server(36) - got my ip:127.0.0.1 from master +2020-07-15 11:05:40 - INFO - chubaodb::util::http_client(51) - send post for url:http://127.0.0.1:7070/pserver/register +2020-07-15 11:05:40 - INFO - surf::middleware::logger::native(119) - sending request +2020-07-15 11:05:40 - INFO - chubaodb::master::server(301) - prepare to heartbeat with address 127.0.0.1:9090, zone default +2020-07-15 11:05:40 - INFO - chubaodb::master::server(440) - success_response [PServer { id: Some(1), addr: "127.0.0.1:9090", write_partitions: [], zone: "default", modify_time: 0 }] +2020-07-15 11:05:40 - INFO - surf::middleware::logger::native(119) - request completed +2020-07-15 11:05:40 - INFO - chubaodb::pserver::service(111) - register to master ok: node_id:Some(1) +2020-07-15 11:05:40 - INFO - chubaodb::pserver::service(126) - register server line:PServer { id: Some(1), addr: "127.0.0.1:9090", write_partitions: [], zone: "default", modify_time: 0 } +2020-07-15 11:05:40 - INFO - chubaodb::pserver::server(59) - init pserver OK use time:Ok(5.333ms) +```` + +* 比如我们只启动 router 那么就是. `./chubaodb router -c ../../config/config.toml` . + +```` +(base) ➜ release git:(async-std) ✗ ./chubaodb router -c ../../config/config.toml +load config by path: ../../config/config.toml +2020-07-15 11:01:59 - INFO - chubaodb::util::config(189) - log init ok +2020-07-15 11:01:59 - INFO - chubaodb(149) - All ChubaoDB servers were started successfully! +2020-07-15 11:01:59 - INFO - chubaodb::router::server(29) - router is listening on http://0.0.0.0:8080 +2020-07-15 11:01:59 - INFO - actix_server::builder(262) - Starting 8 workers +2020-07-15 11:01:59 - INFO - actix_server::builder(276) - Starting "actix-web-service-0.0.0.0:8080" service on 0.0.0.0:8080 +```` + +* 比如我们只启动 master 那么就是. `./chubaodb master -c ../../config/config.toml` . 
+ +```` +load config by path: ../../config/config.toml +2020-07-15 11:03:11 - INFO - chubaodb::util::config(189) - log init ok +2020-07-15 11:03:11 - INFO - chubaodb(149) - All ChubaoDB servers were started successfully! +2020-07-15 11:03:11 - INFO - chubaodb::master::server(43) - master listening on http://0.0.0.0:7070 +2020-07-15 11:03:11 - INFO - actix_server::builder(262) - Starting 8 workers +2020-07-15 11:03:11 - INFO - actix_server::builder(276) - Starting "actix-web-service-0.0.0.0:7070" service on 0.0.0.0:7070 +^C2020-07-15 11:03:12 - INFO - actix_server::builder(321) - SIGINT received, exiting +```` + + +* 比如我们只启动 pserver 那么就是. `./chubaodb ps -c ../../config/config.toml` . ps会通过你配置文件中master的配置自动去master 注册为数据节点 + +```` +load config by path: ../../config/config.toml +2020-07-15 11:03:45 - INFO - chubaodb::util::config(189) - log init ok +2020-07-15 11:03:45 - INFO - chubaodb(149) - All ChubaoDB servers were started successfully! +2020-07-15 11:03:45 - INFO - chubaodb::util::http_client(21) - send get for url:http://127.0.0.1:7070/my_ip +2020-07-15 11:03:45 - INFO - surf::middleware::logger::native(119) - sending request +2020-07-15 11:03:45 - WARN - isahc::handler(209) - request completed with error [id=AtomicCell { value: 0 }]: ConnectFailed: failed to connect to the server +```` + diff --git a/docs/zh-CN/src/crud.md b/docs/zh-CN/src/crud.md new file mode 100644 index 0000000..88451de --- /dev/null +++ b/docs/zh-CN/src/crud.md @@ -0,0 +1,74 @@ +# crud那些事儿 + +在[库表管理](./collection.md)里我们学习了如何创建一个表。下面让我们对这个表进行数据的增删改查。 + +在这之前我们了解下插入数据的姿势。 + +* `put` 代表不管有没有都插进去。document的version归为1 +* `create` 必须没有,如果有报错,document的version为1 +* `update` 必须存在,如果不存在就报错, document的version 递增+1 +* `upsert` 有则走`update`逻辑,没有则走`create`逻辑 + +好了你已经学会了存储的真谛,让我们来试着插入一条数据吧!注意数据操作是在 router 上进行,也就是默认的`8080`端口上。 + +## put + +```` +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "张三", + "age": 20, + "birthday": "2000-02-02", + "description": "一个用来测试的倒霉蛋", + "skills": ["java", 
"php", "python"] +} +' "http://127.0.0.1:8080/put/person/1" +```` +看到如下 + +![image-20200715124902323](image/image-20200715124902323.png) + +代表插入成功! + +* `http://127.0.0.1:8080/put/person/1` 地址中`person` 是我们创建的表名称, 1 为当前用户的唯一id。字符串格式。(ps:id 还有一种方式,双keyid。这是一种很高级的做法,后面我们会对此情况单独用一章来说明) + +我们可以通过get 接口来获取这条数据 http://127.0.0.1:8080/get/person/1 + + + +![image-20200715125145828](image/image-20200715125145828.png) + +## update + +比如我们尝试更新张三的技能增加rust 通过如下方式 + +```` +curl -H "Content-Type: application/json" -XPOST -d' +{ + "skills": ["java", "php", "python","rust"] +} +' "http://127.0.0.1:8080/update/person/1" +```` + + + +![image-20200715125335276](image/image-20200715125335276.png) + +我们可以开心地看到张三学会了`rust` 并且 version 改为了2. + + + +## delete + +通过 + +``` +curl -XDELETE http://127.0.0.1:8080/delete/person/1 +{"code":200,"message":"success"} +``` + +可以删除张三这条记录。我们看看删除后再做get会得到什么 + +![image-20200715125658865](image/image-20200715125658865.png) + +嗯。很好本章结束了! \ No newline at end of file diff --git a/docs/zh-CN/src/design.md b/docs/zh-CN/src/design.md new file mode 100644 index 0000000..925a8be --- /dev/null +++ b/docs/zh-CN/src/design.md @@ -0,0 +1 @@ +# 设计 diff --git a/docs/zh-CN/src/document.md b/docs/zh-CN/src/document.md new file mode 100644 index 0000000..b7e43d8 --- /dev/null +++ b/docs/zh-CN/src/document.md @@ -0,0 +1 @@ +# 数据操作 diff --git a/docs/zh-CN/src/image/image-20200715111140350.png b/docs/zh-CN/src/image/image-20200715111140350.png new file mode 100644 index 0000000..f1717ed Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715111140350.png differ diff --git a/docs/zh-CN/src/image/image-20200715111454748.png b/docs/zh-CN/src/image/image-20200715111454748.png new file mode 100644 index 0000000..570ff47 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715111454748.png differ diff --git a/docs/zh-CN/src/image/image-20200715111749876.png b/docs/zh-CN/src/image/image-20200715111749876.png new file mode 100644 index 0000000..58fc9fd Binary files 
/dev/null and b/docs/zh-CN/src/image/image-20200715111749876.png differ diff --git a/docs/zh-CN/src/image/image-20200715111932528.png b/docs/zh-CN/src/image/image-20200715111932528.png new file mode 100644 index 0000000..2c122b1 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715111932528.png differ diff --git a/docs/zh-CN/src/image/image-20200715112907536.png b/docs/zh-CN/src/image/image-20200715112907536.png new file mode 100644 index 0000000..aaf99de Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715112907536.png differ diff --git a/docs/zh-CN/src/image/image-20200715115437856.png b/docs/zh-CN/src/image/image-20200715115437856.png new file mode 100644 index 0000000..d98bfb2 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715115437856.png differ diff --git a/docs/zh-CN/src/image/image-20200715115617185.png b/docs/zh-CN/src/image/image-20200715115617185.png new file mode 100644 index 0000000..e72b520 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715115617185.png differ diff --git a/docs/zh-CN/src/image/image-20200715115703796.png b/docs/zh-CN/src/image/image-20200715115703796.png new file mode 100644 index 0000000..9f5c3f8 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715115703796.png differ diff --git a/docs/zh-CN/src/image/image-20200715124902323.png b/docs/zh-CN/src/image/image-20200715124902323.png new file mode 100644 index 0000000..7c440ce Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715124902323.png differ diff --git a/docs/zh-CN/src/image/image-20200715125145828.png b/docs/zh-CN/src/image/image-20200715125145828.png new file mode 100644 index 0000000..5aee525 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715125145828.png differ diff --git a/docs/zh-CN/src/image/image-20200715125335276.png b/docs/zh-CN/src/image/image-20200715125335276.png new file mode 100644 index 0000000..195c9cd Binary files /dev/null and 
b/docs/zh-CN/src/image/image-20200715125335276.png differ diff --git a/docs/zh-CN/src/image/image-20200715125349302.png b/docs/zh-CN/src/image/image-20200715125349302.png new file mode 100644 index 0000000..195c9cd Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715125349302.png differ diff --git a/docs/zh-CN/src/image/image-20200715125658865.png b/docs/zh-CN/src/image/image-20200715125658865.png new file mode 100644 index 0000000..e2dd0f7 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715125658865.png differ diff --git a/docs/zh-CN/src/image/image-20200715130811821.png b/docs/zh-CN/src/image/image-20200715130811821.png new file mode 100644 index 0000000..057980d Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715130811821.png differ diff --git a/docs/zh-CN/src/image/image-20200715132053132.png b/docs/zh-CN/src/image/image-20200715132053132.png new file mode 100644 index 0000000..c10cdcf Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715132053132.png differ diff --git a/docs/zh-CN/src/image/image-20200715134112636.png b/docs/zh-CN/src/image/image-20200715134112636.png new file mode 100644 index 0000000..741451c Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715134112636.png differ diff --git a/docs/zh-CN/src/image/image-20200715134143880.png b/docs/zh-CN/src/image/image-20200715134143880.png new file mode 100644 index 0000000..3df6e66 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715134143880.png differ diff --git a/docs/zh-CN/src/image/image-20200715134337378.png b/docs/zh-CN/src/image/image-20200715134337378.png new file mode 100644 index 0000000..ff90e2d Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715134337378.png differ diff --git a/docs/zh-CN/src/image/image-20200715134556712.png b/docs/zh-CN/src/image/image-20200715134556712.png new file mode 100644 index 0000000..de8c01d Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715134556712.png differ diff 
--git a/docs/zh-CN/src/image/image-20200715134811989.png b/docs/zh-CN/src/image/image-20200715134811989.png new file mode 100644 index 0000000..6168f27 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715134811989.png differ diff --git a/docs/zh-CN/src/image/image-20200715135803824.png b/docs/zh-CN/src/image/image-20200715135803824.png new file mode 100644 index 0000000..ef66529 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715135803824.png differ diff --git a/docs/zh-CN/src/image/image-20200715140655966.png b/docs/zh-CN/src/image/image-20200715140655966.png new file mode 100644 index 0000000..4b388bb Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715140655966.png differ diff --git a/docs/zh-CN/src/image/image-20200715140812082.png b/docs/zh-CN/src/image/image-20200715140812082.png new file mode 100644 index 0000000..48b414c Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715140812082.png differ diff --git a/docs/zh-CN/src/image/image-20200715140902639.png b/docs/zh-CN/src/image/image-20200715140902639.png new file mode 100644 index 0000000..97a6c67 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715140902639.png differ diff --git a/docs/zh-CN/src/image/image-20200715141018443.png b/docs/zh-CN/src/image/image-20200715141018443.png new file mode 100644 index 0000000..10a6194 Binary files /dev/null and b/docs/zh-CN/src/image/image-20200715141018443.png differ diff --git a/docs/zh-CN/src/install.md b/docs/zh-CN/src/install.md new file mode 100644 index 0000000..80d449e --- /dev/null +++ b/docs/zh-CN/src/install.md @@ -0,0 +1,32 @@ +# 编译与安装 + +`chubaodb`采用rust编写同时可能会依赖一些c库,支持跨平台编译,目前支持操作系统有 windows , linux , macos.可能还有其他不知道的,碰到再说。 + + + +| 功能\系统 | windows | linux | macos | +| ---- | ---- | ---- | ---- | +| 存储 | 支持 | 支持 | 支持 | +| 全文检索 | 支持 | 支持 | 支持 | +| 向量检索 | `不支持` | 支持 | 支持 | + + +----------------- + +**重点**: 如果你之前没有rust环境,且对rust 没什么兴趣,可以直接下载我们的release 版本。 (TODO: release链接),珍爱生命远离rust编译 + + 
+**开始编译(以在macos 下编译为例)** +* 在项目根目录执行 `cargo build --release` , +* 如果运气好没有报错,那么进入 `target/release` 目录获得编译好的二进制文件 `chubaodb` +* 执行 ./chubaodb +* 打开浏览器访问 http://127.0.0.1:8080 为router地址, 这个地址用来对数据进行CRUD +* 打开浏览器访问 http://127.0.0.1:7070 为master地址, 这个地址用来对库表及数据结构进行管理。 +* 之后的章节会详细介绍这些地址的用法,如果一切顺利恭喜你,chubaodb已经在你的电脑中完美运行了。 + + + + + + + diff --git a/docs/zh-CN/src/introduction.md b/docs/zh-CN/src/introduction.md new file mode 100644 index 0000000..053c2c0 --- /dev/null +++ b/docs/zh-CN/src/introduction.md @@ -0,0 +1,16 @@ + +### 介绍 +`chubaodb` 是一个分布式高可用的云原生,同时支持传统的分布式文档搜索及存储系统,支持`全文检索`,`聚合查询`,`向量搜索`,`标量搜索`的功能,采用轻schema策略,尽可能提高了存储文档的灵活度。同时吸取其他类似软件的经验,初心于在有限的计算节点情况下,支持不限容量的存储及计算,同时尽可能低的学习成本,完成尽可能多的需求。 + +### 初心 +我们期望于`chubaodb` 是一个一看就会,开箱即用的软件,让使用者不再被所使用的软件所使用。严格遵守,山寨三体的软件设计原则, +* 1.程序必须正确 +* 2.程序必须可维护,如果和第1条冲突,以第1条为准 +* 3.程序必须高效,如和第1条冲突,则以第1条为准,如和第2条冲突,则以第2条为准。 + +### 特点 +* `graphql` 进行schema管理, +* `restful api`进行数据操作, +* `raft`保证数据的最终一致性, +* `rocksdb`, `tantivy`, `faiss` 提供了底层存储及计算的能力。 +* 通过`chubaofs` 提供了无限存储的可能,通过架构可以释放或加载宝贵的cpu和内存资源。 \ No newline at end of file diff --git a/docs/zh-CN/src/master.md b/docs/zh-CN/src/master.md new file mode 100644 index 0000000..683b413 --- /dev/null +++ b/docs/zh-CN/src/master.md @@ -0,0 +1,45 @@ +# 元数据管理 + +元数据管理是在master api上,提供了graphql的方式。同时内置了一个iql。如果你没有对配置做过更改,master 的地址应该为 http://127.0.0.1:7070 + +打开地址你就看到如下 + +![image-20200715111140350](image/image-20200715111140350.png) + + + +熟悉iql的就不解释了,不熟悉的用用就差不多。 + + + +我们简单说一下各个接口的功能,以collectionGet 为例,这个接口是获取一个表的结构, 点击右侧第二个方法。可以看到需要的参数 + +![image-20200715111454748](image/image-20200715111454748.png) + + + +参数为,id ,name 。类型后面没有跟`!`意思就是非必须字段。但是必须二选一。意味着你可以通过一个name 或者id 去查询这个collection.然后我们输入query + +```` +{ + collectionGet(name:"t1") +} +```` + + + + + +![image-20200715111749876](image/image-20200715111749876.png) + + + + + +可以看到报错了。没错。就是错了。因为我们还没有创建名字为`t1`的collection。后面会有创建的方式。那我们换一个简单的吧。我们通过query来查询当前系统的版本。 + +![image-20200715111932528](image/image-20200715111932528.png) + + + 
+红色部分为返回结果。太过简单就不解释了。 \ No newline at end of file diff --git a/docs/zh-CN/src/plan.md b/docs/zh-CN/src/plan.md new file mode 100644 index 0000000..94322c0 --- /dev/null +++ b/docs/zh-CN/src/plan.md @@ -0,0 +1 @@ +# 计划 diff --git a/docs/zh-CN/src/search.md b/docs/zh-CN/src/search.md new file mode 100644 index 0000000..306265b --- /dev/null +++ b/docs/zh-CN/src/search.md @@ -0,0 +1,109 @@ +# 搜索那些事儿 + +下面我们来介绍一下chubaodb的搜索功能,在这之前,请确保你已经通过[库表管理](./collection.md)创建了表。 + +我们先插入一些测试数据吧,先创建5个人 + +```` +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "张三", + "age": 20, + "birthday": "2000-02-02", + "description": "zhangsan can use java good at php, but python not good at", + "skills": ["java", "php", "python"] +} +' "http://127.0.0.1:8080/put/person/1" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "李四", + "age": 30, + "birthday": "1990-02-02", + "description": "lisi can use java ,only java", + "skills": ["java"] +} +' "http://127.0.0.1:8080/put/person/2" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "王五", + "age": 20, + "birthday": "2000-03-20", + "description": "wangwu is c++ rust good at!", + "skills": ["c++", "rust"] +} +' "http://127.0.0.1:8080/put/person/3" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "牛六", + "age": 35, + "birthday": "1985-12-02", + "description": "niuliu age too old", + "skills": ["java", "golang", "python", "rust"] +} +' "http://127.0.0.1:8080/put/person/4" + +curl -H "Content-Type: application/json" -XPOST -d' +{ + "name": "赵七", + "age": 18, + "birthday": "2002-03-12", + "description": "is a web user, he can use ruby and php", + "skills": ["php", "ruby"] +} +' "http://127.0.0.1:8080/put/person/5" +```` + + + +插入完成后,我们通过search接口可以查看到这五个人 `http://127.0.0.1:8080/search/person` + + + +![image-20200715134112636](image/image-20200715134112636.png) + + + +`http://127.0.0.1:8080/search/person` 参数为空时语义为query:* search 有如下参数 + +* query 查询语句,也就是类似lucene 的dsl 。(TODO: 专门一章介绍dsl) 
+* def_fields 默认查询字段。当query不指定字段时候以此字段为查询,为or的关系,可以多个字段用逗号`,`隔开 +* size: 返回数据条数,默认为20 +* sort: 排序规则 example:*name:asc|age:desc* , 默认为score排序也就是相关度 + +下面我们把这些query 都用上做一个查询吧! + +`http://127.0.0.1:8080/search/person?query=name:%E5%BC%A0%E4%B8%89&size=3&sort=age:asc` + +![image-20200715134143880](image/image-20200715134143880.png) + +下面让我们自举一些需求。 + +查找摘要中 包含 rust 或者 java 的人 + +* `http://127.0.0.1:8080/search/person?query=description:java%20OR%20description:rust&size=3&sort=age:asc` +* 上述语句等同于 `http://127.0.0.1:8080/search/person?query=java%20OR%20rust&def_fields=description&size=3&sort=age:asc` + + + +![image-20200715134337378](image/image-20200715134337378.png) + + + +查找摘要中 包含 java 的人 按照年龄倒序 + +```` +http://127.0.0.1:8080/search/person?query=java&def_fields=description&size=3&sort=age:desc +```` + +![image-20200715134556712](image/image-20200715134556712.png) + + + +### 精确查找 + +在用户名或者摘要中查找 `web user` 为关键字的用户。 + +![image-20200715134811989](image/image-20200715134811989.png) \ No newline at end of file diff --git a/docs/zh-CN/src/vector.md b/docs/zh-CN/src/vector.md new file mode 100644 index 0000000..3444bbb --- /dev/null +++ b/docs/zh-CN/src/vector.md @@ -0,0 +1 @@ +# 向量那些事儿 diff --git a/src/pserver/simba/aggregation/function.rs b/src/pserver/simba/aggregation/function.rs index 2279318..18bd73c 100644 --- a/src/pserver/simba/aggregation/function.rs +++ b/src/pserver/simba/aggregation/function.rs @@ -253,7 +253,7 @@ impl Hits { } pub fn map(&mut self, value: Vec) -> ASResult { - if self.result.hits.len() > self.result.size as usize { + if self.result.hits.len() >= self.result.size as usize { return Ok(false); } let result = &mut self.result;