Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.

Commit

Permalink
refactor: use database
Browse files Browse the repository at this point in the history
  • Loading branch information
ZTL-UwU committed Dec 23, 2023
1 parent 09f5f0b commit a0986da
Show file tree
Hide file tree
Showing 27 changed files with 683 additions and 306 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
data.json

# Generated by Cargo
# will have compiled files and executables
/target/
Expand All @@ -15,3 +13,6 @@ Cargo.lock
# Added by cargo

/target

# Database
db.sqlite
9 changes: 4 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
name = "plagiarism-detector-rust"
version = "1.0.0"
edition = "2021"
publish = false

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[workspace]
members = [".", "api", "service", "entity", "migration"]

[dependencies]
jieba-rs = "0.6.7"
rocket = { version = "0.5.0-rc.3", features = ["json"] }
serde = "1.0.157"
serde_json = "1.0.94"
plagiarism-detector-rust-api = { path = "api" }
40 changes: 25 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# Rust 简易论文查重

![](https://img.shields.io/github/languages/code-size/SMS-COSMO/Plagiarism-Detector-Rust?color=yellow&style=flat-square)

## 部署

修改配置见 `Rocket.toml`

1. 直接下载
Expand All @@ -20,40 +19,46 @@
cargo run --release
```

端口: `9999`

**测试:**

```sh
cargo run
```

端口: `8000`

## API

- `POST /check`
- `POST /add`
### 请求:

`check` 为检查论文相似度
- `POST /`

`add` 为 `check` 之后将论文加入数据库 **!!暂不支持修改!!**
| 名称 | 类型 | 内容 |
| :------ | :----- | :--------------------------------------------------------------------------------- |
| `id` | String | 论文 id,不允许重复 |
| `text` | String | 文本内容 |
| `write` | bool | `false`:检查论文相似度 <br> `true`:检查之后将论文加入数据库 **!!暂不支持修改!!** |

**请求:**

```json
{
"id": "1",
"text": "基于 jieba 分词,tf-idf 算法求相似度 !(*@1=-+!9)"
"text": "基于 jieba 分词,tf-idf 算法求相似度 !(*@1=-+!9)",
"write": true
}
```

其中 id 应不与已有论文重复。
### 响应:

| 名称 | 类型 | 内容 |
| :----------- | :----- | :--------------------------------------------------------------------- |
| `code` | i32 | 响应代码 |
| `msg` | String | 返回信息 |
| `similarity` | [] | 表示前 5 个相似度最高的论文,每项第一个值表示相似度,第二个值为论文 id |

**响应:**

```json
{
"code": 200,
"msg": "加入成功",
"similarity": [
[
0.97821377847197237,
Expand All @@ -79,4 +84,9 @@ cargo run
}
```

表示前 5 个相似度最高的论文,每项第一个值表示相似度,第二个值为论文 id。
## 技术栈

- [Rocket](https://rocket.rs/) - A web framework for Rust that makes it simple to write fast, type-safe, secure web applications with incredible usability, productivity and performance
- [SeaORM](https://www.sea-ql.org/SeaORM/) - 🐚 SeaORM is a relational ORM to help you build web services in Rust
- [jieba-rs](https://github.com/messense/jieba-rs) - The Jieba Chinese Word Segmentation Implemented in Rust

13 changes: 3 additions & 10 deletions Rocket.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
[default]
address = "0.0.0.0"
# no file lock, worker has to be 1
workers = 1

[debug]
port = 8000
limits = { json = "50 MiB" }

[release]
port = 9999
ip_header = false
limits = { json = "10 MiB" }
[default.databases.sea_orm]
# sqlite
url = "sqlite://db.sqlite?mode=rwc"
23 changes: 23 additions & 0 deletions api/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "plagiarism-detector-rust-api"
version = "0.1.0"
edition = "2021"
publish = false

[dependencies]
async-stream = { version = "0.3" }
async-trait = { version = "0.1" }
plagiarism-detector-rust-service = { path = "../service" }
futures = { version = "0.3" }
futures-util = { version = "0.3" }
rocket = { version = "0.5.0-rc.4", features = ["json"] }
rocket_dyn_templates = { version = "0.1.0-rc.1", features = ["tera"] }
serde_json = { version = "1" }
entity = { path = "../entity" }
migration = { path = "../migration" }
tokio = "1.29.0"
jieba-rs = "0.6"

[dependencies.sea-orm-rocket]
# path = "../../../sea-orm-rocket/lib" # remove this line in your own project and uncomment the following line
version = "0.5.3"
9 changes: 9 additions & 0 deletions api/src/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/// Builds the stop-word set from the `stopwords.txt` file embedded at compile time.
///
/// The embedded bytes are decoded lossily as UTF-8 and split on whitespace;
/// each resulting token becomes one entry of the returned set.
pub fn stop_words() -> std::collections::HashSet<String> {
    let raw = String::from_utf8_lossy(include_bytes!("../../stopwords.txt"));

    raw.split_whitespace().map(String::from).collect()
}
72 changes: 72 additions & 0 deletions api/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
extern crate rocket;

use rocket::fairing::{self, AdHoc};
use rocket::serde::json::Json;
use rocket::{Build, Rocket};

use migration::MigratorTrait;
use sea_orm_rocket::{Connection, Database};

mod pool;
use ::entity::req::*;
use pool::Db;

mod data;
mod process;

pub use entity::paper;
pub use entity::paper::Entity as Post;

/// Handles `POST /` with a JSON body.
///
/// Runs `process::similarity` for the submitted request, passing the request's
/// `write` flag, the database connection and the shared tokenizer / stop-word
/// state. The outcome is always wrapped in a `ResData` JSON body:
///
/// * success: `code` is 200, `msg` names the performed action and
///   `similarity` carries the computed result;
/// * failure: `code` is 500, `msg` is the error text and `similarity` is empty.
#[rocket::post("/", format = "json", data = "<data>")]
async fn check(
    data: Json<ReqData>,
    state: &rocket::State<SharedData>,
    conn: Connection<'_, Db>,
) -> Json<ResData> {
    let req: ReqData = data.into_inner();
    let db = conn.into_inner();

    let outcome =
        process::similarity(&req, req.write, db, &state.jieba, &state.stop_words).await;

    let response = match outcome {
        Err(e) => ResData {
            code: 500,
            msg: e.to_string(),
            similarity: Vec::new(),
        },
        Ok(r) => {
            // The message prefix depends on whether the request asked to store the paper.
            let action = if req.write { "加入" } else { "查询" };
            ResData {
                code: 200,
                msg: format!("{}成功", action),
                similarity: r,
            }
        }
    };

    Json(response)
}

/// Ignite-time fairing callback that applies pending database migrations.
///
/// Returns `Ok(rocket)` once `Migrator::up` succeeds. If the `Db` pool cannot be
/// fetched from the Rocket instance, or the migration itself fails, the reason
/// is written to stderr and `Err(rocket)` is returned so that the fairing
/// (registered through `AdHoc::try_on_ignite`) reports the failure instead of
/// panicking or letting the server start against an unmigrated database.
async fn run_migrations(rocket: Rocket<Build>) -> fairing::Result {
    // Collapse both failure modes into a printable reason; the borrow of
    // `rocket` ends with this expression, so it can be moved afterwards.
    let outcome = match Db::fetch(&rocket) {
        Some(db) => migration::Migrator::up(&db.conn, None)
            .await
            .map_err(|e| e.to_string()),
        None => Err(String::from("database pool is not available")),
    };

    match outcome {
        Ok(()) => Ok(rocket),
        Err(reason) => {
            eprintln!("Migrations failed: {reason}");
            Err(rocket)
        }
    }
}

/// Assembles the Rocket application and runs it until shutdown.
///
/// The instance gets the database fairing, the "Migrations" ignite fairing,
/// the `check` route mounted at `/`, and a managed `SharedData` value holding
/// the jieba tokenizer and the stop-word set. Any launch error is returned to
/// the caller.
#[tokio::main]
async fn start() -> Result<(), rocket::Error> {
    let app = rocket::build()
        .attach(Db::init())
        .attach(AdHoc::try_on_ignite("Migrations", run_migrations))
        .mount("/", rocket::routes![check]);

    let shared = SharedData {
        jieba: jieba_rs::Jieba::new(),
        stop_words: data::stop_words(),
    };

    // The Rocket instance returned by a successful launch is not needed.
    app.manage(shared).launch().await?;
    Ok(())
}

/// Public entry point: runs the server via `start`, then prints a shutdown
/// notice followed by the launch error, if there was one.
pub fn main() {
    let outcome = start();

    println!("Rocket: deorbit.");

    if let Err(err) = outcome {
        println!("Error: {err}");
    }
}
42 changes: 42 additions & 0 deletions api/src/pool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use plagiarism_detector_rust_service::sea_orm;

use async_trait::async_trait;
use sea_orm::ConnectOptions;
use sea_orm_rocket::{rocket::figment::Figment, Config, Database};
use std::time::Duration;

/// Database handle type for Rocket, wrapping a [`SeaOrmPool`].
///
/// The `Database` derive and the `#[database("sea_orm")]` attribute tie this
/// type to a database named `sea_orm` — presumably the `databases.sea_orm`
/// table in `Rocket.toml`; confirm against the sea-orm-rocket documentation.
#[derive(Database, Debug)]
#[database("sea_orm")]
pub struct Db(SeaOrmPool);

/// Pool type holding a SeaORM `DatabaseConnection`.
#[derive(Debug, Clone)]
pub struct SeaOrmPool {
    /// Connection created in `Pool::init` and returned by reference from `Pool::borrow`.
    pub conn: sea_orm::DatabaseConnection,
}

/// `sea_orm_rocket::Pool` implementation backed by a SeaORM `DatabaseConnection`.
#[async_trait]
impl sea_orm_rocket::Pool for SeaOrmPool {
    type Error = sea_orm::DbErr;

    type Connection = sea_orm::DatabaseConnection;

    /// Reads the pool `Config` from `figment` and opens the connection.
    ///
    /// # Errors
    ///
    /// Returns `DbErr::Custom` when the configuration cannot be extracted
    /// (instead of panicking), and whatever error `Database::connect` yields
    /// when the connection itself fails.
    async fn init(figment: &Figment) -> Result<Self, Self::Error> {
        let config = figment.extract::<Config>().map_err(|e| {
            sea_orm::DbErr::Custom(format!("invalid database configuration: {e}"))
        })?;

        // Saturate rather than silently truncate an oversized setting.
        let max_connections = u32::try_from(config.max_connections).unwrap_or(u32::MAX);

        let mut options: ConnectOptions = config.url.into();
        options
            .max_connections(max_connections)
            .min_connections(config.min_connections.unwrap_or_default())
            .connect_timeout(Duration::from_secs(config.connect_timeout))
            .sqlx_logging(false);
        if let Some(idle_timeout) = config.idle_timeout {
            options.idle_timeout(Duration::from_secs(idle_timeout));
        }
        let conn = sea_orm::Database::connect(options).await?;

        Ok(SeaOrmPool { conn })
    }

    /// Returns a reference to the stored connection.
    fn borrow(&self) -> &Self::Connection {
        &self.conn
    }
}
Loading

0 comments on commit a0986da

Please sign in to comment.