From fd24fe8e61822781980de2fc8fa40952c55e8e86 Mon Sep 17 00:00:00 2001 From: Junxiang Huang Date: Mon, 7 Oct 2024 16:45:56 +0800 Subject: [PATCH 1/2] update --- manual/README.md | 20 +++++++- manual/config/README.md | 96 +++++++++++++++++++++------------------ manual/deploy/README.md | 9 ++-- manual/repo/seasearch.yml | 32 +++++++++++++ 4 files changed, 106 insertions(+), 51 deletions(-) diff --git a/manual/README.md b/manual/README.md index a385220..4d3da97 100644 --- a/manual/README.md +++ b/manual/README.md @@ -1,3 +1,21 @@ # Introduction -ZincSearch is a full-text search server implemented in Go language, providing an API compatible with ElasticSearch DSL. It uses Bluge as the indexing engine. Bluge is a fork version of Bleve (developed by CouchBase), a widely used Go language full-text indexing library, which refactors the code to make it more modern and flexible. +**SeaSearch** is a lightweight search engine that will replace ElasticSearch as the default search engine. It is built on the open-source search engine [ZincSearch](https://zincsearch-docs.zinc.dev/), which is implemented in Go. + +Why use **SeaSearch**: + +- **Problems of ElasticSearch**: + - Not designed for a large number of indexes (like one index per library) + - Need to search entire storage when searching inside a library + - Need to filter out results that the user does not have permission to access + - Can become slow when you have billions of files to search + - Heavyweight Java program + - Upgrade often requires rebuilding index + +- **How about SeaSearch**: + - Lightweight and can support one index per library + - API compatible with ElasticSearch + - Architecture Highlights + - Cloud-native: can use S3 as storage (for single-node or cluster) + - Shared storage: in a cluster, nodes use S3 as shared storage and store index metadata in etcd + - Failover: node switching is easy thanks to shared storage architecture. ES replicates data between the nodes so consistency is more complex. 
\ No newline at end of file diff --git a/manual/config/README.md b/manual/config/README.md index f824b22..b613972 100644 --- a/manual/config/README.md +++ b/manual/config/README.md @@ -1,58 +1,58 @@ # SeaSearch Configuration -## Original Configurations +## Single-Node Configurations -The original configurations of environment variables can be referenced:[https://zincsearch-docs.zinc.dev/environment-variables/](https://zincsearch-docs.zinc.dev/environment-variables/) +### Basic Configurations -The following configuration instructions are for our extended configuration items. All configurations are set in the form of environment variables. +```shell +# log mode of gin framework,default release +ZINC_WAL_ENABLE=release -## Extended Configurations in SeaSearch +# type of storage's engine, i.e., s3 +ZINC_STORAGE_TYPE= -### Single-Node Configurations +# the number of shards; since SeaSearch has one index per database, the default value is changed to 1 to improve loading efficiency +ZINC_SHARD_NUM=1 +``` + +### S3 Storage Configurations + +To enable S3 storage, `ZINC_STORAGE_TYPE` has to be set to `s3` (i.e., `ZINC_STORAGE_TYPE=s3`). + +```shell +# the maximum local cache file size +ZINC_MAX_OBJ_CACHE_SIZE= +# S3-related settings +ZINC_S3_ACCESS_ID= +ZINC_S3_USE_V4_SIGNATURE= +ZINC_S3_ACCESS_SECRET= +ZINC_S3_ENDPOINT= +ZINC_S3_USE_HTTPS= +ZINC_S3_PATH_STYLE_REQUEST= +ZINC_S3_AWS_REGION= ``` -GIN_MODE, log mode of gin framework,default release -ZINC_WAL_ENABLE, whether to enable WAL,defaule enabled -ZINC_STORAGE_TYPE -ZINC_MAX_OBJ_CACHE_SIZE, when s3 and oss are enabled, the maximum local cache file size -ZINC_SHARD_LOAD_OBJS_GOROUTINE_NUM, index loading parallelism, when S3 and oss are enabled, can improve the index loading speed - -ZINC_SHARD_NUM zincsearch the original default value is 3. 
Since seaseach has one index per database, in order to improve loading efficiency, the default value is changed to 1 - -S3 related, only valid when ZINC_STORAGE_TYPE=s3 -ZINC_S3_ACCESS_ID -ZINC_S3_USE_V4_SIGNATURE -ZINC_S3_ACCESS_SECRET -ZINC_S3_ENDPOINT -ZINC_S3_USE_HTTPS -ZINC_S3_PATH_STYLE_REQUEST -ZINC_S3_AWS_REGION - -OSS related, only valid when ZINC_STORAGE_TYPE=oss -ZINC_OSS_ACCESS_ID -ZINC_OSS_ACCESS_SECRET -ZINC_OSS_BUCKET -ZINC_OSS_ENDPOINT - -cluster related -ZINC_SERVER_MODE, default none for standalone deployment, optional to cluster, must be cluster for cluster deployment -ZINC_CLUSTER_ID, cluster id,need to be globally unique -ZINC_ETCD_ENDPOINTS, etcd address -ZINC_ETCD_ENDPOINTS, etcd key prefix, default /zinc -ZINC_ETCD_USERNAME, etcd username -ZINC_ETCD_PASSWORD, etcd password - -log related -ZINC_LOG_OUTPUT, whether to output logs to files, default yes -ZINC_LOG_DIR, log directory, recommended configuration, default is the log subdirectory under the current directory -ZINC_LOG_LEVEL, log level,default debug +## Cluster Configurations + +### Basic Configurations + +```shell +# default none for standalone deployment, optional to cluster, must be cluster for cluster deployment +ZINC_SERVER_MODE= + +# cluster id,need to be globally unique +ZINC_CLUSTER_ID= + +ZINC_ETCD_ENDPOINTS= +ZINC_ETCD_USERNAME= +ZINC_ETCD_PASSWORD= ``` ### Proxy Configurations -``` -ZINC_CLUSTER_PROXY_LOG_DIR=./log +```shell +ZINC_CLUSTER_PROXY_LOG_DIR=/opt/seasearch/data/log ZINC_CLUSTER_PROXY_HOST=0.0.0.0 ZINC_CLUSTER_PROXY_PORT=4082 ZINC_SERVER_MODE=proxy # must be proxy @@ -64,10 +64,18 @@ ZINC_CLUSTER_MANAGER_ADDR=127.0.0.1:4081 # manager address ### Cluster-manger Configurations -``` -ZINC_CLUSTER_MANAGER_LOG_DIR=./log +```shell +ZINC_CLUSTER_MANAGER_LOG_DIR=/opt/seasearch/data/log ZINC_CLUSTER_MANAGER_HOST=0.0.0.0 ZINC_CLUSTER_MANAGER_PORT=4081 ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS=127.0.0.1:2379 ZINC_CLUSTER_MANAGER_ETCD_PREFIX=/zinc ``` + +## Logs Configurations + 
+```shell +ZINC_LOG_OUTPUT=yes #whether to output logs to files, default yes +ZINC_LOG_DIR=/opt/seasearch/data/log #log directory +ZINC_LOG_LEVEL=debug #log level,default debug +``` diff --git a/manual/deploy/README.md b/manual/deploy/README.md index 5e99cac..d0005cc 100644 --- a/manual/deploy/README.md +++ b/manual/deploy/README.md @@ -10,13 +10,12 @@ wget https://haiwen.github.io/seasearch-docs/repo/seasearch.yml ## Modify .env file -First, you need to specify the environment variables used by the SeaSearch image in the relevant `.env` file. Some environment variables can be found in [here](../config/README.md). Please add and modify the values (i.e., `<...>`) ​​of the following fields in the `.env` file. +First, you need to specify the environment variables used by the SeaSearch image in the relevant `.env` file. Some environment variables can be found in [here](../config/README.md). Please add and modify the environment variables (i.e., `<...>`) ​​of the following fields in the `.env` file. 
+ -```shell -# If seasearch uses a separate configuration file such as seasearch.yml, you need to write it into COMPOSE_FILE -COMPOSE_FILE='docker-compose.yml,seasearch.yml' +```shell # other environment variables in .env file # For Apple's chip (M2, e.g.), you should use the images with -nomkl tags (i.e., seafileltd/seasearch-nomkl:latest) SEASEARCH_IMAGE=seafileltd/seasearch:latest @@ -26,8 +25,6 @@ ZINC_FIRST_ADMIN_USER= ZINC_FIRST_ADMIN_PASSWORD= ``` -Note: if new environment variables are added in .env, they also need to **be set synchronously** in the `seasearch.yml` - ## Restart the Service ```shell diff --git a/manual/repo/seasearch.yml b/manual/repo/seasearch.yml index c8cc75a..33ec361 100644 --- a/manual/repo/seasearch.yml +++ b/manual/repo/seasearch.yml @@ -9,6 +9,38 @@ services: environment: - ZINC_FIRST_ADMIN_USER=${ZINC_FIRST_ADMIN_USER} - ZINC_FIRST_ADMIN_PASSWORD=${ZINC_FIRST_ADMIN_PASSWORD} + - ZINC_WAL_ENABLE=${ZINC_WAL_ENABLE:-release} + - ZINC_STORAGE_TYPE=${ZINC_STORAGE_TYPE} + - ZINC_SHARD_NUM=${ZINC_SHARD_NUM} + - ZINC_MAX_OBJ_CACHE_SIZE=${ZINC_MAX_OBJ_CACHE_SIZE} + - ZINC_S3_ACCESS_ID=${ZINC_S3_ACCESS_ID} + - ZINC_S3_USE_V4_SIGNATURE=${ZINC_S3_USE_V4_SIGNATURE} + - ZINC_S3_ACCESS_SECRET=${ZINC_S3_ACCESS_SECRET} + - ZINC_S3_ENDPOINT=${ZINC_S3_ENDPOINT} + - ZINC_S3_USE_HTTPS=${ZINC_S3_USE_HTTPS} + - ZINC_S3_PATH_STYLE_REQUEST=${ZINC_S3_PATH_STYLE_REQUEST} + - ZINC_S3_AWS_REGION=${ZINC_S3_AWS_REGION} + - ZINC_SERVER_MODE=${ZINC_SERVER_MODE} + - ZINC_CLUSTER_ID=${ZINC_CLUSTER_ID} + - ZINC_ETCD_ENDPOINTS=${ZINC_ETCD_ENDPOINTS} + - ZINC_ETCD_USERNAME=${ZINC_ETCD_USERNAME} + - ZINC_ETCD_PASSWORD=${ZINC_ETCD_PASSWORD} + - ZINC_CLUSTER_PROXY_LOG_DIR=${ZINC_CLUSTER_PROXY_LOG_DIR:-/opt/seasearch/data/log} + - ZINC_CLUSTER_PROXY_HOST=${ZINC_CLUSTER_PROXY_HOST:-0.0.0.0} + - ZINC_CLUSTER_PROXY_PORT=${ZINC_CLUSTER_PROXY_PORT:-4082} + - ZINC_SERVER_MODE=${ZINC_SERVER_MODE:-proxy} + - ZINC_ETCD_ENDPOINTS=${ZINC_ETCD_ENDPOINTS:-127.0.0.1:2379} + - 
ZINC_ETCD_PREFIX=${ZINC_ETCD_PREFIX:-/zinc} + - ZINC_MAX_DOCUMENT_SIZE=${ZINC_MAX_DOCUMENT_SIZE:-1m} + - ZINC_CLUSTER_MANAGER_ADDR=${ZINC_CLUSTER_MANAGER_ADDR:-127.0.0.1:4081} + - ZINC_CLUSTER_MANAGER_LOG_DIR=${ZINC_CLUSTER_MANAGER_LOG_DIR:-/opt/seasearch/data/log} + - ZINC_CLUSTER_MANAGER_HOST=${ZINC_CLUSTER_MANAGER_HOST:-0.0.0.0} + - ZINC_CLUSTER_MANAGER_PORT=${ZINC_CLUSTER_MANAGER_PORT:-4081} + - ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS=${ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS:-127.0.0.1:2379} + - ZINC_CLUSTER_MANAGER_ETCD_PREFIX=${ZINC_CLUSTER_MANAGER_ETCD_PREFIX:-/zinc} + - ZINC_LOG_OUTPUT=${ZINC_LOG_OUTPUT:-yes} + - ZINC_LOG_DIR=${ZINC_LOG_DIR:-/opt/seasearch/data/log} + - ZINC_LOG_LEVEL=${ZINC_LOG_LEVEL:-debug} networks: - frontend-net - backend-scheduler-net From a22cf561813a3f4e90c7e0595e092fc06211d3b3 Mon Sep 17 00:00:00 2001 From: Junxiang Huang Date: Tue, 8 Oct 2024 10:12:44 +0800 Subject: [PATCH 2/2] update --- manual/config/README.md | 13 ++++++------- manual/repo/seasearch.yml | 7 +++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/manual/config/README.md b/manual/config/README.md index b613972..756643f 100644 --- a/manual/config/README.md +++ b/manual/config/README.md @@ -6,7 +6,7 @@ ```shell # log mode of gin framework,default release -ZINC_WAL_ENABLE=release +ZINC_WAL_ENABLE=true # type of storage's engine, i.e., s3 ZINC_STORAGE_TYPE= @@ -50,14 +50,13 @@ ZINC_ETCD_PASSWORD= ``` ### Proxy Configurations +If the current node is a proxy node, `ZINC_SERVER_MODE` must be set to **proxy** and `ZINC_ETCD_ENDPOINTS` must point to the etcd server (e.g., `ZINC_ETCD_ENDPOINTS=127.0.0.1:2379`). 
```shell ZINC_CLUSTER_PROXY_LOG_DIR=/opt/seasearch/data/log ZINC_CLUSTER_PROXY_HOST=0.0.0.0 ZINC_CLUSTER_PROXY_PORT=4082 -ZINC_SERVER_MODE=proxy # must be proxy -ZINC_ETCD_ENDPOINTS=127.0.0.1:2379 -ZINC_ETCD_PREFIX=/zinc +ZINC_ETCD_PREFIX= ZINC_MAX_DOCUMENT_SIZE=1m # Bulk and multisearch limit on the maximum single document,default 1m ZINC_CLUSTER_MANAGER_ADDR=127.0.0.1:4081 # manager address ``` @@ -68,14 +67,14 @@ ZINC_CLUSTER_MANAGER_ADDR=127.0.0.1:4081 # manager address ZINC_CLUSTER_MANAGER_LOG_DIR=/opt/seasearch/data/log ZINC_CLUSTER_MANAGER_HOST=0.0.0.0 ZINC_CLUSTER_MANAGER_PORT=4081 -ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS=127.0.0.1:2379 -ZINC_CLUSTER_MANAGER_ETCD_PREFIX=/zinc +ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS= +ZINC_CLUSTER_MANAGER_ETCD_PREFIX= ``` ## Logs Configurations ```shell -ZINC_LOG_OUTPUT=yes #whether to output logs to files, default yes +ZINC_LOG_OUTPUT=true #whether to output logs to files, default yes ZINC_LOG_DIR=/opt/seasearch/data/log #log directory ZINC_LOG_LEVEL=debug #log level,default debug ``` diff --git a/manual/repo/seasearch.yml b/manual/repo/seasearch.yml index 33ec361..0322302 100644 --- a/manual/repo/seasearch.yml +++ b/manual/repo/seasearch.yml @@ -9,7 +9,8 @@ services: environment: - ZINC_FIRST_ADMIN_USER=${ZINC_FIRST_ADMIN_USER} - ZINC_FIRST_ADMIN_PASSWORD=${ZINC_FIRST_ADMIN_PASSWORD} - - ZINC_WAL_ENABLE=${ZINC_WAL_ENABLE:-release} + - GIN_MODE=${GIN_MODE:-release} + - ZINC_WAL_ENABLE=${ZINC_WAL_ENABLE:-true} - ZINC_STORAGE_TYPE=${ZINC_STORAGE_TYPE} - ZINC_SHARD_NUM=${ZINC_SHARD_NUM} - ZINC_MAX_OBJ_CACHE_SIZE=${ZINC_MAX_OBJ_CACHE_SIZE} @@ -22,13 +23,11 @@ services: - ZINC_S3_AWS_REGION=${ZINC_S3_AWS_REGION} - ZINC_SERVER_MODE=${ZINC_SERVER_MODE} - ZINC_CLUSTER_ID=${ZINC_CLUSTER_ID} - - ZINC_ETCD_ENDPOINTS=${ZINC_ETCD_ENDPOINTS} - ZINC_ETCD_USERNAME=${ZINC_ETCD_USERNAME} - ZINC_ETCD_PASSWORD=${ZINC_ETCD_PASSWORD} - ZINC_CLUSTER_PROXY_LOG_DIR=${ZINC_CLUSTER_PROXY_LOG_DIR:-/opt/seasearch/data/log} - 
ZINC_CLUSTER_PROXY_HOST=${ZINC_CLUSTER_PROXY_HOST:-0.0.0.0} - ZINC_CLUSTER_PROXY_PORT=${ZINC_CLUSTER_PROXY_PORT:-4082} - - ZINC_SERVER_MODE=${ZINC_SERVER_MODE:-proxy} - ZINC_ETCD_ENDPOINTS=${ZINC_ETCD_ENDPOINTS:-127.0.0.1:2379} - ZINC_ETCD_PREFIX=${ZINC_ETCD_PREFIX:-/zinc} - ZINC_MAX_DOCUMENT_SIZE=${ZINC_MAX_DOCUMENT_SIZE:-1m} @@ -38,7 +37,7 @@ services: - ZINC_CLUSTER_MANAGER_PORT=${ZINC_CLUSTER_MANAGER_PORT:-4081} - ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS=${ZINC_CLUSTER_MANAGER_ETCD_ENDPOINTS:-127.0.0.1:2379} - ZINC_CLUSTER_MANAGER_ETCD_PREFIX=${ZINC_CLUSTER_MANAGER_ETCD_PREFIX:-/zinc} - - ZINC_LOG_OUTPUT=${ZINC_LOG_OUTPUT:-yes} + - ZINC_LOG_OUTPUT=${ZINC_LOG_OUTPUT:-true} - ZINC_LOG_DIR=${ZINC_LOG_DIR:-/opt/seasearch/data/log} - ZINC_LOG_LEVEL=${ZINC_LOG_LEVEL:-debug} networks: