Skip to content

Commit

Permalink
Improve init indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
bkis committed Jul 23, 2024
1 parent 819b986 commit b4f5c22
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Tekst-API/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
# TEKST_ES__PREFIX=tekst
# default: tekst

# TEKST_ES__INIT_TIMEOUT_S=120
# TEKST_ES__INIT_TIMEOUT_S=240
# default: 240

# TEKST_ES__MAX_FIELD_MAPPINGS=1000
Expand Down
2 changes: 1 addition & 1 deletion Tekst-API/tekst/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class ElasticsearchConfig(ConfigSubSection):
host: str = "127.0.0.1"
port: int = 9200
prefix: str = "tekst"
init_timeout_s: int = 120
init_timeout_s: int = 240
max_field_mappings: int = 1000

@field_validator("host", mode="before")
Expand Down
2 changes: 1 addition & 1 deletion Tekst-API/tekst/resources/text_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ async def contents_changed_hook(
cls,
resource_id: PydanticObjectId,
) -> None:
op_id = log_op_start(f"Update aggregations for resource {resource_id}")
op_id = log_op_start(f"Generate aggregations for resource {resource_id}")
try:
await cls._update_aggregations(resource_id)
except Exception as e:
Expand Down
34 changes: 21 additions & 13 deletions Tekst-API/tekst/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,37 @@
_es_client: Elasticsearch | None = None


async def init_es_client(
es_uri: str = _cfg.es.uri,
) -> Elasticsearch:
async def _wait_for_es() -> bool:
global _es_client
if _es_client is None:
log.info("Initializing Elasticsearch client...")
_es_client = Elasticsearch(es_uri)
if _es_client is not None:
for i in range(_cfg.es.init_timeout_s):
if _es_client.ping():
break
return True
if i % 10 == 0:
log.debug(
f"Waiting for Elasticsearch service at {es_uri} "
f"Waiting for Elasticsearch service at {_cfg.es.uri} "
f"({i}/{_cfg.es.init_timeout_s} seconds)..."
)
await asyncio.sleep(1)
await asyncio.sleep(1)
else:
log.critical(f"Could not connect to Elasticsearch at {es_uri}!")
raise RuntimeError("Timed out waiting for Elasticsearch service!")
log.critical(f"Could not connect to Elasticsearch at {_cfg.es.uri}!")
return False
else:
await init_es_client()


async def init_es_client() -> Elasticsearch:
global _es_client
if _es_client is None:
log.info("Initializing Elasticsearch client...")
_es_client = Elasticsearch(_cfg.es.uri)
if not await _wait_for_es():
raise RuntimeError("Waiting for Elasticsearch client exceeded timeout!")
return _es_client


async def _get_es_client(es_uri: str = _cfg.es.uri) -> Elasticsearch:
return await init_es_client(es_uri)
async def _get_es_client() -> Elasticsearch:
return await init_es_client()


def get_es_status() -> dict[str, Any] | None:
Expand Down Expand Up @@ -107,6 +114,7 @@ async def create_indices_task(
overwrite_existing_indices: bool = True,
) -> dict[str, float]:
op_id = log_op_start("Create search indices", level="INFO")
await _wait_for_es()
await _setup_index_templates()

# get existing search indices
Expand Down
2 changes: 1 addition & 1 deletion docs/content/setup/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Configuration for the connection to the Elasticsearch server
| `TEKST_ES__HOST` | Elasticsearch host (String – default: `127.0.0.1`) |
| `TEKST_ES__PORT` | Elasticsearch port (Integer – default: `9200`) |
| `TEKST_ES__PREFIX` | Elasticsearch prefix (for index, templates, etc.) (String – default: `tekst`) |
| `TEKST_ES__INIT_TIMEOUT_S` | Timeout for waiting for Elasticsearch service to be available on startup (Integer – default: `120`) |
| `TEKST_ES__INIT_TIMEOUT_S` | Timeout for waiting for Elasticsearch service to be available on startup (Integer – default: `240`) |
| `TEKST_ES__MAX_FIELD_MAPPINGS` | Max. number of field mappings per search index – given there is enough memory, this can be increased in case there are e.g. annotation resources with many distinct annotation keys (these are dynamically mapped fields). The admin maintenance UI shows a warning if an index is about to hit this value. Any field mapping surpassing this value will be ignored and won't be searchable. (Integer – default: `1000`) |


Expand Down

0 comments on commit b4f5c22

Please sign in to comment.