From 1e0135af21944e26673d92375b01ab48f8812215 Mon Sep 17 00:00:00 2001 From: EYHN Date: Sat, 19 Oct 2024 21:14:32 +0800 Subject: [PATCH] feat(core): journal index --- .../infra/src/modules/db/entities/table.ts | 2 ++ .../docs-search/entities/docs-indexer.ts | 14 +++++++-- .../modules/docs-search/worker/in-worker.ts | 30 ++++++++++++++----- .../src/modules/docs-search/worker/types.ts | 3 +- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/packages/common/infra/src/modules/db/entities/table.ts b/packages/common/infra/src/modules/db/entities/table.ts index 348493d99cb5f..a5c22bc5b3de0 100644 --- a/packages/common/infra/src/modules/db/entities/table.ts +++ b/packages/common/infra/src/modules/db/entities/table.ts @@ -22,6 +22,8 @@ export class WorkspaceDBTable< .docState$(this.props.storageDocId) .map(docState => docState.loading); + ydocId = this.props.storageDocId; + create = this.table.create.bind(this.table); update = this.table.update.bind(this.table); get = this.table.get.bind(this.table); diff --git a/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts b/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts index 19a99e4825377..678b78e6d27a8 100644 --- a/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts +++ b/packages/frontend/core/src/modules/docs-search/entities/docs-indexer.ts @@ -6,6 +6,7 @@ import type { WorkspaceService, } from '@toeverything/infra'; import { + Document, Entity, IndexedDBIndexStorage, IndexedDBJobQueue, @@ -79,7 +80,8 @@ export class DocsIndexer extends Entity { constructor( private readonly workspaceService: WorkspaceService, - private readonly workspaceLocalState: WorkspaceLocalState + private readonly workspaceLocalState: WorkspaceLocalState, + private readonly workspaceDBService: WorkspaceDBService ) { super(); } @@ -135,7 +137,11 @@ export class DocsIndexer extends Entity { await this.workspaceEngine.doc.storage.loadDocFromLocal( this.workspaceId ); - if (!rootDocBuffer) { + const propertyDBDocBuffer = + await this.workspaceEngine.doc.storage.loadDocFromLocal( + this.workspaceDBService.db.docProperties.ydocId + ); + if (!rootDocBuffer || !propertyDBDocBuffer) { return; } @@ -145,18 +151,20 @@ export class DocsIndexer extends Entity { type: 'all', }, { + fields: ['journal'], pagination: { limit: Number.MAX_SAFE_INTEGER, skip: 0, }, } ) - ).nodes.map(n => n.id); + ).nodes.map(n => Document.from(n.id, n.fields)); workerOutput = await worker.run({ type: 'rootDoc', allIndexedDocs, rootDocBuffer, + propertyDBDocBuffer, reindexAll: isUpgrade, }); } else { diff --git a/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts b/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts index 21d03eccd130d..21bf5e6a4f1d5 100644 --- a/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts +++ b/packages/frontend/core/src/modules/docs-search/worker/in-worker.ts @@ -85,7 +85,7 @@ async function crawlingDocData({ } else { const ydoc = new YDoc(); let docTitle = ''; - let summaryLenNeeded = 1000; + let summaryLenNeeded = 500; let summary = ''; const blockDocuments: Document[] = []; @@ -363,16 +363,24 @@ async function crawlingDocData({ } } -function crawlingRootDocData({ +async function crawlingRootDocData({ allIndexedDocs, rootDocBuffer, reindexAll, + propertyDBDocBuffer, }: WorkerInput & { type: 'rootDoc'; -}): WorkerOutput { - const ydoc = new YDoc(); +}): Promise { + const rootDocBufferHash = toHexString(await digest(rootDocBuffer)); - applyUpdate(ydoc, rootDocBuffer); + let ydoc; + if (cachedRootDoc && cachedRootDoc.hash === rootDocBufferHash) { + ydoc = cachedRootDoc.doc; + } else { + ydoc = new YDoc(); + applyUpdate(ydoc, rootDocBuffer); + cachedRootDoc = { doc: ydoc, hash: rootDocBufferHash }; + } const docs = ydoc.getMap('meta').get('pages') as | YArray> @@ -398,10 +406,16 @@ function crawlingRootDocData({ } } - const needDelete = difference(allIndexedDocs, availableDocs); + const needDelete = difference( + allIndexedDocs.map(doc => doc.id), + availableDocs + ); const needAdd = reindexAll ? availableDocs - : difference(availableDocs, allIndexedDocs); + : difference( + availableDocs, + allIndexedDocs.map(doc => doc.id) + ); return { reindexDoc: [...needAdd, ...needDelete].map(docId => ({ @@ -422,7 +436,7 @@ globalThis.onmessage = async (event: MessageEvent) => { try { let data; if (input.type === 'rootDoc') { - data = crawlingRootDocData(input); + data = await crawlingRootDocData(input); } else { data = await crawlingDocData(input); } diff --git a/packages/frontend/core/src/modules/docs-search/worker/types.ts b/packages/frontend/core/src/modules/docs-search/worker/types.ts index b3d33a8ec7347..cb8388d88c488 100644 --- a/packages/frontend/core/src/modules/docs-search/worker/types.ts +++ b/packages/frontend/core/src/modules/docs-search/worker/types.ts @@ -30,7 +30,8 @@ export type WorkerInput = | { type: 'rootDoc'; rootDocBuffer: Uint8Array; - allIndexedDocs: string[]; + propertyDBDocBuffer: Uint8Array; + allIndexedDocs: Document[]; reindexAll?: boolean; } | {