feat: UI for advanced reasoning models (#1605)
* feat: add a reasoning dropdown for CoT models

* feat: add status updates

* fix: various cleanups
- pass content & status to result dropdown
- don't store streaming updates in the DB
- make status generation non-blocking

* fix: make sure not to push the reasoning token stream to the DB

* feat: add a time indicator and make the UI match web search

* fix: change in status update & prompt
nsarrazin authored Dec 2, 2024
1 parent 54306a5 commit f6f410e
Showing 16 changed files with 357 additions and 51 deletions.
3 changes: 3 additions & 0 deletions chart/env/prod.yaml
@@ -144,6 +144,9 @@ envVars:
"websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
"description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
"reasoning": {
"type": "summarize"
},
"parameters": {
"stop": ["<|im_end|>"],
"truncate": 12288,
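For reference, "summarize" is only one of the shapes the new per-model reasoning config accepts; the reasoningSchema added in src/lib/server/models.ts (further down in this diff) also allows a token-delimited variant. A minimal sketch of that alternative, written as a TypeScript object literal — the delimiter strings are placeholders, not values from this commit:

// Hypothetical "tokens" variant of the reasoning config (placeholder delimiters):
// the text between beginToken and endToken is treated as reasoning, the rest as the answer.
const exampleReasoningConfig = {
	type: "tokens",
	beginToken: "<think>",
	endToken: "</think>",
};

In the MODELS configuration, this object would sit in the same "reasoning" field that the QwQ entry above sets to { "type": "summarize" }.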
3 changes: 1 addition & 2 deletions src/lib/components/OpenWebSearchResults.svelte
@@ -9,7 +9,6 @@
import EosIconsLoading from "~icons/eos-icons/loading";
import IconInternet from "./icons/IconInternet.svelte";
-export let classNames = "";
export let webSearchMessages: MessageWebSearchUpdate[] = [];
$: sources = webSearchMessages.find(isMessageWebSearchSourcesUpdate)?.sources;
@@ -23,7 +22,7 @@
</script>

<details
-class="flex w-fit rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900 {classNames} max-w-full"
+class="flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
>
<summary class="grid min-w-72 select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2">
<div
35 changes: 29 additions & 6 deletions src/lib/components/chat/ChatMessage.svelte
@@ -25,6 +25,8 @@
type MessageWebSearchSourcesUpdate,
type MessageWebSearchUpdate,
type MessageFinalAnswerUpdate,
type MessageReasoningUpdate,
MessageReasoningUpdateType,
} from "$lib/types/MessageUpdate";
import { base } from "$app/paths";
import { useConvTreeStore } from "$lib/stores/convTree";
@@ -33,6 +35,7 @@
import { enhance } from "$app/forms";
import { browser } from "$app/environment";
import MarkdownRenderer from "./MarkdownRenderer.svelte";
import OpenReasoningResults from "./OpenReasoningResults.svelte";
export let model: Model;
export let id: Message["id"];
@@ -90,9 +93,13 @@
}
}
-$: searchUpdates = (message.updates?.filter(({ type }) => type === "webSearch") ??
+$: searchUpdates = (message.updates?.filter(({ type }) => type === MessageUpdateType.WebSearch) ??
[]) as MessageWebSearchUpdate[];
$: reasoningUpdates = (message.updates?.filter(
({ type }) => type === MessageUpdateType.Reasoning
) ?? []) as MessageReasoningUpdate[];
$: messageFinalAnswer = message.updates?.find(
({ type }) => type === MessageUpdateType.FinalAnswer
) as MessageFinalAnswerUpdate;
@@ -208,9 +215,17 @@
</div>
{/if}
{#if searchUpdates && searchUpdates.length > 0}
-<OpenWebSearchResults
-classNames={message.content.length ? "mb-3.5" : ""}
-webSearchMessages={searchUpdates}
+<OpenWebSearchResults webSearchMessages={searchUpdates} />
{/if}
{#if reasoningUpdates && reasoningUpdates.length > 0}
{@const summaries = reasoningUpdates
.filter((u) => u.subtype === MessageReasoningUpdateType.Status)
.map((u) => u.status)}

<OpenReasoningResults
summary={summaries[summaries.length - 1] || ""}
content={message.reasoning || ""}
loading={loading && message.content.length === 0}
/>
{/if}

@@ -224,11 +239,19 @@
{/each}
{/if}

-<div bind:this={contentEl}>
+<div
+bind:this={contentEl}
+class:mt-2={reasoningUpdates.length > 0 || searchUpdates.length > 0}
+>
{#if isLast && loading && $settings.disableStream}
<IconLoading classNames="loading inline ml-2 first:ml-0" />
{/if}
-<MarkdownRenderer content={message.content} sources={webSearchSources} />

+<div
+class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
+>
+<MarkdownRenderer content={message.content} sources={webSearchSources} />
+</div>
</div>

<!-- Web Search sources -->
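The template above filters message.updates for MessageUpdateType.Reasoning, reads u.subtype and u.status, and feeds message.reasoning to the new dropdown. A rough sketch of the update shape that usage implies is below; the actual definitions live in src/lib/types/MessageUpdate.ts (changed in this commit but not reproduced here), so the literal string values stand in for the real enum members and should be read as assumptions:

// Shape implied by the component code above (sketch, not the actual definition).
type MessageReasoningUpdateSketch =
	| { type: "reasoning"; subtype: "stream"; token: string } // raw reasoning tokens; per the commit message, never persisted to the DB
	| { type: "reasoning"; subtype: "status"; status: string }; // short status line shown in the dropdown summary

The component keeps only the most recent status update for the collapsed summary row and passes the accumulated message.reasoning text as the expandable body.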
26 changes: 11 additions & 15 deletions src/lib/components/chat/MarkdownRenderer.svelte
@@ -106,21 +106,17 @@
});
</script>

-<div
-class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
->
-{#each tokens as token}
-{#if token.type === "code"}
-<CodeBlock lang={token.lang} code={token.text} />
-{:else}
-{@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
-{#await parsed then parsed}
-<!-- eslint-disable-next-line svelte/no-at-html-tags -->
-{@html DOMPurify.sanitize(parsed)}
-{/await}
-{/if}
-{/each}
-</div>
+{#each tokens as token}
+{#if token.type === "code"}
+<CodeBlock lang={token.lang} code={token.text} />
+{:else}
+{@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
+{#await parsed then parsed}
+<!-- eslint-disable-next-line svelte/no-at-html-tags -->
+{@html DOMPurify.sanitize(parsed)}
+{/await}
+{/if}
+{/each}

<style lang="postcss">
:global(.katex-display) {
74 changes: 74 additions & 0 deletions src/lib/components/chat/OpenReasoningResults.svelte
@@ -0,0 +1,74 @@
<script lang="ts">
import IconThought from "~icons/carbon/circle-packing";
import MarkdownRenderer from "./MarkdownRenderer.svelte";
export let summary: string;
export let content: string;
export let loading: boolean = false;
</script>

<details
class="u flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
>
<summary
class="grid min-w-72 cursor-pointer select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2"
>
<div
class="relative grid aspect-square place-content-center overflow-hidden rounded-lg bg-gray-100 dark:bg-gray-800"
>
<svg
class="absolute inset-0 text-gray-300 transition-opacity dark:text-gray-700 {loading
? 'opacity-100'
: 'opacity-0'}"
width="40"
height="40"
viewBox="0 0 38 38"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
class="loading-path"
d="M8 2.5H30C30 2.5 35.5 2.5 35.5 8V30C35.5 30 35.5 35.5 30 35.5H8C8 35.5 2.5 35.5 2.5 30V8C2.5 8 2.5 2.5 8 2.5Z"
stroke="currentColor"
stroke-width="1"
stroke-linecap="round"
id="shape"
/>
</svg>

<IconThought class="text-[1rem]" />
</div>
<dl class="leading-4">
<dd class="text-sm">Reasoning</dd>
<dt
class="flex items-center gap-1 truncate whitespace-nowrap text-[.82rem] text-gray-400"
class:animate-pulse={loading}
>
{summary}
</dt>
</dl>
</summary>

<div
class="border-t border-gray-200 px-5 pb-2 pt-2 text-sm text-gray-600 dark:border-gray-800 dark:text-gray-400"
>
<MarkdownRenderer {content} />
</div>
</details>

<style>
details summary::-webkit-details-marker {
display: none;
}
.loading-path {
stroke-dasharray: 61.45;
animation: loading 2s linear infinite;
}
@keyframes loading {
to {
stroke-dashoffset: 122.9;
}
}
</style>
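A note on the loading animation's numbers, since they look arbitrary: the dash length is roughly half the perimeter of the 33×33 rounded square traced by #shape, so the border renders as two long dashes, and shifting the offset by the full perimeter (122.9 ≈ 2 × 61.45) each cycle makes them chase once around the outline. A quick sanity check, treating the cubic corners as quarter circles of radius 5.5 (an approximation, since the path actually uses Béziers):

// Approximate perimeter of the path in OpenReasoningResults.svelte's loading SVG.
const side = 35.5 - 2.5; // 33, from the path's extents
const radius = 8 - 2.5; // 5.5, from the corner control points
const straights = 4 * (side - 2 * radius); // 4 * 22 = 88
const corners = 2 * Math.PI * radius; // ≈ 34.6
console.log(straights + corners); // ≈ 122.6, close to the 122.9 used for the dash offset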
9 changes: 7 additions & 2 deletions src/lib/server/generateFromDefaultEndpoint.ts
@@ -1,15 +1,16 @@
import { smallModel } from "$lib/server/models";
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
import type { EndpointMessage } from "./endpoints/endpoints";

-export async function generateFromDefaultEndpoint({
+export async function* generateFromDefaultEndpoint({
messages,
preprompt,
generateSettings,
}: {
messages: EndpointMessage[];
preprompt?: string;
generateSettings?: Record<string, unknown>;
-}): Promise<string> {
+}): AsyncGenerator<MessageUpdate, string, undefined> {
const endpoint = await smallModel.getEndpoint();

const tokenStream = await endpoint({ messages, preprompt, generateSettings });
@@ -25,6 +26,10 @@ export async function generateFromDefaultEndpoint({
}
return generated_text;
}
yield {
type: MessageUpdateType.Stream,
token: output.token.text,
};
}
throw new Error("Generation failed");
}
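Because generateFromDefaultEndpoint is now an AsyncGenerator<MessageUpdate, string, undefined> rather than a Promise<string>, a caller that is itself a MessageUpdate generator can forward the intermediate stream updates with yield* and still receive the final text as the value of that expression. A minimal sketch of such a caller — the function name, prompt, and import paths are assumptions based on the repo layout shown above, not code from this commit:

import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
import type { EndpointMessage } from "$lib/server/endpoints/endpoints";
import type { MessageUpdate } from "$lib/types/MessageUpdate";

// Hypothetical caller: forwards every intermediate update to its own consumer,
// then uses the generator's return value as the finished text.
async function* exampleStatusGenerator(
	messages: EndpointMessage[]
): AsyncGenerator<MessageUpdate, string, undefined> {
	const text = yield* generateFromDefaultEndpoint({
		messages,
		preprompt: "Summarize the reasoning so far in one short sentence.",
	});
	return text.trim();
}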
16 changes: 16 additions & 0 deletions src/lib/server/models.ts
@@ -17,6 +17,21 @@ import { isHuggingChat } from "$lib/utils/isHuggingChat";

type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;

const reasoningSchema = z.union([
z.object({
type: z.literal("regex"), // everything is reasoning, extract the answer from the regex
regex: z.string(),
}),
z.object({
type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
beginToken: z.string(),
endToken: z.string(),
}),
z.object({
type: z.literal("summarize"), // everything is reasoning, summarize the answer
}),
]);

const modelConfig = z.object({
/** Used as an identifier in DB */
id: z.string().optional(),
@@ -70,6 +85,7 @@
embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
/** Used to enable/disable system prompt usage */
systemRoleSupported: z.boolean().default(true),
reasoning: reasoningSchema.optional(),
});

const modelsRaw = z.array(modelConfig).parse(JSON5.parse(env.MODELS));
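The three variants of reasoningSchema describe where the reasoning ends and the answer begins. One plausible way server code could consume such a config when post-processing a finished generation — purely a sketch under that assumption; the real handling lives in server files that are not part of this excerpt:

// The union mirrors the zod schema above.
type ReasoningConfig =
	| { type: "regex"; regex: string }
	| { type: "tokens"; beginToken: string; endToken: string }
	| { type: "summarize" };

// Sketch: split generated text into a reasoning part and an answer part.
function splitReasoning(text: string, config: ReasoningConfig): { reasoning: string; answer: string } {
	if (config.type === "tokens") {
		const start = text.indexOf(config.beginToken);
		const end = text.indexOf(config.endToken);
		if (start !== -1 && end > start) {
			return {
				reasoning: text.slice(start + config.beginToken.length, end).trim(),
				answer: text.slice(end + config.endToken.length).trim(),
			};
		}
		return { reasoning: "", answer: text };
	}
	if (config.type === "regex") {
		// everything is reasoning; the regex extracts the final answer from it
		const match = text.match(new RegExp(config.regex));
		return { reasoning: text, answer: match?.[1] ?? text };
	}
	// "summarize": everything is reasoning; the answer is produced separately,
	// e.g. by summarizing with the small model.
	return { reasoning: text, answer: "" };
}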