Skip to content

Commit

Permalink
Fix important bug: reasoning from R1 is named "reasoning", not "thinking"!
Browse files Browse the repository at this point in the history
Add UI for nicely showing reasoning/COT
  • Loading branch information
scosman committed Feb 3, 2025
1 parent da54912 commit 5c2df02
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 35 deletions.
18 changes: 18 additions & 0 deletions app/web_ui/src/lib/ui/info_tooltip.svelte
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<script lang="ts">
  // Text shown in the tooltip (passed through `data-tip`); also used as the
  // button's accessible name, since the button contains only an icon.
  export let tooltip_text: string
</script>

<!--
  Info icon button carrying `tooltip_text` in its `data-tip` attribute.
  type="button" is set explicitly: a <button> without a type defaults to
  "submit", so clicking the icon inside a <form> would submit that form.
-->
<button
  type="button"
  class="tooltip"
  data-tip={tooltip_text}
  aria-label={tooltip_text}
>
  <svg
    fill="currentColor"
    class="w-6 h-6 inline"
    viewBox="0 0 1024 1024"
    version="1"
    xmlns="http://www.w3.org/2000/svg"
    ><path
      d="M512 717a205 205 0 1 0 0-410 205 205 0 0 0 0 410zm0 51a256 256 0 1 1 0-512 256 256 0 0 1 0 512z"
    /><path
      d="M485 364c7-7 16-11 27-11s20 4 27 11c8 8 11 17 11 28 0 10-3 19-11 27-7 7-16 11-27 11s-20-4-27-11c-8-8-11-17-11-27 0-11 3-20 11-28zM479 469h66v192h-66z"
    /></svg
  >
</button>
17 changes: 3 additions & 14 deletions app/web_ui/src/lib/utils/form_element.svelte
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<script lang="ts">
import InfoTooltip from "$lib/ui/info_tooltip.svelte"
export let inputType: "input" | "textarea" | "select" = "input"
export let id: string
export let label: string
Expand Down Expand Up @@ -82,20 +84,7 @@
>
{#if info_description}
<div>
<button class="tooltip tooltip-left" data-tip={info_description}>
<svg
fill="currentColor"
class="w-6 h-6 inline"
viewBox="0 0 1024 1024"
version="1"
xmlns="http://www.w3.org/2000/svg"
><path
d="M512 717a205 205 0 1 0 0-410 205 205 0 0 0 0 410zm0 51a256 256 0 1 1 0-512 256 256 0 0 1 0 512z"
/><path
d="M485 364c7-7 16-11 27-11s20 4 27 11c8 8 11 17 11 28 0 10-3 19-11 27-7 7-16 11-27 11s-20-4-27-11c-8-8-11-17-11-27 0-11 3-20 11-28zM479 469h66v192h-66z"
/></svg
>
</button>
<InfoTooltip tooltip_text={info_description} />
</div>
{/if}
</div>
Expand Down
43 changes: 26 additions & 17 deletions app/web_ui/src/routes/(app)/run/run.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import { fly } from "svelte/transition"
import { onMount } from "svelte"
import TagDropdown from "./tag_dropdown.svelte"
import InfoTooltip from "$lib/ui/info_tooltip.svelte"
export let project_id: string
export let task: Task
Expand Down Expand Up @@ -324,6 +325,15 @@
delete_repair_submitting = false
}
}
/**
 * Maps an intermediate output name to the heading shown above it.
 *
 * Known names ("reasoning", "chain_of_thought") get a friendly title; any
 * other name is returned unchanged so unknown outputs still get a heading.
 *
 * @param name - Key of the entry in the run's intermediate outputs.
 * @returns The display title for that entry.
 */
function get_intermediate_output_title(name: string): string {
  // A Map is used rather than indexing an object literal: object indexing
  // also resolves inherited members, so names such as "constructor",
  // "toString" or "__proto__" would return a function/object, not a string.
  const known_titles = new Map<string, string>([
    ["reasoning", "Model Reasoning Output"],
    ["chain_of_thought", "Chain of Thought Output"],
  ])
  return known_titles.get(name) ?? name
}
</script>

<div>
Expand All @@ -345,6 +355,19 @@
</div>
{/if}
<Output raw_output={run.output.output} />
{#if run.intermediate_outputs}
{#each Object.entries(run.intermediate_outputs) as [name, intermediate_output]}
<div
class="text-xs font-bold text-gray-500 mt-4 mb-1 flex flex-row items-center gap-1"
>
{get_intermediate_output_title(name)}
<InfoTooltip
tooltip_text={`This is intermediate output from the model, and not considered part of the final answer. This thinking helped formulate the final answer above. This is known as 'chain of thought', 'thinking output', or 'inference time compute'.`}
/>
</div>
<Output raw_output={intermediate_output} />
{/each}
{/if}
<div>
<div class="mt-2">
<button class="text-xs link" on:click={toggle_raw_data}
Expand Down Expand Up @@ -486,23 +509,9 @@
{#each task.requirements as requirement, index}
<div class="flex items-center">
{requirement.name}:
<button
class="tooltip"
data-tip={`Requirement #${index + 1} - ${requirement.instruction || "No instruction provided"}${requirement.type === "pass_fail_critical" ? " Use 'critical' rating for responses which are never tolerable, beyond a typical failure." : ""}`}
>
<svg
fill="currentColor"
class="w-6 h-6 inline"
viewBox="0 0 1024 1024"
version="1"
xmlns="http://www.w3.org/2000/svg"
><path
d="M512 717a205 205 0 1 0 0-410 205 205 0 0 0 0 410zm0 51a256 256 0 1 1 0-512 256 256 0 0 1 0 512z"
/><path
d="M485 364c7-7 16-11 27-11s20 4 27 11c8 8 11 17 11 28 0 10-3 19-11 27-7 7-16 11-27 11s-20-4-27-11c-8-8-11-17-11-27 0-11 3-20 11-28zM479 469h66v192h-66z"
/></svg
>
</button>
<InfoTooltip
tooltip_text={`Requirement #${index + 1} - ${requirement.instruction || "No instruction provided"}${requirement.type === "pass_fail_critical" ? " Use 'critical' rating for responses which are never tolerable, beyond a typical failure." : ""}`}
/>
</div>
<div class="flex items-center">
<Rating
Expand Down
4 changes: 2 additions & 2 deletions libs/core/kiln_ai/adapters/fine_tune/dataset_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def build_training_data(
include_cot
and task_run.intermediate_outputs is not None
and (
"thinking" in task_run.intermediate_outputs
"reasoning" in task_run.intermediate_outputs
or "chain_of_thought" in task_run.intermediate_outputs
)
):
Expand All @@ -94,7 +94,7 @@ def build_training_data(
"TaskRuns for training required a parent Task for building a chain of thought prompts. Train without COT, or save this TaskRun to a parent Task."
)
thinking = task_run.intermediate_outputs.get(
"thinking"
"reasoning"
) or task_run.intermediate_outputs.get("chain_of_thought")
thinking_instructions = chain_of_thought_prompt(parent_task)
thinking_final_answer_prompt = COT_FINAL_ANSWER_PROMPT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -514,9 +514,9 @@ def test_build_training_data_with_thinking(mock_task):
# Setup with needed fields for thinking
mock_task_run = mock_task.runs()[0]
assert mock_task_run.parent_task() == mock_task
# It should just use the thinking output if both thinking and chain_of_thought are present
# It should just use the reasoning output if both reasoning and chain_of_thought are present
mock_task_run.intermediate_outputs = {
"thinking": "thinking output",
"reasoning": "thinking output",
"chain_of_thought": "cot output",
}
mock_task.thinking_instruction = "thinking instructions"
Expand Down

0 comments on commit 5c2df02

Please sign in to comment.