From nobody Sun Feb 8 05:26:57 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 891612853EE for ; Sun, 11 Jan 2026 21:29:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166963; cv=none; b=JVumGh/kiRspVWNBPzVpHhj0WSaNzYd85vAYiFBb7P2omUL41RhIDQyoiPFWXRnNz28YzTeuVrec4HikCTY3GeWGoFJIqrrWQJZu360wKWhHZbtV+mMkGzxGFzvY2TLCMh+Pi1LZNDd/0WTrz1zrKcMEZ5f57eW2tqKuXM7gSY8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166963; c=relaxed/simple; bh=mAvsryD46w5Cimgnn+GzWEn7fLSvnQP+2ct0WhzfIX8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=TMyfWbFYsiAXk0aoM5f5sSNaKIMtyQz/zx/aLXMSRP7NO650RMnG3s/Fmrk2AJJfJANfr4d4ES6iUKiLDXAH4FpX6DZt7YQ6HVfIFVE2GeaL7rdZHNO006pWcRS20oRFq7TH1GAk4YJzrnpy9LydULlQ0jrk0dGIWMI6fByxbxc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=k7+9vCdH; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k7+9vCdH" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C2447C4CEF7; Sun, 11 Jan 2026 21:29:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166963; bh=mAvsryD46w5Cimgnn+GzWEn7fLSvnQP+2ct0WhzfIX8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k7+9vCdHFNvnQgliWuzugu71/ueuxfvDTHHh33v2ON0HNkfyOJkgb28x+PHlgX5OG TE721Q8KHSKIrpA8WiicD5VtCQhMqVffnSQqb/kutkg+8ARrW86oSIXQTatseyLnq6 tPqwFVJyuwHO7WVBUI1Gy0Xh/BRn/X+gkXJtKDBHzAi0B6kcYOMWoCqpvBppbjMIlF ndCa1z02EyRSxfy4JsYWPNDsUkd8lumHLNYYFdv6S3Up/+tiOuNiBJj7il/m7fN9Ef ka26cJ9Uj0/KowgdDbjkesin6l48JfzFaGVqWe1RmXQwsg6fwhJZresE1TKbsekU3G JODadhPxTmSWg== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 6/7] LLMinus: Add prompt token limit enforcement Date: Sun, 11 Jan 2026 16:29:14 -0500 Message-ID: <20260111212915.195056-7-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the max-tokens option with a 100K default to prevent prompt overflow errors with various LLM providers. Token count is estimated at roughly 4 characters per token. When prompts exceed the limit, RAG examples are progressively removed until the prompt fits. The token count is displayed in invoke output. Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 114 +++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 19 deletions(-) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index ff668244688f..5c469e23f09a 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -14,6 +14,18 @@ =20 const STORE_PATH: &str =3D ".llminus-resolutions.json"; =20 +/// Default maximum tokens for prompt (conservative for broad provider com= patibility) +/// Most providers support at least 128K; we use 100K as a safe default. +const DEFAULT_MAX_TOKENS: usize =3D 100_000; + +/// Approximate characters per token (for English text) +const CHARS_PER_TOKEN: usize =3D 4; + +/// Estimate the number of tokens in a text string +fn estimate_tokens(text: &str) -> usize { + text.len() / CHARS_PER_TOKEN +} + #[derive(Parser)] #[command(name =3D "llminus")] #[command(about =3D "LLM-powered git conflict resolution tool")] @@ -45,6 +57,9 @@ enum Commands { Resolve { /// Command to invoke. The prompt will be passed via stdin. command: String, + /// Maximum tokens for prompt (reduces RAG examples if exceeded) + #[arg(short, long, default_value_t =3D DEFAULT_MAX_TOKENS)] + max_tokens: usize, }, /// Pull a kernel patch/pull request from lore.kernel.org and merge it Pull { @@ -53,6 +68,9 @@ enum Commands { /// Command to invoke for LLM assistance #[arg(short, long, default_value =3D "llm")] command: String, + /// Maximum tokens for prompt (reduces RAG examples if exceeded) + #[arg(long, default_value_t =3D DEFAULT_MAX_TOKENS)] + max_tokens: usize, }, } =20 @@ -825,6 +843,7 @@ fn parse_conflict_file(path: &str) -> Result> { } =20 /// Result of a similarity search +#[derive(Clone)] struct SimilarResolution { resolution: MergeResolution, similarity: f32, @@ -1632,7 +1651,7 @@ fn build_resolve_prompt( prompt } =20 -fn resolve(command: &str) -> Result<()> { +fn resolve(command: &str, max_tokens: usize) -> Result<()> { // Get merge context (what branch/tag is being merged) let merge_ctx =3D get_merge_context(); if let Some(ref source) =3D merge_ctx.merge_source { @@ -1649,17 +1668,45 @@ fn resolve(command: &str) -> Result<()> { =20 // Try to find similar historical resolutions (gracefully handles miss= ing database) println!("Looking for similar historical conflicts..."); - let similar =3D try_find_similar_resolutions(3, &conflicts); + let all_similar =3D try_find_similar_resolutions(3, &conflicts); =20 - if similar.is_empty() { + if all_similar.is_empty() { println!("No historical resolution database found (run 'llminus le= arn' and 'llminus vectorize' to build one)"); println!("Proceeding without historical examples..."); } else { - println!("Found {} similar historical resolutions", similar.len()); + println!("Found {} similar historical resolutions", all_similar.le= n()); + } + + // Build the prompt with adaptive RAG example reduction + let mut similar =3D all_similar.clone(); + let mut prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_c= tx, None); + let mut tokens =3D estimate_tokens(&prompt); + + // Reduce RAG examples until we're under the token limit + while tokens > max_tokens && !similar.is_empty() { + let original_count =3D all_similar.len(); + similar.pop(); // Remove the least similar (last) example + prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = None); + tokens =3D estimate_tokens(&prompt); + + if similar.len() < original_count { + println!( + "Reduced RAG examples from {} to {} to fit token limit (~{= } tokens, limit: {})", + original_count, + similar.len(), + tokens, + max_tokens + ); + } + } + + if tokens > max_tokens { + println!( + "Warning: Prompt still exceeds token limit (~{} tokens, limit:= {}) even without RAG examples", + tokens, max_tokens + ); } =20 - // Build the prompt and invoke LLM - let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = None); invoke_llm(command, &prompt) } =20 @@ -1668,7 +1715,8 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<= ()> { use std::io::Write; use std::process::Stdio; =20 - println!("Invoking: {} (prompt: {} bytes)", command, prompt.len()); + let tokens =3D estimate_tokens(prompt); + println!("Invoking: {} (prompt: {} bytes, ~{} tokens)", command, promp= t.len(), tokens); println!("{}", "=3D".repeat(80)); =20 // Parse command (handle arguments) @@ -1708,7 +1756,7 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<= ()> { } =20 /// Pull a kernel pull request from lore.kernel.org -fn pull(message_id: &str, command: &str) -> Result<()> { +fn pull(message_id: &str, command: &str, max_tokens: usize) -> Result<()> { check_repo()?; =20 // Step 1: Fetch and parse the pull request email @@ -1744,16 +1792,44 @@ fn pull(message_id: &str, command: &str) -> Result<= ()> { =20 // Try to find similar historical resolutions println!("Looking for similar historical conflicts..."); - let similar =3D try_find_similar_resolutions(3, &conflicts); + let all_similar =3D try_find_similar_resolutions(3, &conflicts); =20 - if similar.is_empty() { + if all_similar.is_empty() { println!("No historical resolution database found (this is optiona= l)"); } else { - println!("Found {} similar historical resolutions", similar.len()); + println!("Found {} similar historical resolutions", all_similar.le= n()); + } + + // Build the prompt with adaptive RAG example reduction + let mut similar =3D all_similar.clone(); + let mut prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_c= tx, Some(&pull_req)); + let mut tokens =3D estimate_tokens(&prompt); + + // Reduce RAG examples until we're under the token limit + while tokens > max_tokens && !similar.is_empty() { + let original_count =3D all_similar.len(); + similar.pop(); // Remove the least similar (last) example + prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = Some(&pull_req)); + tokens =3D estimate_tokens(&prompt); + + if similar.len() < original_count { + println!( + "Reduced RAG examples from {} to {} to fit token limit (~{= } tokens, limit: {})", + original_count, + similar.len(), + tokens, + max_tokens + ); + } + } + + if tokens > max_tokens { + println!( + "Warning: Prompt still exceeds token limit (~{} tokens, limit:= {}) even without RAG examples", + tokens, max_tokens + ); } =20 - // Build the prompt with pull request context and invoke LLM - let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = Some(&pull_req)); println!("\n=3D=3D=3D Invoking LLM for Conflict Resolution =3D=3D=3D"); invoke_llm(command, &prompt)?; =20 @@ -1815,8 +1891,8 @@ fn main() -> Result<()> { Commands::Learn { range } =3D> learn(range.as_deref()), Commands::Vectorize { batch_size } =3D> vectorize(batch_size), Commands::Find { n } =3D> find(n), - Commands::Resolve { command } =3D> resolve(&command), - Commands::Pull { message_id, command } =3D> pull(&message_id, &com= mand), + Commands::Resolve { command, max_tokens } =3D> resolve(&command, m= ax_tokens), + Commands::Pull { message_id, command, max_tokens } =3D> pull(&mess= age_id, &command, max_tokens), } } =20 @@ -1890,7 +1966,7 @@ fn test_find_command_with_n() { fn test_resolve_command_parses() { let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm"]).= unwrap(); match cli.command { - Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= "), + Commands::Resolve { command, .. } =3D> assert_eq!(command, "my= -llm"), _ =3D> panic!("Expected Resolve command"), } } @@ -1899,7 +1975,7 @@ fn test_resolve_command_parses() { fn test_resolve_command_with_args() { let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm --m= odel fancy"]).unwrap(); match cli.command { - Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= --model fancy"), + Commands::Resolve { command, .. } =3D> assert_eq!(command, "my= -llm --model fancy"), _ =3D> panic!("Expected Resolve command"), } } @@ -2210,7 +2286,7 @@ fn test_parse_multiple_conflicts() { fn test_pull_command_parses() { let cli =3D Cli::try_parse_from(["llminus", "pull", "test@kernel.o= rg"]).unwrap(); match cli.command { - Commands::Pull { message_id, command } =3D> { + Commands::Pull { message_id, command, .. } =3D> { assert_eq!(message_id, "test@kernel.org"); assert_eq!(command, "llm"); // default } @@ -2224,7 +2300,7 @@ fn test_pull_command_with_custom_command() { "llminus", "pull", "test@kernel.org", "-c", "my-llm --model fa= ncy" ]).unwrap(); match cli.command { - Commands::Pull { message_id, command } =3D> { + Commands::Pull { message_id, command, .. } =3D> { assert_eq!(message_id, "test@kernel.org"); assert_eq!(command, "my-llm --model fancy"); } --=20 2.51.0