From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 426E8288513 for ; Sun, 11 Jan 2026 21:29:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166959; cv=none; b=UhCcLztykHs6Bxqv0BcsdPDv/EH17Kvd+GdjHB9Hv31rxJMztFL/bV5rA+fGNsaIaSjOe3Ar9mHGp6ML96VvBCgk1KuYRXuynBTiXaRp9FCmIBpyC/j54ZGR9K6JKezrrAnq+1j2IzA8CO3iXyHHrh98JXAwRGWrE1POev6fICw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166959; c=relaxed/simple; bh=2+VxHrhxVoyZ1Rdsttum0zhYJZDXH9AXm2cRYbigjRg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=QHGUHnMISuFGz4YvM1GSGoW7Q/ViMbniU91GxBh8s6ACIzgzZYTV77BRJXRfRD1dWlofPHVkp95cLHCnYl1uL+XQJLykmmYkifroMzbJNO6T50Yhi3jQaGzBHXilHNFwzXmjXmcLfZ82+cilHGO/iiX+n7A/ZJec/XU3nKHpAZ4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=CzitKUvv; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="CzitKUvv" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6C84DC19425; Sun, 11 Jan 2026 21:29:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166959; bh=2+VxHrhxVoyZ1Rdsttum0zhYJZDXH9AXm2cRYbigjRg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=CzitKUvvkxInatrxcCC1vwTThvuhGrPo4DAdcgHtJLucUx7rN/8PdcmcFC10zomaM RVZhlGUDkyJ0sekGAoEdJDjxhngJjJITW6H7Anlc88pr19uVOXDE412paN+rSvUy/r zjxcSv2vPVrBJDalT73uRSErDMijTcla8NsGC1L8KlWToeAKXJejPnO0Ths6HR27rQ buRFYdXl7N3byBPx+E7IrRJFSMN28gJu75GMtjd7KikXMdImJW5a1+PE2EGdvSSmAH EjjvfodO2bx6Z/sFNY59DeiXgk/gaRzSeTTzRfhNLIRfUCjQn9DnD+5AVYW+FQZMYg hYcBG9mnV2mIg== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 1/7] LLMinus: Add skeleton project with learn command Date: Sun, 11 Jan 2026 16:29:09 -0500 Message-ID: <20260111212915.195056-2-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce LLMinus, an LLM-powered git conflict resolution tool for the Linux kernel. The CLI uses clap with derive macros. Data structures store conflict resolutions: DiffHunk for individual hunks, FileResolution for per-file data, MergeResolution for per-commit metadata, and ResolutionStore for JSON persistence. The learn command walks merge commit history, identifies files modified in both parent branches, extracts actual conflict resolutions (not trivial merges), and stores the ours/theirs/resolution diffs. This builds a database for future RAG-based similarity search. Signed-off-by: Sasha Levin --- tools/llminus/.gitignore | 1 + tools/llminus/Cargo.toml | 18 + tools/llminus/src/main.rs | 798 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 817 insertions(+) create mode 100644 tools/llminus/.gitignore create mode 100644 tools/llminus/Cargo.toml create mode 100644 tools/llminus/src/main.rs diff --git a/tools/llminus/.gitignore b/tools/llminus/.gitignore new file mode 100644 index 000000000000..b83d22266ac8 --- /dev/null +++ b/tools/llminus/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/tools/llminus/Cargo.toml b/tools/llminus/Cargo.toml new file mode 100644 index 000000000000..bdb42561a056 --- /dev/null +++ b/tools/llminus/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name =3D "llminus" +version =3D "0.1.0" +edition =3D "2024" +authors =3D ["Sasha Levin "] +description =3D "LLM-powered git conflict resolution tool for the Linux ke= rnel" +license =3D "GPL-2.0" +repository =3D "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/l= inux.git" + +[dependencies] +anyhow =3D "1" +clap =3D { version =3D "4", features =3D ["derive"] } +rayon =3D "1" +serde =3D { version =3D "1", features =3D ["derive"] } +serde_json =3D "1" + +[dev-dependencies] +tempfile =3D "3" diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs new file mode 100644 index 000000000000..508bdc085173 --- /dev/null +++ b/tools/llminus/src/main.rs @@ -0,0 +1,798 @@ +//! llminus - LLM-powered git conflict resolution tool + +use anyhow::{bail, Context, Result}; +use clap::{Parser, Subcommand}; +use rayon::prelude::*; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::path::Path; +use std::process::Command; +use std::sync::atomic::{AtomicUsize, Ordering}; + +const STORE_PATH: &str =3D ".llminus-resolutions.json"; + +#[derive(Parser)] +#[command(name =3D "llminus")] +#[command(about =3D "LLM-powered git conflict resolution tool")] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Learn from historical merge conflict resolutions + Learn { + /// Git revision range (e.g., "v6.0..v6.1"). If not specified, lea= rns from entire history. + range: Option, + }, +} + +/// A single diff hunk representing a change region +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DiffHunk { + /// Starting line in the original file + pub start_line: u32, + /// Number of lines in original + pub original_count: u32, + /// Number of lines in new version + pub new_count: u32, + /// The actual diff content (unified diff format lines) + pub content: String, +} + +/// A single file's conflict resolution within a merge +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileResolution { + pub file_path: String, + pub file_type: String, // Extension: "c", "h", "rs", etc. + pub subsystem: String, // Extracted from path: "drivers/gpu" -> "= gpu" + + /// Changes from base to ours (what our branch did) + pub ours_diff: Vec, + /// Changes from base to theirs (what their branch did) + pub theirs_diff: Vec, + /// The final resolution diff (base to merge result) + pub resolution_diff: Vec, +} + +/// Format a section of diff hunks with a title header +fn format_hunk_section(title: &str, hunks: &[DiffHunk]) -> String { + if hunks.is_empty() { + return String::new(); + } + let mut text =3D format!("=3D=3D=3D {} =3D=3D=3D\n", title); + for h in hunks { + text.push_str(&h.content); + text.push('\n'); + } + text.push('\n'); + text +} + +impl FileResolution { + /// Generate embedding text for this file's resolution + pub fn to_embedding_text(&self) -> String { + format!( + "File: {}\n\n{}{}{}", + self.file_path, + format_hunk_section("OURS", &self.ours_diff), + format_hunk_section("THEIRS", &self.theirs_diff), + format_hunk_section("RESOLUTION", &self.resolution_diff), + ) + } + + /// Compute a content hash for deduplication + /// Two FileResolutions with identical file_path and diffs will have t= he same hash + pub fn content_hash(&self) -> u64 { + let mut hasher =3D DefaultHasher::new(); + self.file_path.hash(&mut hasher); + for hunk in &self.ours_diff { + hunk.content.hash(&mut hasher); + } + for hunk in &self.theirs_diff { + hunk.content.hash(&mut hasher); + } + for hunk in &self.resolution_diff { + hunk.content.hash(&mut hasher); + } + hasher.finish() + } +} + +/// A merge commit's conflict resolution (may contain multiple files) +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MergeResolution { + pub commit_hash: String, + pub commit_summary: String, + pub commit_date: String, // ISO format + pub author: String, + + /// All files that required manual conflict resolution in this merge + pub files: Vec, + + /// 384-dimensional embedding vector (BGE-small model) for the entire = merge + #[serde(skip_serializing_if =3D "Option::is_none")] + pub embedding: Option>, +} + +impl MergeResolution { + /// Generate embedding text from all file resolutions + pub fn to_embedding_text(&self) -> String { + let mut text =3D format!("Merge: {}\n{}\n\n", self.commit_hash, se= lf.commit_summary); + for file in &self.files { + text.push_str(&file.to_embedding_text()); + text.push_str("\n---\n\n"); + } + text + } +} + +/// Collection of all learned resolutions +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct ResolutionStore { + pub version: u32, + pub resolutions: Vec, + /// Commits we've processed (including those with only duplicate files) + #[serde(default)] + pub processed_commits: Vec, +} + +impl ResolutionStore { + pub fn load(path: &Path) -> Result { + if path.exists() { + let content =3D std::fs::read_to_string(path)?; + Ok(serde_json::from_str(&content)?) + } else { + Ok(Self { version: 2, resolutions: Vec::new(), processed_commi= ts: Vec::new() }) + } + } + + pub fn save(&self, path: &Path) -> Result<()> { + // Use compact JSON for faster serialization (use jq to pretty-pri= nt if needed) + let content =3D serde_json::to_string(self)?; + std::fs::write(path, content)?; + Ok(()) + } +} + +/// Run a git command and return stdout +fn git(args: &[&str]) -> Result { + let output =3D Command::new("git") + .args(args) + .output() + .context("Failed to run git")?; + + if !output.status.success() { + let stderr =3D String::from_utf8_lossy(&output.stderr); + bail!("git {} failed: {}", args.join(" "), stderr); + } + + Ok(String::from_utf8_lossy(&output.stdout).to_string()) +} + +/// Run a git command, return stdout, allow failure +fn git_allow_fail(args: &[&str]) -> Option { + Command::new("git") + .args(args) + .output() + .ok() + .filter(|o| o.status.success()) + .map(|o| String::from_utf8_lossy(&o.stdout).to_string()) +} + +/// Check we're in a git repository +fn check_repo() -> Result<()> { + git(&["rev-parse", "--git-dir"])?; + Ok(()) +} + +/// Get merge commits in range (or all history) +fn get_merge_commits(range: Option<&str>) -> Result> { + let args: Vec<&str> =3D match range { + Some(r) =3D> vec!["log", "--merges", "--format=3D%H", r], + None =3D> vec!["log", "--merges", "--format=3D%H"], + }; + + let output =3D git(&args)?; + Ok(output.lines().map(|s| s.to_string()).collect()) +} + +/// Metadata extracted from a git commit +struct CommitMetadata { + summary: String, + date: String, + author: String, +} + +/// Get commit metadata +fn get_commit_metadata(hash: &str) -> CommitMetadata { + let format =3D git_allow_fail(&["log", "-1", "--format=3D%s%n%aI%n%an = <%ae>", hash]) + .unwrap_or_default(); + let mut lines =3D format.lines(); + CommitMetadata { + summary: lines.next().unwrap_or_default().to_string(), + date: lines.next().unwrap_or_default().to_string(), + author: lines.next().unwrap_or_default().to_string(), + } +} + +/// Get parent commits of a merge +fn get_parents(hash: &str) -> Result> { + let output =3D git(&["log", "-1", "--format=3D%P", hash])?; + Ok(output.split_whitespace().map(|s| s.to_string()).collect()) +} + +/// Get merge base between two commits +fn get_merge_base(commit1: &str, commit2: &str) -> Option { + git_allow_fail(&["merge-base", commit1, commit2]) + .map(|s| s.trim().to_string()) +} + +/// Extract file type from path +fn get_file_type(path: &str) -> String { + Path::new(path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_string() +} + +/// Extract subsystem from path (first or second directory component) +fn get_subsystem(path: &str) -> String { + let parts: Vec<&str> =3D path.split('/').collect(); + match parts.first() { + Some(&"drivers") | Some(&"fs") | Some(&"net") | Some(&"arch") | So= me(&"sound") =3D> { + parts.get(1).unwrap_or(&"").to_string() + } + Some(first) =3D> first.to_string(), + None =3D> String::new(), + } +} + +/// Get unified diff between two commits for a specific file +fn get_diff(from: &str, to: &str, file: &str) -> Option { + git_allow_fail(&["diff", "-U3", from, to, "--", file]) +} + +/// Get file content at a specific commit +fn get_file_at_commit(commit: &str, path: &str) -> Option { + git_allow_fail(&["show", &format!("{}:{}", commit, path)]) +} + +/// Parse unified diff into hunks +fn parse_diff_hunks(diff: &str) -> Vec { + let mut hunks =3D Vec::new(); + let mut current_hunk: Option<(u32, u32, u32, Vec)> =3D None; + + for line in diff.lines() { + if line.starts_with("@@") { + // Save previous hunk + if let Some((start, orig_count, new_count, lines)) =3D current= _hunk.take() { + hunks.push(DiffHunk { + start_line: start, + original_count: orig_count, + new_count, + content: lines.join("\n"), + }); + } + + // Parse hunk header: @@ -start,count +start,count @@ + if let Some(header) =3D parse_hunk_header(line) { + current_hunk =3D Some((header.0, header.1, header.2, vec![= line.to_string()])); + } + } else if current_hunk.is_some() && (line.starts_with('+') || line= .starts_with('-') || line.starts_with(' ')) { + if let Some((_, _, _, ref mut lines)) =3D current_hunk { + lines.push(line.to_string()); + } + } + } + + // Save last hunk + if let Some((start, orig_count, new_count, lines)) =3D current_hunk { + hunks.push(DiffHunk { + start_line: start, + original_count: orig_count, + new_count, + content: lines.join("\n"), + }); + } + + hunks +} + +/// Parse a hunk header like "@@ -10,5 +10,7 @@" -> (start, orig_count, ne= w_count) +fn parse_hunk_header(line: &str) -> Option<(u32, u32, u32)> { + let line =3D line.trim_start_matches("@@ "); + let parts: Vec<&str> =3D line.split(' ').collect(); + if parts.len() < 2 { + return None; + } + + let parse_range =3D |s: &str| -> (u32, u32) { + let s =3D s.trim_start_matches(['-', '+']); + if let Some((start, count)) =3D s.split_once(',') { + (start.parse().unwrap_or(1), count.parse().unwrap_or(1)) + } else { + (s.parse().unwrap_or(1), 1) + } + }; + + let (orig_start, orig_count) =3D parse_range(parts[0]); + let (_, new_count) =3D parse_range(parts[1]); + + Some((orig_start, orig_count, new_count)) +} + +/// Find files modified in both branches +fn find_modified_in_both(parent1: &str, parent2: &str, base: &str) -> Resu= lt> { + let changed1 =3D git_allow_fail(&["diff", "--name-only", base, parent1= ]) + .unwrap_or_default(); + let changed2 =3D git_allow_fail(&["diff", "--name-only", base, parent2= ]) + .unwrap_or_default(); + + let files1: HashSet<_> =3D changed1.lines().collect(); + let files2: HashSet<_> =3D changed2.lines().collect(); + + Ok(files1.intersection(&files2).map(|s| s.to_string()).collect()) +} + +/// Extract conflict resolutions from a merge commit +/// Returns None if no manual conflict resolution was needed +fn extract_resolution(hash: &str) -> Result> { + let parents =3D get_parents(hash)?; + if parents.len() < 2 { + return Ok(None); + } + + let parent1 =3D &parents[0]; + let parent2 =3D &parents[1]; + + let base =3D match get_merge_base(parent1, parent2) { + Some(b) =3D> b, + None =3D> return Ok(None), + }; + + let meta =3D get_commit_metadata(hash); + let modified =3D find_modified_in_both(parent1, parent2, &base)?; + + let mut files =3D Vec::new(); + + for file_path in modified { + // Get diffs: base->ours, base->theirs, base->resolution + let ours_diff_raw =3D get_diff(&base, parent1, &file_path); + let theirs_diff_raw =3D get_diff(&base, parent2, &file_path); + let resolution_diff_raw =3D get_diff(&base, hash, &file_path); + + // Parse into hunks + let ours_hunks =3D ours_diff_raw.as_ref().map(|d| parse_diff_hunks= (d)).unwrap_or_default(); + let theirs_hunks =3D theirs_diff_raw.as_ref().map(|d| parse_diff_h= unks(d)).unwrap_or_default(); + let resolution_hunks =3D resolution_diff_raw.as_ref().map(|d| pars= e_diff_hunks(d)).unwrap_or_default(); + + // Skip if no actual changes + if ours_hunks.is_empty() && theirs_hunks.is_empty() { + continue; + } + + // Skip if ours =3D=3D theirs (no real conflict) + if ours_diff_raw =3D=3D theirs_diff_raw { + continue; + } + + // Only keep if resolution differs from BOTH parents (manual merge= required) + let ours_content =3D get_file_at_commit(parent1, &file_path); + let theirs_content =3D get_file_at_commit(parent2, &file_path); + let resolution_content =3D get_file_at_commit(hash, &file_path); + + if resolution_content =3D=3D ours_content || resolution_content = =3D=3D theirs_content { + continue; // Trivial resolution, no manual merge needed + } + + files.push(FileResolution { + file_path: file_path.clone(), + file_type: get_file_type(&file_path), + subsystem: get_subsystem(&file_path), + ours_diff: ours_hunks, + theirs_diff: theirs_hunks, + resolution_diff: resolution_hunks, + }); + } + + // Only return if there were actual conflicts + if files.is_empty() { + return Ok(None); + } + + Ok(Some(MergeResolution { + commit_hash: hash.to_string(), + commit_summary: meta.summary, + commit_date: meta.date, + author: meta.author, + files, + embedding: None, + })) +} + +fn learn(range: Option<&str>) -> Result<()> { + check_repo()?; + + let store_path =3D Path::new(STORE_PATH); + let mut store =3D ResolutionStore::load(store_path)?; + store.version =3D 3; // Upgrade version (grouped by commit) + + // First, deduplicate existing store + let mut seen_hashes: HashSet =3D HashSet::new(); + let mut existing_duplicates_removed =3D 0usize; + + store.resolutions =3D store.resolutions + .into_iter() + .filter_map(|mut resolution| { + let original_len =3D resolution.files.len(); + resolution.files.retain(|f| { + let hash =3D f.content_hash(); + if seen_hashes.contains(&hash) { + false // Duplicate, remove + } else { + seen_hashes.insert(hash); + true // Unique, keep + } + }); + existing_duplicates_removed +=3D original_len - resolution.fil= es.len(); + + if resolution.files.is_empty() { + None + } else { + Some(resolution) + } + }) + .collect(); + + if existing_duplicates_removed > 0 { + println!("Deduplicated existing store: removed {} duplicate files", + existing_duplicates_removed); + } + + // Track existing commits to avoid re-analyzing (includes commits with= only duplicate files) + let mut processed_commits: HashSet<_> =3D store.processed_commits.iter= ().cloned().collect(); + // Also include commits from resolutions (for backwards compatibility = with old stores) + for r in &store.resolutions { + processed_commits.insert(r.commit_hash.clone()); + } + + println!("Existing store: {} commits, {} unique file resolutions, {} t= otal processed", + store.resolutions.len(), seen_hashes.len(), processed_commits= .len()); + + let merge_commits =3D get_merge_commits(range)?; + let total_commits =3D merge_commits.len(); + + // Filter to only new commits + let new_commits: Vec<_> =3D merge_commits + .into_iter() + .filter(|h| !processed_commits.contains(h)) + .collect(); + + println!("Found {} merge commits ({} new to analyze)", total_commits, = new_commits.len()); + + if new_commits.is_empty() { + if existing_duplicates_removed > 0 { + store.save(store_path)?; + println!("Store saved after deduplication."); + } else { + println!("No new commits to process."); + } + return Ok(()); + } + + // Configure thread pool for git subprocesses + let num_threads =3D std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(8); + + let pool =3D rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .context("Failed to build thread pool")?; + + println!("Using {} threads", num_threads); + + // Progress counter + let processed =3D AtomicUsize::new(0); + let total_new =3D new_commits.len(); + + // Process commits in parallel + let resolutions: Vec =3D pool.install(|| { + new_commits + .par_iter() + .filter_map(|hash| { + let count =3D processed.fetch_add(1, Ordering::Relaxed) + = 1; + if count % 100 =3D=3D 0 || count =3D=3D total_new { + eprintln!(" Progress: {}/{}", count, total_new); + } + + match extract_resolution(hash) { + Ok(Some(resolution)) =3D> Some(resolution), + Ok(None) =3D> None, + Err(e) =3D> { + eprintln!("Warning: Failed to analyze {}: {}", &ha= sh[..12], e); + None + } + } + }) + .collect() + }); + + // Count files before deduplication + let total_files_before: usize =3D resolutions.iter().map(|r| r.files.l= en()).sum(); + let commits_with_conflicts_before =3D resolutions.len(); + + // Filter out duplicate file resolutions using the same hash set + let mut new_duplicates_skipped =3D 0usize; + + let deduped_resolutions: Vec =3D resolutions + .into_iter() + .filter_map(|mut resolution| { + let original_len =3D resolution.files.len(); + resolution.files.retain(|f| { + let hash =3D f.content_hash(); + if seen_hashes.contains(&hash) { + false // Duplicate, skip + } else { + seen_hashes.insert(hash); + true // Unique, keep + } + }); + new_duplicates_skipped +=3D original_len - resolution.files.le= n(); + + // Only keep commits that still have at least one unique file + if resolution.files.is_empty() { + None + } else { + Some(resolution) + } + }) + .collect(); + + // Aggregate results + let commits_stored =3D deduped_resolutions.len(); + let files_stored: usize =3D deduped_resolutions.iter().map(|r| r.files= .len()).sum(); + + // Track all processed commits (including those with only duplicate fi= les) + for commit in &new_commits { + processed_commits.insert(commit.clone()); + } + store.processed_commits =3D processed_commits.into_iter().collect(); + + store.resolutions.extend(deduped_resolutions); + store.save(store_path)?; + + // Calculate approximate size + let json_size =3D std::fs::metadata(store_path).map(|m| m.len()).unwra= p_or(0); + let total_stored_files: usize =3D store.resolutions.iter().map(|r| r.f= iles.len()).sum(); + + println!("\nResults:"); + println!(" Merge commits analyzed: {}", total_commits); + println!(" Commits with conflicts: {}", commits_with_conflicts_before= ); + println!(" Files found: {}", total_files_before); + if existing_duplicates_removed > 0 { + println!(" Existing duplicates removed: {}", existing_duplicates_= removed); + } + println!(" New duplicate files skipped: {}", new_duplicates_skipped); + println!(" New commits stored: {}", commits_stored); + println!(" New files stored: {}", files_stored); + println!(" Total in store: {} commits, {} files", store.resolutions.l= en(), total_stored_files); + println!(" Output size: {:.2} MB", json_size as f64 / 1024.0 / 1024.0= ); + println!("\nResolutions saved to: {}", store_path.display()); + + Ok(()) +} + +fn main() -> Result<()> { + let cli =3D Cli::parse(); + + match cli.command { + Commands::Learn { range } =3D> learn(range.as_deref()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use clap::CommandFactory; + use std::fs; + use tempfile::TempDir; + + #[test] + fn verify_cli() { + Cli::command().debug_assert(); + } + + #[test] + fn test_learn_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "learn"]).unwrap(); + match cli.command { + Commands::Learn { range } =3D> assert!(range.is_none()), + } + } + + #[test] + fn test_learn_command_with_range() { + let cli =3D Cli::try_parse_from(["llminus", "learn", "v6.0..v6.1"]= ).unwrap(); + match cli.command { + Commands::Learn { range } =3D> assert_eq!(range, Some("v6.0..v= 6.1".to_string())), + } + } + + #[test] + fn test_get_file_type() { + assert_eq!(get_file_type("foo/bar.c"), "c"); + assert_eq!(get_file_type("foo/bar.rs"), "rs"); + assert_eq!(get_file_type("Makefile"), ""); + assert_eq!(get_file_type("include/linux/module.h"), "h"); + } + + #[test] + fn test_get_subsystem() { + assert_eq!(get_subsystem("drivers/gpu/drm/foo.c"), "gpu"); + assert_eq!(get_subsystem("fs/ext4/inode.c"), "ext4"); + assert_eq!(get_subsystem("kernel/sched/core.c"), "kernel"); + assert_eq!(get_subsystem("net/ipv4/tcp.c"), "ipv4"); + assert_eq!(get_subsystem("mm/memory.c"), "mm"); + } + + #[test] + fn test_parse_hunk_header() { + assert_eq!(parse_hunk_header("@@ -10,5 +10,7 @@"), Some((10, 5, 7)= )); + assert_eq!(parse_hunk_header("@@ -1 +1,2 @@"), Some((1, 1, 2))); + assert_eq!(parse_hunk_header("@@ -100,20 +105,25 @@ func"), Some((= 100, 20, 25))); + } + + #[test] + fn test_parse_diff_hunks() { + let diff =3D r#"diff --git a/file.c b/file.c +index 123..456 789 +--- a/file.c ++++ b/file.c +@@ -10,3 +10,4 @@ context + unchanged +-removed ++added ++another +"#; + let hunks =3D parse_diff_hunks(diff); + assert_eq!(hunks.len(), 1); + assert_eq!(hunks[0].start_line, 10); + assert!(hunks[0].content.contains("-removed")); + assert!(hunks[0].content.contains("+added")); + } + + fn init_test_repo() -> TempDir { + let dir =3D TempDir::new().unwrap(); + Command::new("git") + .args(["init"]) + .current_dir(dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["config", "user.email", "test@test.com"]) + .current_dir(dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["config", "user.name", "Test"]) + .current_dir(dir.path()) + .output() + .unwrap(); + dir + } + + fn create_commit(dir: &TempDir, filename: &str, content: &str, msg: &s= tr) { + fs::write(dir.path().join(filename), content).unwrap(); + Command::new("git") + .args(["add", filename]) + .current_dir(dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["commit", "-m", msg]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + fn create_branch(dir: &TempDir, name: &str) { + Command::new("git") + .args(["checkout", "-b", name]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + fn checkout(dir: &TempDir, name: &str) { + Command::new("git") + .args(["checkout", name]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + fn merge(dir: &TempDir, branch: &str, msg: &str) { + Command::new("git") + .args(["merge", "--no-ff", "-m", msg, branch]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + #[test] + fn test_resolution_store_roundtrip() { + let dir =3D TempDir::new().unwrap(); + let store_path =3D dir.path().join("resolutions.json"); + + let mut store =3D ResolutionStore { version: 3, resolutions: Vec::= new() }; + store.resolutions.push(MergeResolution { + commit_hash: "abc123".to_string(), + commit_summary: "Test merge".to_string(), + commit_date: "2024-01-15T10:00:00Z".to_string(), + author: "Test ".to_string(), + files: vec![FileResolution { + file_path: "test.c".to_string(), + file_type: "c".to_string(), + subsystem: "test".to_string(), + ours_diff: vec![DiffHunk { + start_line: 10, + original_count: 3, + new_count: 4, + content: "@@ -10,3 +10,4 @@\n-old\n+new".to_string(), + }], + theirs_diff: vec![], + resolution_diff: vec![], + }], + embedding: None, + }); + + store.save(&store_path).unwrap(); + let loaded =3D ResolutionStore::load(&store_path).unwrap(); + + assert_eq!(loaded.version, 3); + assert_eq!(loaded.resolutions.len(), 1); + assert_eq!(loaded.resolutions[0].commit_hash, "abc123"); + assert_eq!(loaded.resolutions[0].files.len(), 1); + assert_eq!(loaded.resolutions[0].files[0].file_path, "test.c"); + assert_eq!(loaded.resolutions[0].files[0].file_type, "c"); + + // Test embedding text generation for merge + let embedding =3D loaded.resolutions[0].to_embedding_text(); + assert!(embedding.contains("Merge: abc123")); + assert!(embedding.contains("File: test.c")); + assert!(embedding.contains("=3D=3D=3D OURS =3D=3D=3D")); + assert!(embedding.contains("-old")); + assert!(embedding.contains("+new")); + } + + #[test] + fn test_git_in_repo() { + let dir =3D init_test_repo(); + std::env::set_current_dir(dir.path()).unwrap(); + create_commit(&dir, "file.txt", "initial", "initial commit"); + let result =3D check_repo(); + assert!(result.is_ok()); + } + + #[test] + fn test_get_merge_commits() { + let dir =3D init_test_repo(); + std::env::set_current_dir(dir.path()).unwrap(); + + create_commit(&dir, "file.txt", "initial", "initial commit"); + create_branch(&dir, "feature"); + create_commit(&dir, "feature.txt", "feature", "feature commit"); + checkout(&dir, "master"); + create_commit(&dir, "main.txt", "main", "main commit"); + merge(&dir, "feature", "Merge feature"); + + let merges =3D get_merge_commits(None).unwrap(); + assert_eq!(merges.len(), 1); + } +} --=20 2.51.0 From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 42DFE28AAEB for ; Sun, 11 Jan 2026 21:29:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166960; cv=none; b=mjaZ7h7q59f/cYFE2i549iAZLwz0weyVoOiZFvAgPtaGgBgTe0SfzlkP2HaQBwxg7mTBegxUaPXQrbLtwE0RPUrxug+AbOueQ02hwggw+cAyQRV3n+tpl//MlnRmHcLu9tCnGlqrb9omqjFqSnP1FtBKfscQhbtGB+5SGAmF3w4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166960; c=relaxed/simple; bh=rB9ge6/sTvQ0rqdXrCN2IZzyFDW2qz+HFsSCmyubcyY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=iV0vSPqudY5zwqc/IZiegYFNsqMinHu3vvsbb0nXBj9tNt/O7XIO11fmyUfr8wtc1TNdmpKoq575OrZTp7tIEV35SXm8VyjdmWeUE4Jk46SN2xY8Pf6oyBqLwR+NG/w0riEu/M/7slPbe7jv1dbdqV8eKmRsygdpWiTHrbpufWc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=riE0PQip; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="riE0PQip" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 590CDC4CEF7; Sun, 11 Jan 2026 21:29:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166959; bh=rB9ge6/sTvQ0rqdXrCN2IZzyFDW2qz+HFsSCmyubcyY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=riE0PQipZiuNr8AlQf1TUzhdp4I3+eRp7RDtAKQ4SYyUSKiZtw3zcT/WJ2oUHbYTG 3Xw2BZwT445HPXsW5eNLutXFaPmTYWBDcc6Mxk8wv1Q+PaonnBpMjv4OorrxWsdbWl rU5dZx3E85/vFlbNKcUsY+UvQogaqR3WfB5zK/o7AT6ER4T3Q2QdswuD+/BzC+E2m5 oVCv1CfKXy2kWGVuM/b44DlD46CewMEH6BEdS29hK2BtYqKwdhNOPtyqwGCf/BLQkq kugruM7zI+TYibidw82w+qw9eLE/BAUQHUhpVvyVO38pjauhQfk2+j1HpP8Um2Coo9 meqjNs9duguPg== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 2/7] LLMinus: Add vectorize command with fastembed Date: Sun, 11 Jan 2026 16:29:10 -0500 Message-ID: <20260111212915.195056-3-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the vectorize command that generates embeddings for stored conflict resolutions using the BGE-small-en-v1.5 model via fastembed. The model produces 384-dimensional vectors. Processing is batched with incremental saves after each batch for crash recovery. Resolutions with existing embeddings are skipped. This enables RAG-based similarity search for finding historical conflict resolutions similar to current merge conflicts. Also adds cosine_similarity= () and init_embedding_model() helpers with corresponding tests. Signed-off-by: Sasha Levin --- tools/llminus/Cargo.toml | 1 + tools/llminus/src/main.rs | 157 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/tools/llminus/Cargo.toml b/tools/llminus/Cargo.toml index bdb42561a056..86740174de59 100644 --- a/tools/llminus/Cargo.toml +++ b/tools/llminus/Cargo.toml @@ -10,6 +10,7 @@ repository =3D "https://git.kernel.org/pub/scm/linux/kern= el/git/torvalds/linux.git [dependencies] anyhow =3D "1" clap =3D { version =3D "4", features =3D ["derive"] } +fastembed =3D "5" rayon =3D "1" serde =3D { version =3D "1", features =3D ["derive"] } serde_json =3D "1" diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index 508bdc085173..b97505d0cd99 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -2,6 +2,7 @@ =20 use anyhow::{bail, Context, Result}; use clap::{Parser, Subcommand}; +use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use std::collections::HashSet; @@ -28,6 +29,12 @@ enum Commands { /// Git revision range (e.g., "v6.0..v6.1"). If not specified, lea= rns from entire history. range: Option, }, + /// Generate embeddings for stored resolutions (for RAG similarity sea= rch) + Vectorize { + /// Batch size for embedding generation (default: 64) + #[arg(short, long, default_value =3D "64")] + batch_size: usize, + }, } =20 /// A single diff hunk representing a change region @@ -588,11 +595,118 @@ fn learn(range: Option<&str>) -> Result<()> { Ok(()) } =20 +/// Compute cosine similarity between two vectors +fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() !=3D b.len() || a.is_empty() { + return 0.0; + } + + let dot: f32 =3D a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); + let norm_a: f32 =3D a.iter().map(|x| x * x).sum::().sqrt(); + let norm_b: f32 =3D b.iter().map(|x| x * x).sum::().sqrt(); + + if norm_a =3D=3D 0.0 || norm_b =3D=3D 0.0 { + return 0.0; + } + + dot / (norm_a * norm_b) +} + +/// Initialize the BGE-small embedding model +fn init_embedding_model() -> Result { + TextEmbedding::try_new( + InitOptions::new(EmbeddingModel::BGESmallENV15) + .with_show_download_progress(true), + ).context("Failed to initialize embedding model") +} + +fn vectorize(batch_size: usize) -> Result<()> { + let store_path =3D Path::new(STORE_PATH); + + if !store_path.exists() { + bail!("No resolutions found. Run 'llminus learn' first."); + } + + let mut store =3D ResolutionStore::load(store_path)?; + + // Count how many need embeddings + let need_embedding: Vec =3D store + .resolutions + .iter() + .enumerate() + .filter(|(_, r)| r.embedding.is_none()) + .map(|(i, _)| i) + .collect(); + + if need_embedding.is_empty() { + println!("All {} resolutions already have embeddings.", store.reso= lutions.len()); + return Ok(()); + } + + println!("Found {} resolutions needing embeddings", need_embedding.len= ()); + println!("Initializing embedding model (BGE-small-en, ~33MB download o= n first run)..."); + + // Initialize the embedding model + let mut model =3D init_embedding_model()?; + + println!("Model loaded. Generating embeddings...\n"); + + // Process in batches + let total_batches =3D need_embedding.len().div_ceil(batch_size); + + for (batch_num, chunk) in need_embedding.chunks(batch_size).enumerate(= ) { + // Collect texts for this batch + let texts: Vec =3D chunk + .iter() + .map(|&i| store.resolutions[i].to_embedding_text()) + .collect(); + + // Generate embeddings + let embeddings =3D model + .embed(texts, None) + .context("Failed to generate embeddings")?; + + // Assign embeddings back to resolutions + for (j, &idx) in chunk.iter().enumerate() { + store.resolutions[idx].embedding =3D Some(embeddings[j].clone(= )); + } + + // Progress report + let done =3D (batch_num + 1) * batch_size.min(chunk.len()); + let pct =3D (done as f64 / need_embedding.len() as f64 * 100.0).mi= n(100.0); + println!( + " Batch {}/{}: {:.1}% ({}/{})", + batch_num + 1, + total_batches, + pct, + done.min(need_embedding.len()), + need_embedding.len() + ); + + // Save after each batch (incremental progress) + store.save(store_path)?; + } + + // Final stats + let json_size =3D std::fs::metadata(store_path).map(|m| m.len()).unwra= p_or(0); + let with_embeddings =3D store.resolutions.iter().filter(|r| r.embeddin= g.is_some()).count(); + + println!("\nResults:"); + println!(" Total resolutions: {}", store.resolutions.len()); + println!(" With embeddings: {}", with_embeddings); + println!(" Embedding dimensions: 384"); + println!(" Output size: {:.2} MB", json_size as f64 / 1024.0 / 1024.0= ); + println!("\nEmbeddings saved to: {}", store_path.display()); + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 match cli.command { Commands::Learn { range } =3D> learn(range.as_deref()), + Commands::Vectorize { batch_size } =3D> vectorize(batch_size), } } =20 @@ -613,6 +727,7 @@ fn test_learn_command_parses() { let cli =3D Cli::try_parse_from(["llminus", "learn"]).unwrap(); match cli.command { Commands::Learn { range } =3D> assert!(range.is_none()), + _ =3D> panic!("Expected Learn command"), } } =20 @@ -621,9 +736,51 @@ fn test_learn_command_with_range() { let cli =3D Cli::try_parse_from(["llminus", "learn", "v6.0..v6.1"]= ).unwrap(); match cli.command { Commands::Learn { range } =3D> assert_eq!(range, Some("v6.0..v= 6.1".to_string())), + _ =3D> panic!("Expected Learn command"), } } =20 + #[test] + fn test_vectorize_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "vectorize"]).unwrap(); + match cli.command { + Commands::Vectorize { batch_size } =3D> assert_eq!(batch_size,= 64), + _ =3D> panic!("Expected Vectorize command"), + } + } + + #[test] + fn test_vectorize_command_with_batch_size() { + let cli =3D Cli::try_parse_from(["llminus", "vectorize", "-b", "12= 8"]).unwrap(); + match cli.command { + Commands::Vectorize { batch_size } =3D> assert_eq!(batch_size,= 128), + _ =3D> panic!("Expected Vectorize command"), + } + } + + #[test] + fn test_cosine_similarity() { + // Identical vectors should have similarity 1.0 + let a =3D vec![1.0, 0.0, 0.0]; + let b =3D vec![1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 1.0).abs() < 0.0001); + + // Orthogonal vectors should have similarity 0.0 + let a =3D vec![1.0, 0.0, 0.0]; + let b =3D vec![0.0, 1.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 0.0).abs() < 0.0001); + + // Opposite vectors should have similarity -1.0 + let a =3D vec![1.0, 0.0, 0.0]; + let b =3D vec![-1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - (-1.0)).abs() < 0.0001); + + // Different length vectors return 0 + let a =3D vec![1.0, 0.0]; + let b =3D vec![1.0, 0.0, 0.0]; + assert_eq!(cosine_similarity(&a, &b), 0.0); + } + #[test] fn test_get_file_type() { assert_eq!(get_file_type("foo/bar.c"), "c"); --=20 2.51.0 From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E20E228CF5F for ; Sun, 11 Jan 2026 21:29:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166961; cv=none; b=FhYEdLbF30riiAaXlp3Cy/PneBZQh4PTQQFINKHFQwJ+j5i/I7jXjsojvSK2Lnc1tjsDOCqYL9jSQAlcQLZqF3rH+QoUPjaDzrgzbX0SY85dd4JTjA1ZyNX+/KWgkY98tIIVYG5e/3BaR7AtvuqxC+UHvO3Ofybom9oVIDReI8U= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166961; c=relaxed/simple; bh=VvaIEDEPTP86+x2MhUJftEpO6xpXqqhemVSJMjBncRs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=BWBXD3vAsIqvSHozclrIxMePm83eak6uOlVlXIH/SAnbGewTluMIx7B3jFukUMA5FLfbLXGM9dKVWFbzaT3jdurfDtx2ZhBDIJxmWNuPRQ4cv0J61PJRY+1Yvyy1sOuOBAm2jTdyhG5xKn1n47wGp8W3rglGnn7t7CoLRtb7xDA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=B9FQbncj; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="B9FQbncj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 32B5DC19422; Sun, 11 Jan 2026 21:29:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166960; bh=VvaIEDEPTP86+x2MhUJftEpO6xpXqqhemVSJMjBncRs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=B9FQbncjJDHTsadr2apSXiAkYgjZXSYvzgOH6QYZNmOL14bPU2sXSZrhmtOjY+O8C c6RBAiazEUft3FfNygvDk5RBysa+1hPOnhqbIPyYd1Lb84Pokbe9hT0ge/YWz9N7qU P6dWC9IRecekKRVUJcZfv/zLCE06o0oiAxhF7BhcPvwcoL+TOHVngcGgj1/SQSqz1y PKncJy4LiQLmifOQc9yZp8uRxfOeUN4TZBCEYqBNfTqeebYPteu6oQbQqfQeryB0dw c7zO/reg5EIPIYFSQvgdDH9MZrAqEpYm9OdVrxr8VfsE+pGrBYW0QI6+Gb/PxymbD6 riIIOCAirsn/g== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 3/7] LLMinus: Add find command for similarity search Date: Sun, 11 Jan 2026 16:29:11 -0500 Message-ID: <20260111212915.195056-4-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the find command that searches for historical conflict resolutions similar to current merge conflicts using vector similarity. It detects conflicts via git diff, parses conflict markers including diff3 style with base content, generates embeddings, and computes cosine similarity against stored resolutions to display the top N most similar matches. This enables developers to quickly find relevant examples of how similar conflicts were resolved in the past. Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 327 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index b97505d0cd99..df7262bd6a91 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -35,6 +35,12 @@ enum Commands { #[arg(short, long, default_value =3D "64")] batch_size: usize, }, + /// Find similar historical conflict resolutions for current conflicts + Find { + /// Number of similar resolutions to show (default: 1) + #[arg(default_value =3D "1")] + n: usize, + }, } =20 /// A single diff hunk representing a change region @@ -701,12 +707,244 @@ fn vectorize(batch_size: usize) -> Result<()> { Ok(()) } =20 +/// A file with active conflict markers +#[derive(Debug)] +struct ConflictFile { + path: String, + ours_content: String, + theirs_content: String, + base_content: Option, +} + +impl ConflictFile { + /// Generate embedding text for this conflict + fn to_embedding_text(&self) -> String { + let mut text =3D format!("File: {}\n\n", self.path); + + text.push_str("=3D=3D=3D OURS =3D=3D=3D\n"); + text.push_str(&self.ours_content); + text.push_str("\n\n"); + + text.push_str("=3D=3D=3D THEIRS =3D=3D=3D\n"); + text.push_str(&self.theirs_content); + text.push('\n'); + + if let Some(ref base) =3D self.base_content { + text.push_str("\n=3D=3D=3D BASE =3D=3D=3D\n"); + text.push_str(base); + text.push('\n'); + } + + text + } +} + +/// Get list of files with unmerged conflicts +fn get_conflicted_files() -> Result> { + // git diff --name-only --diff-filter=3DU shows unmerged files + let output =3D git(&["diff", "--name-only", "--diff-filter=3DU"])?; + Ok(output.lines().map(|s| s.to_string()).filter(|s| !s.is_empty()).col= lect()) +} + +/// State machine for parsing conflict markers +enum ConflictParseState { + Outside, + InOurs, + InBase, + InTheirs, +} + +/// Append a line to a string, adding newline separator if non-empty +fn append_line(s: &mut String, line: &str) { + if !s.is_empty() { + s.push('\n'); + } + s.push_str(line); +} + +/// Parse conflict markers from a file and extract ours/theirs/base content +fn parse_conflict_file(path: &str) -> Result> { + let content =3D std::fs::read_to_string(path) + .with_context(|| format!("Failed to read {}", path))?; + + let mut conflicts =3D Vec::new(); + let mut state =3D ConflictParseState::Outside; + let mut current_ours =3D String::new(); + let mut current_theirs =3D String::new(); + let mut current_base: Option =3D None; + + for line in content.lines() { + if line.starts_with("<<<<<<<") { + state =3D ConflictParseState::InOurs; + current_ours.clear(); + current_theirs.clear(); + current_base =3D None; + } else if line.starts_with("|||||||") { + // diff3 style - base content follows + state =3D ConflictParseState::InBase; + current_base =3D Some(String::new()); + } else if line.starts_with("=3D=3D=3D=3D=3D=3D=3D") { + state =3D ConflictParseState::InTheirs; + } else if line.starts_with(">>>>>>>") { + // End of conflict block - save it + conflicts.push(ConflictFile { + path: path.to_string(), + ours_content: std::mem::take(&mut current_ours), + theirs_content: std::mem::take(&mut current_theirs), + base_content: current_base.take(), + }); + state =3D ConflictParseState::Outside; + } else { + match state { + ConflictParseState::InOurs =3D> append_line(&mut current_o= urs, line), + ConflictParseState::InBase =3D> { + if let Some(ref mut base) =3D current_base { + append_line(base, line); + } + } + ConflictParseState::InTheirs =3D> append_line(&mut current= _theirs, line), + ConflictParseState::Outside =3D> {} + } + } + } + + Ok(conflicts) +} + +/// Result of a similarity search +struct SimilarResolution { + resolution: MergeResolution, + similarity: f32, +} + +/// Find similar resolutions (shared logic for find and resolve) +fn find_similar_resolutions(n: usize) -> Result<(Vec, Vec)> { + check_repo()?; + + let store_path =3D Path::new(STORE_PATH); + if !store_path.exists() { + bail!("No resolutions database found. Run 'llminus learn' first."); + } + + // Find current conflicts + let conflict_paths =3D get_conflicted_files()?; + if conflict_paths.is_empty() { + bail!("No conflicts detected. Run this command when you have activ= e merge conflicts."); + } + + // Parse all conflict regions + let mut all_conflicts =3D Vec::new(); + for path in &conflict_paths { + if let Ok(conflicts) =3D parse_conflict_file(path) { + all_conflicts.extend(conflicts); + } + } + + if all_conflicts.is_empty() { + bail!("Could not parse any conflict markers from the conflicted fi= les."); + } + + // Load the resolution store + let store =3D ResolutionStore::load(store_path)?; + let with_embeddings: Vec<_> =3D store.resolutions.iter() + .filter(|r| r.embedding.is_some()) + .collect(); + + if with_embeddings.is_empty() { + bail!("No embeddings in database. Run 'llminus vectorize' first."); + } + + // Initialize embedding model + let mut model =3D init_embedding_model()?; + + // Generate embedding for current conflicts + let conflict_text: String =3D all_conflicts.iter() + .map(|c| c.to_embedding_text()) + .collect::>() + .join("\n---\n\n"); + + let query_embeddings =3D model + .embed(vec![conflict_text], None) + .context("Failed to generate embedding for current conflict")?; + let query_embedding =3D &query_embeddings[0]; + + // Compute similarities and take top N (clone resolutions to own them) + let mut similarities: Vec<_> =3D with_embeddings.iter() + .map(|r| { + let sim =3D cosine_similarity(query_embedding, r.embedding.as_= ref().unwrap()); + (r, sim) + }) + .collect(); + + similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::= Ordering::Equal)); + + let top_n: Vec =3D similarities.into_iter() + .take(n) + .map(|(r, sim)| SimilarResolution { + resolution: (*r).clone(), + similarity: sim, + }) + .collect(); + + Ok((all_conflicts, top_n)) +} + +fn find(n: usize) -> Result<()> { + // Use find_similar_resolutions for core search logic + let (_conflicts, top_n) =3D find_similar_resolutions(n)?; + + // Display results + println!("\n{}", "=3D".repeat(80)); + println!("Top {} similar historical conflict resolution(s):", top_n.le= n()); + println!("{}", "=3D".repeat(80)); + + for (i, result) in top_n.iter().enumerate() { + let r =3D &result.resolution; + println!("\n{}. [similarity: {:.4}]", i + 1, result.similarity); + println!(" Commit: {}", r.commit_hash); + println!(" Summary: {}", r.commit_summary); + println!(" Author: {}", r.author); + println!(" Date: {}", r.commit_date); + println!(" Files ({}):", r.files.len()); + for file in &r.files { + println!(" - {} ({})", file.file_path, file.subsystem); + } + + // Show the resolution diffs for each file + println!("\n Resolution details:"); + for file in &r.files { + println!(" --- {} ---", file.file_path); + if !file.resolution_diff.is_empty() { + for hunk in &file.resolution_diff { + // Indent and print the diff + for line in hunk.content.lines() { + println!(" {}", line); + } + } + } else { + println!(" (no diff hunks recorded)"); + } + } + println!(); + } + + // Provide git show command for easy access + if let Some(top) =3D top_n.first() { + println!("{}", "-".repeat(80)); + println!("To see the full commit:"); + println!(" git show {}", top.resolution.commit_hash); + } + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 match cli.command { Commands::Learn { range } =3D> learn(range.as_deref()), Commands::Vectorize { batch_size } =3D> vectorize(batch_size), + Commands::Find { n } =3D> find(n), } } =20 @@ -758,6 +996,24 @@ fn test_vectorize_command_with_batch_size() { } } =20 + #[test] + fn test_find_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "find"]).unwrap(); + match cli.command { + Commands::Find { n } =3D> assert_eq!(n, 1), + _ =3D> panic!("Expected Find command"), + } + } + + #[test] + fn test_find_command_with_n() { + let cli =3D Cli::try_parse_from(["llminus", "find", "5"]).unwrap(); + match cli.command { + Commands::Find { n } =3D> assert_eq!(n, 5), + _ =3D> panic!("Expected Find command"), + } + } + #[test] fn test_cosine_similarity() { // Identical vectors should have similarity 1.0 @@ -952,4 +1208,75 @@ fn test_get_merge_commits() { let merges =3D get_merge_commits(None).unwrap(); assert_eq!(merges.len(), 1); } + + #[test] + fn test_parse_conflict_markers() { + let dir =3D TempDir::new().unwrap(); + let conflict_file =3D dir.path().join("conflict.c"); + let content =3D r#"int main() { +<<<<<<< HEAD + printf("ours"); +=3D=3D=3D=3D=3D=3D=3D + printf("theirs"); +>>>>>>> feature + return 0; +} +"#; + fs::write(&conflict_file, content).unwrap(); + + let conflicts =3D parse_conflict_file(conflict_file.to_str().unwra= p()).unwrap(); + assert_eq!(conflicts.len(), 1); + assert!(conflicts[0].ours_content.contains("ours")); + assert!(conflicts[0].theirs_content.contains("theirs")); + assert!(conflicts[0].base_content.is_none()); + } + + #[test] + fn test_parse_conflict_markers_diff3() { + let dir =3D TempDir::new().unwrap(); + let conflict_file =3D dir.path().join("conflict.c"); + // diff3 style with base content + let content =3D r#"int main() { +<<<<<<< HEAD + printf("ours"); +||||||| base + printf("base"); +=3D=3D=3D=3D=3D=3D=3D + printf("theirs"); +>>>>>>> feature + return 0; +} +"#; + fs::write(&conflict_file, content).unwrap(); + + let conflicts =3D parse_conflict_file(conflict_file.to_str().unwra= p()).unwrap(); + assert_eq!(conflicts.len(), 1); + assert!(conflicts[0].ours_content.contains("ours")); + assert!(conflicts[0].theirs_content.contains("theirs")); + assert!(conflicts[0].base_content.as_ref().unwrap().contains("base= ")); + } + + #[test] + fn test_parse_multiple_conflicts() { + let dir =3D TempDir::new().unwrap(); + let conflict_file =3D dir.path().join("conflict.c"); + let content =3D r#"<<<<<<< HEAD +first ours +=3D=3D=3D=3D=3D=3D=3D +first theirs +>>>>>>> feature +middle +<<<<<<< HEAD +second ours +=3D=3D=3D=3D=3D=3D=3D +second theirs +>>>>>>> feature +"#; + fs::write(&conflict_file, content).unwrap(); + + let conflicts =3D parse_conflict_file(conflict_file.to_str().unwra= p()).unwrap(); + assert_eq!(conflicts.len(), 2); + assert!(conflicts[0].ours_content.contains("first ours")); + assert!(conflicts[1].ours_content.contains("second ours")); + } } --=20 2.51.0 From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C6B6D28DB52 for ; Sun, 11 Jan 2026 21:29:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166962; cv=none; b=GSkHr51Khsk6+D1mUDO6GO8gaNLOtjkuwoYu8QgSCygHXlZPkJMg9RS/23HctVk0pQ/AjzcT8D1QmB0S7UJpXBLhYcvr00hr/aO7GkAWzRc9InecrtHh3dMCnrcEyLyrvWYezI/QUk+Qky6Y72Dzp+q2tZ6ifXtP6JSB9qQCmNc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166962; c=relaxed/simple; bh=AsfelDmn7x1VH+bQd85aXGTCvHa6ix78xjw+eaWqLoc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=dBVYY8xOXSZRADYHbP+jKc/stJQNjexppEDxKnElRWoeURXmGojxILrc+L1d4FHghjqVmNkOmR/peFQI/gjGiFxlhnYIGC7pAdpMDpQKC8/wubkp8WBtrxwA5Htpl4uN8gLxy/jI4jqmiQBP5Io2JFyY0PDMxjEN8i11FxrIg0Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=eku6zbIz; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="eku6zbIz" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0C6B0C4AF09; Sun, 11 Jan 2026 21:29:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166961; bh=AsfelDmn7x1VH+bQd85aXGTCvHa6ix78xjw+eaWqLoc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=eku6zbIzpCNmLRmtDcqnl7walvN5CAH6Fi75CjgSecMSQyIbXUEXv30nW1YqxVtIB RCXYUGnz439Nv6Et7e+ncbS+wW7AGp1wJOzpm6JWbhPBRwdDnY8KjxMW6uT+oqu0PP jeB6hHqNj0Z19WniI84C42XrE7xYeOix5xjWo7v7YwSjK244LhHFjOTqGnByHdRDf5 beSf5XZQ4A4hzLxh+IDSPAZUJf/GyLiz2DGSPBjF3Cd2zDrzlYG2iBMDIlMQ0eEMO3 YCNyReuzrf1KfILJh1YoqDt+xh2DX/6ZYU2QQYKKExuy4jaqov5tvDhYrqa2artS6U VSdlUTkSN21OQ== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 4/7] LLMinus: Add resolve command for LLM-assisted conflict resolution Date: Sun, 11 Jan 2026 16:29:12 -0500 Message-ID: <20260111212915.195056-5-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the resolve command that invokes an external LLM to assist with merge conflicts. It parses .git/MERGE_MSG for merge context, gathers conflict information from all files with markers, and finds similar historical resolutions from the database when available. The generated prompt includes current conflict content, similar historical resolutions with diffs, and instructions emphasizing understanding before acting. The LLM is directed to search lore.kernel.org for maintainer guidance, trace git history to understand both sides, and flag uncertainty rather than guessing. Output streams directly to the terminal. Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 476 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 476 insertions(+) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index df7262bd6a91..0388a881e413 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -41,6 +41,11 @@ enum Commands { #[arg(default_value =3D "1")] n: usize, }, + /// Resolve current conflicts using an LLM + Resolve { + /// Command to invoke. The prompt will be passed via stdin. + command: String, + }, } =20 /// A single diff hunk representing a change region @@ -938,6 +943,422 @@ fn find(n: usize) -> Result<()> { Ok(()) } =20 +/// Context about the current merge operation +#[derive(Debug, Default)] +struct MergeContext { + /// The branch/tag/ref being merged (from MERGE_HEAD or MERGE_MSG) + merge_source: Option, + /// The target branch (HEAD) + head_branch: Option, + /// The merge message (from .git/MERGE_MSG) + merge_message: Option, +} + +/// Extract context about the current merge operation +fn get_merge_context() -> MergeContext { + let mut ctx =3D MergeContext { + head_branch: git_allow_fail(&["rev-parse", "--abbrev-ref", "HEAD"]) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty() && s !=3D "HEAD"), + ..Default::default() + }; + + // Try to read MERGE_MSG for merge context + if let Ok(merge_msg) =3D std::fs::read_to_string(".git/MERGE_MSG") { + ctx.merge_message =3D Some(merge_msg.clone()); + + // Parse merge source from MERGE_MSG + // Common formats: + // "Merge branch 'feature-branch'" + // "Merge tag 'v6.1'" + // "Merge remote-tracking branch 'origin/main'" + // "Merge commit 'abc123'" + let first_line =3D merge_msg.lines().next().unwrap_or(""); + if let Some(source) =3D parse_merge_source(first_line) { + ctx.merge_source =3D Some(source); + } + } + + // If no merge source found from MERGE_MSG, try to describe MERGE_HEAD + if ctx.merge_source.is_none() { + // Try to get a tag name for MERGE_HEAD + if let Some(tag) =3D git_allow_fail(&["describe", "--tags", "--exa= ct-match", "MERGE_HEAD"]) { + ctx.merge_source =3D Some(tag.trim().to_string()); + } else if let Some(branch) =3D git_allow_fail(&["name-rev", "--nam= e-only", "MERGE_HEAD"]) { + let branch =3D branch.trim(); + if !branch.is_empty() && branch !=3D "undefined" { + ctx.merge_source =3D Some(branch.to_string()); + } + } + } + + ctx +} + +/// Parse merge source from a merge message first line +fn parse_merge_source(line: &str) -> Option { + // "Merge branch 'feature'" -> "feature" + // "Merge tag 'v6.1'" -> "v6.1" + // "Merge remote-tracking branch 'origin/main'" -> "origin/main" + // "Merge commit 'abc123'" -> "abc123" + + let line =3D line.trim(); + + // Look for quoted source + if let Some(start) =3D line.find('\'') { + if let Some(end) =3D line[start + 1..].find('\'') { + return Some(line[start + 1..start + 1 + end].to_string()); + } + } + + // Look for "Merge X into Y" pattern without quotes + if let Some(rest) =3D line.strip_prefix("Merge ") { + // Skip "branch ", "tag ", "commit ", "remote-tracking branch " + let rest =3D rest + .strip_prefix("remote-tracking branch ") + .or_else(|| rest.strip_prefix("branch ")) + .or_else(|| rest.strip_prefix("tag ")) + .or_else(|| rest.strip_prefix("commit ")) + .unwrap_or(rest); + + // Take until " into " or end of line + if let Some(into_pos) =3D rest.find(" into ") { + return Some(rest[..into_pos].trim().to_string()); + } + let word =3D rest.split_whitespace().next()?; + if !word.is_empty() { + return Some(word.to_string()); + } + } + + None +} + +/// Get current conflicts from the working directory +fn get_current_conflicts() -> Result> { + check_repo()?; + + // Find current conflicts + let conflict_paths =3D get_conflicted_files()?; + if conflict_paths.is_empty() { + bail!("No conflicts detected. Run this command when you have activ= e merge conflicts."); + } + + // Parse all conflict regions + let mut all_conflicts =3D Vec::new(); + for path in &conflict_paths { + if let Ok(conflicts) =3D parse_conflict_file(path) { + all_conflicts.extend(conflicts); + } + } + + if all_conflicts.is_empty() { + bail!("Could not parse any conflict markers from the conflicted fi= les."); + } + + Ok(all_conflicts) +} + +/// Try to find similar resolutions, returns empty vec if no database or e= mbeddings +fn try_find_similar_resolutions(n: usize, conflicts: &[ConflictFile]) -> V= ec { + let store_path =3D Path::new(STORE_PATH); + if !store_path.exists() { + return Vec::new(); + } + + let store =3D match ResolutionStore::load(store_path) { + Ok(s) =3D> s, + Err(_) =3D> return Vec::new(), + }; + + let with_embeddings: Vec<_> =3D store.resolutions.iter() + .filter(|r| r.embedding.is_some()) + .collect(); + + if with_embeddings.is_empty() { + return Vec::new(); + } + + // Initialize embedding model + let mut model =3D match init_embedding_model() { + Ok(m) =3D> m, + Err(_) =3D> return Vec::new(), + }; + + // Generate embedding for current conflicts + let conflict_text: String =3D conflicts.iter() + .map(|c| c.to_embedding_text()) + .collect::>() + .join("\n---\n\n"); + + let query_embeddings =3D match model.embed(vec![conflict_text], None) { + Ok(e) =3D> e, + Err(_) =3D> return Vec::new(), + }; + let query_embedding =3D &query_embeddings[0]; + + // Compute similarities and take top N + let mut similarities: Vec<_> =3D with_embeddings.iter() + .map(|r| { + let sim =3D cosine_similarity(query_embedding, r.embedding.as_= ref().unwrap()); + (r, sim) + }) + .collect(); + + similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::= Ordering::Equal)); + + similarities.into_iter() + .take(n) + .map(|(r, sim)| SimilarResolution { + resolution: (*r).clone(), + similarity: sim, + }) + .collect() +} + +/// Build the LLM prompt for conflict resolution +fn build_resolve_prompt( + conflicts: &[ConflictFile], + similar: &[SimilarResolution], + merge_ctx: &MergeContext, +) -> String { + let mut prompt =3D String::new(); + + // Header with high-stakes framing + prompt.push_str("# Linux Kernel Merge Conflict Resolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintainer re= solving a merge conflict.\n\n"); + prompt.push_str("**Important:** Incorrect merge resolutions have histo= rically introduced subtle bugs "); + prompt.push_str("that affected millions of users and took months to di= agnose. A resolution that "); + prompt.push_str("compiles but has semantic errors is worse than no res= olution at all.\n\n"); + prompt.push_str("Take the time to fully understand both sides of the c= onflict before attempting "); + prompt.push_str("any resolution. If after investigation you're not con= fident, say so - it's "); + prompt.push_str("better to escalate to a human than to introduce a sub= tle bug.\n\n"); + + // Merge context + prompt.push_str("## Merge Context\n\n"); + if let Some(ref source) =3D merge_ctx.merge_source { + prompt.push_str(&format!("**Merging:** `{}`\n", source)); + } + if let Some(ref head) =3D merge_ctx.head_branch { + prompt.push_str(&format!("**Into:** `{}`\n", head)); + } + if let Some(ref msg) =3D merge_ctx.merge_message { + let first_line =3D msg.lines().next().unwrap_or(""); + prompt.push_str(&format!("**Merge message:** {}\n", first_line)); + } + prompt.push('\n'); + + // Current conflicts + prompt.push_str("## Current Conflicts\n\n"); + + for conflict in conflicts { + prompt.push_str(&format!("### File: {}\n\n", conflict.path)); + prompt.push_str("**Our version (HEAD):**\n```\n"); + prompt.push_str(&conflict.ours_content); + prompt.push_str("\n```\n\n"); + prompt.push_str("**Their version (being merged):**\n```\n"); + prompt.push_str(&conflict.theirs_content); + prompt.push_str("\n```\n\n"); + if let Some(ref base) =3D conflict.base_content { + prompt.push_str("**Base version (common ancestor):**\n```\n"); + prompt.push_str(base); + prompt.push_str("\n```\n\n"); + } + } + + // Similar historical resolutions (only if available) + if !similar.is_empty() { + prompt.push_str("## Similar Historical Resolutions\n\n"); + prompt.push_str("These conflicts were previously resolved in the L= inux kernel. Use `git show ` "); + prompt.push_str("to examine the full commit message and context - = maintainers often explain "); + prompt.push_str("their resolution rationale there.\n\n"); + + for (i, result) in similar.iter().enumerate() { + let r =3D &result.resolution; + prompt.push_str(&format!("### Historical Resolution {} (simila= rity: {:.1}%)\n\n", i + 1, result.similarity * 100.0)); + prompt.push_str(&format!("- **Commit:** `{}`\n", r.commit_hash= )); + prompt.push_str(&format!("- **Summary:** {}\n", r.commit_summa= ry)); + prompt.push_str(&format!("- **Author:** {}\n", r.author)); + prompt.push_str(&format!("- **Date:** {}\n", r.commit_date)); + prompt.push_str(&format!("- **Files:** {}\n\n", r.files.iter()= .map(|f| f.file_path.as_str()).collect::>().join(", "))); + + for file in &r.files { + prompt.push_str(&format!("#### {}\n\n", file.file_path)); + + if !file.ours_diff.is_empty() { + prompt.push_str("**Ours changed:**\n```diff\n"); + for hunk in &file.ours_diff { + prompt.push_str(&hunk.content); + prompt.push('\n'); + } + prompt.push_str("```\n\n"); + } + + if !file.theirs_diff.is_empty() { + prompt.push_str("**Theirs changed:**\n```diff\n"); + for hunk in &file.theirs_diff { + prompt.push_str(&hunk.content); + prompt.push('\n'); + } + prompt.push_str("```\n\n"); + } + + if !file.resolution_diff.is_empty() { + prompt.push_str("**Final resolution:**\n```diff\n"); + for hunk in &file.resolution_diff { + prompt.push_str(&hunk.content); + prompt.push('\n'); + } + prompt.push_str("```\n\n"); + } + } + } + } + + // Investigation requirement + prompt.push_str("## Investigation Required\n\n"); + prompt.push_str("Before attempting any resolution, you must conduct th= orough research. "); + prompt.push_str("Rushing to resolve without understanding is how subtl= e bugs get introduced. "); + prompt.push_str("Work through each phase below IN ORDER and document y= our findings.\n\n"); + + // Phase 1: Search lore.kernel.org + prompt.push_str("### Phase 1: Search lore.kernel.org for Maintainer Gu= idance (DO THIS FIRST)\n\n"); + prompt.push_str("**CRITICAL:** Before doing ANY other research, search= lore.kernel.org for existing guidance.\n"); + prompt.push_str("Maintainers often post merge resolution instructions = when they know conflicts will occur.\n\n"); + + if let Some(ref source) =3D merge_ctx.merge_source { + prompt.push_str(&format!("1. **Search for the merge itself:** `{}`= \n", source)); + prompt.push_str(&format!(" - URL: `https://lore.kernel.org/all/?= q=3D{}`\n", source.replace('/', "%2F"))); + } + prompt.push_str("2. **Search for conflict discussions:**\n"); + prompt.push_str(" - `\"merge conflict\"` + subsystem name\n"); + prompt.push_str(" - `\"conflicts with\"` + branch/tag name\n\n"); + + // Phase 2: Context + prompt.push_str("### Phase 2: Understand the Context\n\n"); + prompt.push_str("- **What subsystem is this?** Read the file and nearb= y files to understand its purpose.\n"); + prompt.push_str("- **Who maintains it?** Check `git log --oneline -20`= for recent authors.\n"); + prompt.push_str("- **What's the file's role?** Is it a driver, core su= bsystem, header, config?\n\n"); + + // Phase 3: Trace history + prompt.push_str("### Phase 3: Trace Each Side's History\n\n"); + prompt.push_str("**For 'ours' (HEAD):**\n"); + prompt.push_str("- Run `git log --oneline HEAD -- ` to see recen= t changes\n"); + prompt.push_str("- Find the commit that introduced our version of the = conflicted code\n"); + prompt.push_str("- Run `git show ` to read the full commit mes= sage\n\n"); + prompt.push_str("**For 'theirs' (MERGE_HEAD):**\n"); + prompt.push_str("- Run `git log --oneline MERGE_HEAD -- ` to see= their changes\n"); + prompt.push_str("- Find the commit that introduced their version\n"); + prompt.push_str("- Run `git show ` to read the full commit mes= sage\n\n"); + + // Resolution + prompt.push_str("## Resolution\n\n"); + prompt.push_str("Once you understand the conflict:\n\n"); + prompt.push_str("1. Edit the conflicted files to produce the correct m= erged result\n"); + prompt.push_str("2. Remove all conflict markers (`<<<<<<<`, `=3D=3D=3D= =3D=3D=3D=3D`, `>>>>>>>`)\n"); + prompt.push_str("3. Stage the resolved files with `git add`\n"); + prompt.push_str("4. Commit with a detailed message explaining your ana= lysis and resolution\n\n"); + + // If uncertain + prompt.push_str("## If Uncertain\n\n"); + prompt.push_str("If after investigation you're still uncertain about t= he correct resolution:\n\n"); + prompt.push_str("- Explain what you've learned and what remains unclea= r\n"); + prompt.push_str("- Describe the possible resolutions you see and their= tradeoffs\n"); + prompt.push_str("- Recommend whether a human maintainer should review\= n\n"); + prompt.push_str("It's better to flag uncertainty than to silently intr= oduce a bug.\n\n"); + + // Tools available + prompt.push_str("## Tools Available\n\n"); + prompt.push_str("You can use these to investigate:\n\n"); + prompt.push_str("```bash\n"); + if !similar.is_empty() { + prompt.push_str("# Examine historical resolution commits\n"); + for result in similar { + prompt.push_str(&format!("git show {}\n", result.resolution.co= mmit_hash)); + } + prompt.push('\n'); + } + prompt.push_str("# Understand merge parents\n"); + prompt.push_str("git show ^1 # ours\n"); + prompt.push_str("git show ^2 # theirs\n"); + prompt.push_str("```\n"); + + prompt +} + +fn resolve(command: &str) -> Result<()> { + use std::io::Write; + use std::process::Stdio; + + // Get merge context (what branch/tag is being merged) + let merge_ctx =3D get_merge_context(); + if let Some(ref source) =3D merge_ctx.merge_source { + println!("Merging: {}", source); + } + if let Some(ref head) =3D merge_ctx.head_branch { + println!("Into: {}", head); + } + + // Get current conflicts first + let conflicts =3D get_current_conflicts()?; + + println!("Found {} conflict(s)", conflicts.len()); + + // Try to find similar historical resolutions (gracefully handles miss= ing database) + println!("Looking for similar historical conflicts..."); + let similar =3D try_find_similar_resolutions(3, &conflicts); + + if similar.is_empty() { + println!("No historical resolution database found (run 'llminus le= arn' and 'llminus vectorize' to build one)"); + println!("Proceeding without historical examples..."); + } else { + println!("Found {} similar historical resolutions", similar.len()); + } + + // Build the prompt + println!("Building resolution prompt..."); + let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx); + + println!("Prompt size: {} bytes", prompt.len()); + println!("\nInvoking: {}", command); + println!("{}", "=3D".repeat(80)); + + // Parse command (handle arguments) + let parts: Vec<&str> =3D command.split_whitespace().collect(); + if parts.is_empty() { + bail!("Empty command specified"); + } + + let cmd =3D parts[0]; + let args =3D &parts[1..]; + + // Spawn the command + let mut child =3D Command::new(cmd) + .args(args) + .stdin(Stdio::piped()) + .spawn() + .with_context(|| format!("Failed to spawn command: {}", command))?; + + // Write prompt to stdin + if let Some(mut stdin) =3D child.stdin.take() { + stdin.write_all(prompt.as_bytes()) + .context("Failed to write prompt to command stdin")?; + } + + // Wait for completion + let status =3D child.wait().context("Failed to wait for command")?; + + println!("{}", "=3D".repeat(80)); + + if status.success() { + println!("\nCommand completed successfully."); + } else { + eprintln!("\nCommand exited with status: {}", status); + } + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 @@ -945,6 +1366,7 @@ fn main() -> Result<()> { Commands::Learn { range } =3D> learn(range.as_deref()), Commands::Vectorize { batch_size } =3D> vectorize(batch_size), Commands::Find { n } =3D> find(n), + Commands::Resolve { command } =3D> resolve(&command), } } =20 @@ -1014,6 +1436,60 @@ fn test_find_command_with_n() { } } =20 + #[test] + fn test_resolve_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm"]).= unwrap(); + match cli.command { + Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= "), + _ =3D> panic!("Expected Resolve command"), + } + } + + #[test] + fn test_resolve_command_with_args() { + let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm --m= odel fancy"]).unwrap(); + match cli.command { + Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= --model fancy"), + _ =3D> panic!("Expected Resolve command"), + } + } + + #[test] + fn test_parse_merge_source() { + // Standard branch merge + assert_eq!( + parse_merge_source("Merge branch 'feature-branch'"), + Some("feature-branch".to_string()) + ); + + // Tag merge + assert_eq!( + parse_merge_source("Merge tag 'v6.1'"), + Some("v6.1".to_string()) + ); + + // Remote tracking branch + assert_eq!( + parse_merge_source("Merge remote-tracking branch 'origin/main'= "), + Some("origin/main".to_string()) + ); + + // Commit merge + assert_eq!( + parse_merge_source("Merge commit 'abc123def'"), + Some("abc123def".to_string()) + ); + + // Branch with "into" target + assert_eq!( + parse_merge_source("Merge branch 'feature' into master"), + Some("feature".to_string()) + ); + + // Non-merge line + assert_eq!(parse_merge_source("Fix bug in foo"), None); + } + #[test] fn test_cosine_similarity() { // Identical vectors should have similarity 1.0 --=20 2.51.0 From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A00852877ED for ; Sun, 11 Jan 2026 21:29:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166962; cv=none; b=um9XRTyNI4SksKDCsbHcHz4u1KgBVtCzzszoBzRXDEOsSu7L9JMe8fHt8Z/goVaXDPmX4VR4m+3mkM4ljuxLvobhJPy+bSOhWob0+vANa831QU5c2uzTxLFoxyiKyauM5z+L7HhVgxlu0hGUk5KOpd/dzuQ42EXVqDAE33MM5/Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166962; c=relaxed/simple; bh=cK916ghopKjLqFP5q+UhcVu38pQERQ3V5bXbzpniRlU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=G8U7Fi58rp14h0nHULC4Jq4p/iYSphhQD0X6sWJx8PEHOqZf/40jSTOM/lABkz0Rk6NdupYhx5XpoyMTLqNVy6TxdTuryGg8lz08b+1BVpevpwvpu6MYnX8s2ecF4uNG16aiZrUPBGiPKqPj9VrmsXv5VPAHyG9Ber6qgRt++8o= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=DwH62qu9; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="DwH62qu9" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D63F0C4CEF7; Sun, 11 Jan 2026 21:29:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166962; bh=cK916ghopKjLqFP5q+UhcVu38pQERQ3V5bXbzpniRlU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=DwH62qu90JpoUKYDfJL+8jy0zMrUt+k8bUq7msw95lonMb+YMO4bYlGJh2bK3KU7d Dk0jOkiYOI/Qnj8XHtA2K3wau6PeunmxPPRYRY34EzPJyHzncTLEo81fGSS+MYs4Du YUOx9wIVH87mJKKgV9YuxcN/tiim0pmhHUQuWzfwP/030Kxl9W6OHvGfeANoybIMqo nHcmbIhbtJ2ObKETEB2tIu+qsYxC+MI9uNDvRaqh1uPYEgklEIpyhbZ1jTjj8CzAB+ LC0/x6n/VeW7O4NxcyZvwc8F7cOzVsuqBVFP5QlwFJLsgsQBFsL4HyaldxBwhdcBBi 4SNxNytMf7WQw== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 5/7] LLMinus: Add pull command for LLM-assisted kernel pull request merging Date: Sun, 11 Jan 2026 16:29:13 -0500 Message-ID: <20260111212915.195056-6-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the pull command that automates merging kernel pull requests from lore.kernel.org with LLM-assisted conflict resolution. It fetches the pull request email, parses it to extract the repository URL and ref, then executes git pull. If conflicts occur, the LLM receives the complete pull request email, current conflict markers, and similar historical resolutions. The LLM resolves conflicts and writes reasoning to .git/LLMINUS_RESOLUTION. The prompt instructs the LLM to critically evaluate maintainer suggestions and look for opportunities to produce cleaner code. Adds ureq for HTTP fetching. Signed-off-by: Sasha Levin --- tools/llminus/Cargo.toml | 1 + tools/llminus/src/main.rs | 660 +++++++++++++++++++++++++++++++++++++- 2 files changed, 647 insertions(+), 14 deletions(-) diff --git a/tools/llminus/Cargo.toml b/tools/llminus/Cargo.toml index 86740174de59..66c6eddd0f64 100644 --- a/tools/llminus/Cargo.toml +++ b/tools/llminus/Cargo.toml @@ -14,6 +14,7 @@ fastembed =3D "5" rayon =3D "1" serde =3D { version =3D "1", features =3D ["derive"] } serde_json =3D "1" +ureq =3D "3" =20 [dev-dependencies] tempfile =3D "3" diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index 0388a881e413..ff668244688f 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -46,6 +46,14 @@ enum Commands { /// Command to invoke. The prompt will be passed via stdin. command: String, }, + /// Pull a kernel patch/pull request from lore.kernel.org and merge it + Pull { + /// Message ID from lore.kernel.org (e.g., "98b74397-05bc-dbee-cab= 4-3f40d643eaac@kernel.org") + message_id: String, + /// Command to invoke for LLM assistance + #[arg(short, long, default_value =3D "llm")] + command: String, + }, } =20 /// A single diff hunk representing a change region @@ -1034,6 +1042,299 @@ fn parse_merge_source(line: &str) -> Option= { None } =20 +/// Information parsed from a lore.kernel.org pull request email +#[derive(Debug, Default)] +#[allow(dead_code)] +struct PullRequest { + /// Message ID + message_id: String, + /// Subject line of the email + subject: String, + /// Author name and email + from: String, + /// Date of the email + date: String, + /// Git repository URL to pull from + git_url: String, + /// Git ref (tag or branch) to pull + git_ref: String, + /// The full raw email body (LLM extracts summary and conflict instruc= tions from this) + body: String, +} + +/// Fetch raw email from lore.kernel.org +fn fetch_lore_email(message_id: &str) -> Result { + // Clean up message ID (remove < > if present) + let clean_id =3D message_id + .trim_start_matches('<') + .trim_end_matches('>') + .trim(); + + let url =3D format!("https://lore.kernel.org/all/{}/raw", clean_id); + println!("Fetching: {}", url); + + let mut response =3D ureq::get(&url) + .call() + .with_context(|| format!("Failed to fetch {}", url))?; + + response.body_mut() + .read_to_string() + .context("Failed to read response body") +} + +/// Parse email headers from raw email text +fn parse_email_headers(raw: &str) -> (String, String, String, String, &str= ) { + let mut from =3D String::new(); + let mut subject =3D String::new(); + let mut date =3D String::new(); + let mut message_id =3D String::new(); + + // Find the blank line separating headers from body + let (headers_section, body) =3D raw.split_once("\n\n") + .unwrap_or((raw, "")); + + // Parse headers (handle multi-line headers) + let mut current_header =3D String::new(); + let mut current_value =3D String::new(); + + for line in headers_section.lines() { + if line.starts_with(' ') || line.starts_with('\t') { + // Continuation of previous header + current_value.push(' '); + current_value.push_str(line.trim()); + } else if let Some((name, value)) =3D line.split_once(':') { + // New header - save previous if any + if !current_header.is_empty() { + match current_header.to_lowercase().as_str() { + "from" =3D> from =3D current_value.clone(), + "subject" =3D> subject =3D current_value.clone(), + "date" =3D> date =3D current_value.clone(), + "message-id" =3D> message_id =3D current_value.clone(), + _ =3D> {} + } + } + current_header =3D name.to_string(); + current_value =3D value.trim().to_string(); + } + } + + // Don't forget last header + if !current_header.is_empty() { + match current_header.to_lowercase().as_str() { + "from" =3D> from =3D current_value, + "subject" =3D> subject =3D current_value, + "date" =3D> date =3D current_value, + "message-id" =3D> message_id =3D current_value, + _ =3D> {} + } + } + + (from, subject, date, message_id, body) +} + +/// Extract git pull URL and ref from email body +fn extract_git_info(body: &str) -> Option<(String, String)> { + // Look for patterns like: + // "git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux tags/ris= cv-for-linus-6.19-mw2" + // "https://git.kernel.org/pub/scm/linux/kernel/git/foo/bar.git branch= -name" + + for line in body.lines() { + let line =3D line.trim(); + + // Skip empty lines and common non-URL prefixes + if line.is_empty() { + continue; + } + + // Check for git:// or https:// URLs + let url_start =3D if let Some(pos) =3D line.find("git://") { + pos + } else if let Some(pos) =3D line.find("https://git.") { + pos + } else { + continue; + }; + + let url_part =3D &line[url_start..]; + + // Split into URL and ref + let parts: Vec<&str> =3D url_part.split_whitespace().collect(); + if parts.len() >=3D 2 { + let url =3D parts[0].to_string(); + let git_ref =3D parts[1].to_string(); + + // Validate it looks like a kernel git URL + if url.contains("kernel.org") || url.contains("git.") { + return Some((url, git_ref)); + } + } + } + + None +} + +/// Use LLM to extract the maintainer's summary from the email body +/// Returns None if extraction fails (caller can fall back to other method= s) +fn extract_summary_with_llm(body: &str, command: &str) -> Option { + use std::io::Write; + use std::process::Stdio; + + let prompt =3D format!(r#"Extract ONLY the technical summary from this= kernel pull request email. +The summary describes what changes are included (usually as bullet points). +Do NOT include: +- Personal messages to Linus +- Git URLs or repository information +- Merge/conflict resolution instructions +- Diffstat or file change listings +- Sign-offs or signatures + +Output ONLY the summary text, nothing else. No preamble, no explanation. + +Email body: +{} +"#, body); + + let parts: Vec<&str> =3D command.split_whitespace().collect(); + if parts.is_empty() { + return None; + } + + println!("Extracting summary from pull request..."); + + let mut child =3D match Command::new(parts[0]) + .args(&parts[1..]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() { + Ok(c) =3D> c, + Err(_) =3D> return None, + }; + + if let Some(mut stdin) =3D child.stdin.take() { + if stdin.write_all(prompt.as_bytes()).is_err() { + return None; + } + } + + let output =3D match child.wait_with_output() { + Ok(o) =3D> o, + Err(_) =3D> return None, + }; + + if !output.status.success() { + return None; + } + + let summary =3D String::from_utf8_lossy(&output.stdout).trim().to_stri= ng(); + if summary.is_empty() { + None + } else { + Some(summary) + } +} + +/// Parse a pull request email from lore.kernel.org +fn parse_pull_request(message_id: &str, raw: &str) -> Result { + let (from, subject, date, parsed_id, body) =3D parse_email_headers(raw= ); + + let (git_url, git_ref) =3D extract_git_info(body) + .ok_or_else(|| anyhow::anyhow!("Could not find git repository URL = in email"))?; + + Ok(PullRequest { + message_id: if parsed_id.is_empty() { message_id.to_string() } els= e { parsed_id }, + subject, + from, + date, + git_url, + git_ref, + body: body.to_string(), + }) +} + +/// Execute git pull and return whether there are conflicts +fn git_pull(url: &str, git_ref: &str) -> Result { + println!("Executing: git pull {} {}", url, git_ref); + + let output =3D Command::new("git") + .args(["pull", url, git_ref]) + .output() + .context("Failed to run git pull")?; + + let stdout =3D String::from_utf8_lossy(&output.stdout); + let stderr =3D String::from_utf8_lossy(&output.stderr); + + if !stdout.is_empty() { + println!("{}", stdout); + } + if !stderr.is_empty() { + eprintln!("{}", stderr); + } + + // Check if there are conflicts + if output.status.success() { + return Ok(false); // No conflicts + } + + // Check for merge conflicts specifically + let conflict_markers =3D ["CONFLICT", "Automatic merge failed", "fix c= onflicts"]; + let output_text =3D format!("{}{}", stdout, stderr); + + for marker in conflict_markers { + if output_text.contains(marker) { + return Ok(true); // Has conflicts + } + } + + // Some other error + bail!("git pull failed: {}", stderr); +} + +/// Check if there are unmerged files (active merge conflicts) +fn has_merge_conflicts() -> bool { + get_conflicted_files() + .map(|files| !files.is_empty()) + .unwrap_or(false) +} + +/// Build a merge commit message using the pull request information, summa= ry, and resolution +fn build_merge_commit_message(pull_req: &PullRequest, summary: &str, resol= ution: &str) -> String { + let mut msg =3D String::new(); + + // Use the subject line as the merge message header + if !pull_req.subject.is_empty() { + // Clean up subject - remove [GIT PULL] prefix if present + let subject =3D pull_req.subject + .replace("[GIT PULL]", "") + .replace("[git pull]", "") + .trim() + .to_string(); + msg.push_str(&format!("Merge {} {}\n", pull_req.git_ref, &subject)= ); + } else { + msg.push_str(&format!("Merge {}\n", pull_req.git_ref)); + } + msg.push('\n'); + + // Add maintainer's summary (extracted by LLM) + if !summary.is_empty() { + msg.push_str(summary); + msg.push_str("\n\n"); + } + + // Add resolution explanation (written by LLM during conflict resoluti= on) + if !resolution.is_empty() { + msg.push_str("Merge conflict resolution:\n\n"); + msg.push_str(resolution); + msg.push_str("\n\n"); + } + + // Add link to lore + msg.push_str(&format!("Link: https://lore.kernel.org/all/{}/\n", + pull_req.message_id.trim_start_matches('<').trim_end_matches('>'))= ); + + msg +} + /// Get current conflicts from the working directory fn get_current_conflicts() -> Result> { check_repo()?; @@ -1121,18 +1422,53 @@ fn build_resolve_prompt( conflicts: &[ConflictFile], similar: &[SimilarResolution], merge_ctx: &MergeContext, + pull_req: Option<&PullRequest>, ) -> String { let mut prompt =3D String::new(); =20 // Header with high-stakes framing - prompt.push_str("# Linux Kernel Merge Conflict Resolution\n\n"); - prompt.push_str("You are acting as an experienced kernel maintainer re= solving a merge conflict.\n\n"); + if pull_req.is_some() { + prompt.push_str("# Linux Kernel Pull Request Merge with Conflict R= esolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintaine= r resolving conflicts "); + prompt.push_str("from a pull request submission on lore.kernel.org= .\n\n"); + } else { + prompt.push_str("# Linux Kernel Merge Conflict Resolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintaine= r resolving a merge conflict.\n\n"); + } prompt.push_str("**Important:** Incorrect merge resolutions have histo= rically introduced subtle bugs "); prompt.push_str("that affected millions of users and took months to di= agnose. A resolution that "); prompt.push_str("compiles but has semantic errors is worse than no res= olution at all.\n\n"); - prompt.push_str("Take the time to fully understand both sides of the c= onflict before attempting "); - prompt.push_str("any resolution. If after investigation you're not con= fident, say so - it's "); - prompt.push_str("better to escalate to a human than to introduce a sub= tle bug.\n\n"); + + // Pull request specific: critical evaluation note + if pull_req.is_some() { + prompt.push_str("**CRITICAL:** You have access to the pull request= email which may contain "); + prompt.push_str("conflict resolution instructions from the maintai= ner. Use these as guidance, "); + prompt.push_str("but ALWAYS evaluate them critically - there may b= e better, cleaner, or more "); + prompt.push_str("efficient solutions than what was suggested.\n\n"= ); + } else { + prompt.push_str("Take the time to fully understand both sides of t= he conflict before attempting "); + prompt.push_str("any resolution. If after investigation you're not= confident, say so - it's "); + prompt.push_str("better to escalate to a human than to introduce a= subtle bug.\n\n"); + } + + // Pull request information (if present) + if let Some(pr) =3D pull_req { + prompt.push_str("## Pull Request Information\n\n"); + prompt.push_str(&format!("- **Subject:** {}\n", pr.subject)); + prompt.push_str(&format!("- **From:** {}\n", pr.from)); + prompt.push_str(&format!("- **Date:** {}\n", pr.date)); + prompt.push_str(&format!("- **Git URL:** {} {}\n", pr.git_url, pr.= git_ref)); + prompt.push_str(&format!("- **Message ID:** {}\n\n", pr.message_id= )); + + // Full email body - LLM will understand summary and conflict inst= ructions from this + prompt.push_str("### Pull Request Email\n\n"); + prompt.push_str("Read this email carefully. It contains the mainta= iner's description of the changes "); + prompt.push_str("and may include conflict resolution instructions.= Evaluate any suggested "); + prompt.push_str("resolutions critically - there may be cleaner or = more efficient solutions.\n\n"); + prompt.push_str("```\n"); + prompt.push_str(&pr.body); + prompt.push_str("\n```\n\n"); + } =20 // Merge context prompt.push_str("## Merge Context\n\n"); @@ -1257,7 +1593,17 @@ fn build_resolve_prompt( prompt.push_str("1. Edit the conflicted files to produce the correct m= erged result\n"); prompt.push_str("2. Remove all conflict markers (`<<<<<<<`, `=3D=3D=3D= =3D=3D=3D=3D`, `>>>>>>>`)\n"); prompt.push_str("3. Stage the resolved files with `git add`\n"); - prompt.push_str("4. Commit with a detailed message explaining your ana= lysis and resolution\n\n"); + if pull_req.is_some() { + prompt.push_str("4. **Do NOT commit** - The tool will handle the c= ommit\n"); + prompt.push_str("5. **IMPORTANT:** Write a detailed explanation of= your resolution to `.git/LLMINUS_RESOLUTION`\n"); + prompt.push_str(" This file should contain:\n"); + prompt.push_str(" - A summary of each conflict and how you resol= ved it\n"); + prompt.push_str(" - The reasoning behind your choices\n"); + prompt.push_str(" - Any improvements you made over suggested res= olutions\n"); + prompt.push_str(" This will be included in the merge commit mess= age.\n\n"); + } else { + prompt.push_str("4. Commit with a detailed message explaining your= analysis and resolution\n\n"); + } =20 // If uncertain prompt.push_str("## If Uncertain\n\n"); @@ -1287,9 +1633,6 @@ fn build_resolve_prompt( } =20 fn resolve(command: &str) -> Result<()> { - use std::io::Write; - use std::process::Stdio; - // Get merge context (what branch/tag is being merged) let merge_ctx =3D get_merge_context(); if let Some(ref source) =3D merge_ctx.merge_source { @@ -1315,12 +1658,17 @@ fn resolve(command: &str) -> Result<()> { println!("Found {} similar historical resolutions", similar.len()); } =20 - // Build the prompt - println!("Building resolution prompt..."); - let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx); + // Build the prompt and invoke LLM + let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = None); + invoke_llm(command, &prompt) +} + +/// Invoke an LLM command with a prompt via stdin +fn invoke_llm(command: &str, prompt: &str) -> Result<()> { + use std::io::Write; + use std::process::Stdio; =20 - println!("Prompt size: {} bytes", prompt.len()); - println!("\nInvoking: {}", command); + println!("Invoking: {} (prompt: {} bytes)", command, prompt.len()); println!("{}", "=3D".repeat(80)); =20 // Parse command (handle arguments) @@ -1359,6 +1707,107 @@ fn resolve(command: &str) -> Result<()> { Ok(()) } =20 +/// Pull a kernel pull request from lore.kernel.org +fn pull(message_id: &str, command: &str) -> Result<()> { + check_repo()?; + + // Step 1: Fetch and parse the pull request email + println!("=3D=3D=3D Fetching Pull Request =3D=3D=3D\n"); + let raw_email =3D fetch_lore_email(message_id)?; + let pull_req =3D parse_pull_request(message_id, &raw_email)?; + + println!("Subject: {}", pull_req.subject); + println!("From: {}", pull_req.from); + println!("Date: {}", pull_req.date); + println!("Git URL: {} {}", pull_req.git_url, pull_req.git_ref); + + // Step 2: Execute git pull + println!("\n=3D=3D=3D Executing Git Pull =3D=3D=3D\n"); + let has_conflicts =3D git_pull(&pull_req.git_url, &pull_req.git_ref)?; + + if !has_conflicts { + // No conflicts - merge succeeded automatically + println!("\n=3D=3D=3D Merge Completed Successfully =3D=3D=3D"); + println!("No conflicts detected. The merge was completed automatic= ally."); + return Ok(()); + } + + // Step 3: Handle conflicts + println!("\n=3D=3D=3D Merge Conflicts Detected =3D=3D=3D\n"); + + // Get merge context + let merge_ctx =3D get_merge_context(); + + // Parse the conflicts + let conflicts =3D get_current_conflicts()?; + println!("Found {} conflict region(s) to resolve", conflicts.len()); + + // Try to find similar historical resolutions + println!("Looking for similar historical conflicts..."); + let similar =3D try_find_similar_resolutions(3, &conflicts); + + if similar.is_empty() { + println!("No historical resolution database found (this is optiona= l)"); + } else { + println!("Found {} similar historical resolutions", similar.len()); + } + + // Build the prompt with pull request context and invoke LLM + let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = Some(&pull_req)); + println!("\n=3D=3D=3D Invoking LLM for Conflict Resolution =3D=3D=3D"); + invoke_llm(command, &prompt)?; + + // Step 5: Check if conflicts are resolved + if has_merge_conflicts() { + println!("\nWarning: Conflicts still remain in the working directo= ry."); + println!("Please resolve any remaining conflicts manually and comm= it."); + return Ok(()); + } + + // Step 6: Commit the merge with pull request information + println!("\n=3D=3D=3D Committing Merge =3D=3D=3D\n"); + + // Extract summary using LLM (falls back to empty if it fails) + let summary =3D extract_summary_with_llm(&pull_req.body, command) + .unwrap_or_else(|| { + println!("Note: Could not extract summary automatically"); + String::new() + }); + + // Read resolution explanation written by LLM + let resolution =3D std::fs::read_to_string(".git/LLMINUS_RESOLUTION") + .unwrap_or_else(|_| { + println!("Note: No resolution explanation found in .git/LLMINU= S_RESOLUTION"); + String::new() + }); + + // Clean up the resolution file + let _ =3D std::fs::remove_file(".git/LLMINUS_RESOLUTION"); + + let commit_msg =3D build_merge_commit_message(&pull_req, &summary, &re= solution); + println!("Commit message:\n{}", commit_msg); + + // Create a temporary file for the commit message (to handle multi-lin= e) + let commit_result =3D Command::new("git") + .args(["commit", "-m", &commit_msg]) + .output() + .context("Failed to run git commit")?; + + if commit_result.status.success() { + println!("\n=3D=3D=3D Merge Committed Successfully =3D=3D=3D"); + let stdout =3D String::from_utf8_lossy(&commit_result.stdout); + if !stdout.is_empty() { + println!("{}", stdout); + } + } else { + let stderr =3D String::from_utf8_lossy(&commit_result.stderr); + eprintln!("Commit failed: {}", stderr); + bail!("Failed to commit merge"); + } + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 @@ -1367,6 +1816,7 @@ fn main() -> Result<()> { Commands::Vectorize { batch_size } =3D> vectorize(batch_size), Commands::Find { n } =3D> find(n), Commands::Resolve { command } =3D> resolve(&command), + Commands::Pull { message_id, command } =3D> pull(&message_id, &com= mand), } } =20 @@ -1755,4 +2205,186 @@ fn test_parse_multiple_conflicts() { assert!(conflicts[0].ours_content.contains("first ours")); assert!(conflicts[1].ours_content.contains("second ours")); } + + #[test] + fn test_pull_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "pull", "test@kernel.o= rg"]).unwrap(); + match cli.command { + Commands::Pull { message_id, command } =3D> { + assert_eq!(message_id, "test@kernel.org"); + assert_eq!(command, "llm"); // default + } + _ =3D> panic!("Expected Pull command"), + } + } + + #[test] + fn test_pull_command_with_custom_command() { + let cli =3D Cli::try_parse_from([ + "llminus", "pull", "test@kernel.org", "-c", "my-llm --model fa= ncy" + ]).unwrap(); + match cli.command { + Commands::Pull { message_id, command } =3D> { + assert_eq!(message_id, "test@kernel.org"); + assert_eq!(command, "my-llm --model fancy"); + } + _ =3D> panic!("Expected Pull command"), + } + } + + #[test] + fn test_parse_email_headers() { + let raw =3D r#"From: Paul Walmsley +Subject: [GIT PULL] RISC-V updates for v6.19 +Date: Thu, 11 Dec 2025 19:36:00 -0700 +Message-ID: + +This is the body of the email. +"#; + let (from, subject, date, msg_id, body) =3D parse_email_headers(ra= w); + assert_eq!(from, "Paul Walmsley "); + assert_eq!(subject, "[GIT PULL] RISC-V updates for v6.19"); + assert_eq!(date, "Thu, 11 Dec 2025 19:36:00 -0700"); + assert_eq!(msg_id, ""); + assert!(body.contains("This is the body")); + } + + #[test] + fn test_parse_email_headers_multiline() { + let raw =3D r#"From: Paul Walmsley +Subject: [GIT PULL] RISC-V updates + for v6.19 merge window +Date: Thu, 11 Dec 2025 19:36:00 -0700 + +Body here. +"#; + let (_, subject, _, _, _) =3D parse_email_headers(raw); + assert!(subject.contains("RISC-V updates")); + assert!(subject.contains("for v6.19 merge window")); + } + + #[test] + fn test_extract_git_info() { + let body =3D r#"Please pull this set of changes. + +The following changes are available in the Git repository at: + + git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux tags/riscv-for= -linus-6.19 + +for you to fetch changes up to abc123. +"#; + let result =3D extract_git_info(body); + assert!(result.is_some()); + let (url, git_ref) =3D result.unwrap(); + assert_eq!(url, "git://git.kernel.org/pub/scm/linux/kernel/git/ris= cv/linux"); + assert_eq!(git_ref, "tags/riscv-for-linus-6.19"); + } + + #[test] + fn test_extract_git_info_https() { + let body =3D r#"Available at: + + https://git.kernel.org/pub/scm/linux/kernel/git/foo/bar.git feature-bran= ch + +Thanks! +"#; + let result =3D extract_git_info(body); + assert!(result.is_some()); + let (url, git_ref) =3D result.unwrap(); + assert!(url.starts_with("https://git.kernel.org")); + assert_eq!(git_ref, "feature-branch"); + } + + #[test] + fn test_extract_git_info_none() { + let body =3D "This email has no git URL in it."; + let result =3D extract_git_info(body); + assert!(result.is_none()); + } + + #[test] + fn test_build_merge_commit_message() { + let pull_req =3D PullRequest { + message_id: "test123@kernel.org".to_string(), + subject: "[GIT PULL] Important updates for v6.19".to_string(), + from: "Maintainer ".to_string(), + date: "2025-12-11".to_string(), + git_url: "git://git.kernel.org/pub/scm/foo".to_string(), + git_ref: "tags/foo-for-v6.19".to_string(), + body: String::new(), + }; + + let summary =3D "This is the maintainer's summary of changes."; + let resolution =3D "Resolved by keeping both changes."; + let msg =3D build_merge_commit_message(&pull_req, summary, resolut= ion); + assert!(msg.contains("Merge tags/foo-for-v6.19")); + assert!(msg.contains("Important updates")); // subject without [GI= T PULL] + assert!(msg.contains("maintainer's summary")); + assert!(msg.contains("conflict resolution")); + assert!(msg.contains("keeping both changes")); + assert!(msg.contains("https://lore.kernel.org/all/test123@kernel.o= rg/")); + } + + #[test] + fn test_build_resolve_prompt_with_pull_request() { + let conflicts =3D vec![ConflictFile { + path: "test.c".to_string(), + ours_content: "int ours;".to_string(), + theirs_content: "int theirs;".to_string(), + base_content: Some("int base;".to_string()), + }]; + + let pull_req =3D PullRequest { + message_id: "test@kernel.org".to_string(), + subject: "Test PR".to_string(), + from: "Author ".to_string(), + date: "2025-12-11".to_string(), + git_url: "git://test".to_string(), + git_ref: "tags/test".to_string(), + body: "Test summary\n\nResolve by keeping both.".to_string(), + }; + + let merge_ctx =3D MergeContext { + merge_source: Some("tags/test".to_string()), + head_branch: Some("master".to_string()), + merge_message: Some("Merge tags/test".to_string()), + }; + + let prompt =3D build_resolve_prompt(&conflicts, &[], &merge_ctx, S= ome(&pull_req)); + + // Check that key sections are present + assert!(prompt.contains("Pull Request Information")); + assert!(prompt.contains("Test PR")); // subject + assert!(prompt.contains("Test summary")); // body includes summary + assert!(prompt.contains("Resolve by keeping both")); // body inclu= des this + assert!(prompt.contains("test.c")); // conflict file + assert!(prompt.contains("int ours;")); // ours content + assert!(prompt.contains("int theirs;")); // theirs content + assert!(prompt.contains("Do NOT commit")); // pull request specific + } + + #[test] + fn test_build_resolve_prompt_without_pull_request() { + let conflicts =3D vec![ConflictFile { + path: "test.c".to_string(), + ours_content: "int ours;".to_string(), + theirs_content: "int theirs;".to_string(), + base_content: None, + }]; + + let merge_ctx =3D MergeContext { + merge_source: Some("feature-branch".to_string()), + head_branch: Some("master".to_string()), + merge_message: None, + }; + + let prompt =3D build_resolve_prompt(&conflicts, &[], &merge_ctx, N= one); + + // Check standard resolve sections + assert!(prompt.contains("Linux Kernel Merge Conflict Resolution")); + assert!(!prompt.contains("Pull Request Information")); + assert!(prompt.contains("test.c")); + assert!(prompt.contains("int ours;")); + assert!(prompt.contains("Commit with a detailed message")); // not= "Do NOT commit" + } } --=20 2.51.0 From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 891612853EE for ; Sun, 11 Jan 2026 21:29:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166963; cv=none; b=JVumGh/kiRspVWNBPzVpHhj0WSaNzYd85vAYiFBb7P2omUL41RhIDQyoiPFWXRnNz28YzTeuVrec4HikCTY3GeWGoFJIqrrWQJZu360wKWhHZbtV+mMkGzxGFzvY2TLCMh+Pi1LZNDd/0WTrz1zrKcMEZ5f57eW2tqKuXM7gSY8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166963; c=relaxed/simple; bh=mAvsryD46w5Cimgnn+GzWEn7fLSvnQP+2ct0WhzfIX8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=TMyfWbFYsiAXk0aoM5f5sSNaKIMtyQz/zx/aLXMSRP7NO650RMnG3s/Fmrk2AJJfJANfr4d4ES6iUKiLDXAH4FpX6DZt7YQ6HVfIFVE2GeaL7rdZHNO006pWcRS20oRFq7TH1GAk4YJzrnpy9LydULlQ0jrk0dGIWMI6fByxbxc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=k7+9vCdH; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k7+9vCdH" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C2447C4CEF7; Sun, 11 Jan 2026 21:29:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166963; bh=mAvsryD46w5Cimgnn+GzWEn7fLSvnQP+2ct0WhzfIX8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k7+9vCdHFNvnQgliWuzugu71/ueuxfvDTHHh33v2ON0HNkfyOJkgb28x+PHlgX5OG TE721Q8KHSKIrpA8WiicD5VtCQhMqVffnSQqb/kutkg+8ARrW86oSIXQTatseyLnq6 tPqwFVJyuwHO7WVBUI1Gy0Xh/BRn/X+gkXJtKDBHzAi0B6kcYOMWoCqpvBppbjMIlF ndCa1z02EyRSxfy4JsYWPNDsUkd8lumHLNYYFdv6S3Up/+tiOuNiBJj7il/m7fN9Ef ka26cJ9Uj0/KowgdDbjkesin6l48JfzFaGVqWe1RmXQwsg6fwhJZresE1TKbsekU3G JODadhPxTmSWg== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 6/7] LLMinus: Add prompt token limit enforcement Date: Sun, 11 Jan 2026 16:29:14 -0500 Message-ID: <20260111212915.195056-7-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the max-tokens option with a 100K default to prevent prompt overflow errors with various LLM providers. Token count is estimated at roughly 4 characters per token. When prompts exceed the limit, RAG examples are progressively removed until the prompt fits. The token count is displayed in invoke output. Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 114 +++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 19 deletions(-) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index ff668244688f..5c469e23f09a 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -14,6 +14,18 @@ =20 const STORE_PATH: &str =3D ".llminus-resolutions.json"; =20 +/// Default maximum tokens for prompt (conservative for broad provider com= patibility) +/// Most providers support at least 128K; we use 100K as a safe default. +const DEFAULT_MAX_TOKENS: usize =3D 100_000; + +/// Approximate characters per token (for English text) +const CHARS_PER_TOKEN: usize =3D 4; + +/// Estimate the number of tokens in a text string +fn estimate_tokens(text: &str) -> usize { + text.len() / CHARS_PER_TOKEN +} + #[derive(Parser)] #[command(name =3D "llminus")] #[command(about =3D "LLM-powered git conflict resolution tool")] @@ -45,6 +57,9 @@ enum Commands { Resolve { /// Command to invoke. The prompt will be passed via stdin. command: String, + /// Maximum tokens for prompt (reduces RAG examples if exceeded) + #[arg(short, long, default_value_t =3D DEFAULT_MAX_TOKENS)] + max_tokens: usize, }, /// Pull a kernel patch/pull request from lore.kernel.org and merge it Pull { @@ -53,6 +68,9 @@ enum Commands { /// Command to invoke for LLM assistance #[arg(short, long, default_value =3D "llm")] command: String, + /// Maximum tokens for prompt (reduces RAG examples if exceeded) + #[arg(long, default_value_t =3D DEFAULT_MAX_TOKENS)] + max_tokens: usize, }, } =20 @@ -825,6 +843,7 @@ fn parse_conflict_file(path: &str) -> Result> { } =20 /// Result of a similarity search +#[derive(Clone)] struct SimilarResolution { resolution: MergeResolution, similarity: f32, @@ -1632,7 +1651,7 @@ fn build_resolve_prompt( prompt } =20 -fn resolve(command: &str) -> Result<()> { +fn resolve(command: &str, max_tokens: usize) -> Result<()> { // Get merge context (what branch/tag is being merged) let merge_ctx =3D get_merge_context(); if let Some(ref source) =3D merge_ctx.merge_source { @@ -1649,17 +1668,45 @@ fn resolve(command: &str) -> Result<()> { =20 // Try to find similar historical resolutions (gracefully handles miss= ing database) println!("Looking for similar historical conflicts..."); - let similar =3D try_find_similar_resolutions(3, &conflicts); + let all_similar =3D try_find_similar_resolutions(3, &conflicts); =20 - if similar.is_empty() { + if all_similar.is_empty() { println!("No historical resolution database found (run 'llminus le= arn' and 'llminus vectorize' to build one)"); println!("Proceeding without historical examples..."); } else { - println!("Found {} similar historical resolutions", similar.len()); + println!("Found {} similar historical resolutions", all_similar.le= n()); + } + + // Build the prompt with adaptive RAG example reduction + let mut similar =3D all_similar.clone(); + let mut prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_c= tx, None); + let mut tokens =3D estimate_tokens(&prompt); + + // Reduce RAG examples until we're under the token limit + while tokens > max_tokens && !similar.is_empty() { + let original_count =3D all_similar.len(); + similar.pop(); // Remove the least similar (last) example + prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = None); + tokens =3D estimate_tokens(&prompt); + + if similar.len() < original_count { + println!( + "Reduced RAG examples from {} to {} to fit token limit (~{= } tokens, limit: {})", + original_count, + similar.len(), + tokens, + max_tokens + ); + } + } + + if tokens > max_tokens { + println!( + "Warning: Prompt still exceeds token limit (~{} tokens, limit:= {}) even without RAG examples", + tokens, max_tokens + ); } =20 - // Build the prompt and invoke LLM - let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = None); invoke_llm(command, &prompt) } =20 @@ -1668,7 +1715,8 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<= ()> { use std::io::Write; use std::process::Stdio; =20 - println!("Invoking: {} (prompt: {} bytes)", command, prompt.len()); + let tokens =3D estimate_tokens(prompt); + println!("Invoking: {} (prompt: {} bytes, ~{} tokens)", command, promp= t.len(), tokens); println!("{}", "=3D".repeat(80)); =20 // Parse command (handle arguments) @@ -1708,7 +1756,7 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<= ()> { } =20 /// Pull a kernel pull request from lore.kernel.org -fn pull(message_id: &str, command: &str) -> Result<()> { +fn pull(message_id: &str, command: &str, max_tokens: usize) -> Result<()> { check_repo()?; =20 // Step 1: Fetch and parse the pull request email @@ -1744,16 +1792,44 @@ fn pull(message_id: &str, command: &str) -> Result<= ()> { =20 // Try to find similar historical resolutions println!("Looking for similar historical conflicts..."); - let similar =3D try_find_similar_resolutions(3, &conflicts); + let all_similar =3D try_find_similar_resolutions(3, &conflicts); =20 - if similar.is_empty() { + if all_similar.is_empty() { println!("No historical resolution database found (this is optiona= l)"); } else { - println!("Found {} similar historical resolutions", similar.len()); + println!("Found {} similar historical resolutions", all_similar.le= n()); + } + + // Build the prompt with adaptive RAG example reduction + let mut similar =3D all_similar.clone(); + let mut prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_c= tx, Some(&pull_req)); + let mut tokens =3D estimate_tokens(&prompt); + + // Reduce RAG examples until we're under the token limit + while tokens > max_tokens && !similar.is_empty() { + let original_count =3D all_similar.len(); + similar.pop(); // Remove the least similar (last) example + prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = Some(&pull_req)); + tokens =3D estimate_tokens(&prompt); + + if similar.len() < original_count { + println!( + "Reduced RAG examples from {} to {} to fit token limit (~{= } tokens, limit: {})", + original_count, + similar.len(), + tokens, + max_tokens + ); + } + } + + if tokens > max_tokens { + println!( + "Warning: Prompt still exceeds token limit (~{} tokens, limit:= {}) even without RAG examples", + tokens, max_tokens + ); } =20 - // Build the prompt with pull request context and invoke LLM - let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = Some(&pull_req)); println!("\n=3D=3D=3D Invoking LLM for Conflict Resolution =3D=3D=3D"); invoke_llm(command, &prompt)?; =20 @@ -1815,8 +1891,8 @@ fn main() -> Result<()> { Commands::Learn { range } =3D> learn(range.as_deref()), Commands::Vectorize { batch_size } =3D> vectorize(batch_size), Commands::Find { n } =3D> find(n), - Commands::Resolve { command } =3D> resolve(&command), - Commands::Pull { message_id, command } =3D> pull(&message_id, &com= mand), + Commands::Resolve { command, max_tokens } =3D> resolve(&command, m= ax_tokens), + Commands::Pull { message_id, command, max_tokens } =3D> pull(&mess= age_id, &command, max_tokens), } } =20 @@ -1890,7 +1966,7 @@ fn test_find_command_with_n() { fn test_resolve_command_parses() { let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm"]).= unwrap(); match cli.command { - Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= "), + Commands::Resolve { command, .. } =3D> assert_eq!(command, "my= -llm"), _ =3D> panic!("Expected Resolve command"), } } @@ -1899,7 +1975,7 @@ fn test_resolve_command_parses() { fn test_resolve_command_with_args() { let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm --m= odel fancy"]).unwrap(); match cli.command { - Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= --model fancy"), + Commands::Resolve { command, .. } =3D> assert_eq!(command, "my= -llm --model fancy"), _ =3D> panic!("Expected Resolve command"), } } @@ -2210,7 +2286,7 @@ fn test_parse_multiple_conflicts() { fn test_pull_command_parses() { let cli =3D Cli::try_parse_from(["llminus", "pull", "test@kernel.o= rg"]).unwrap(); match cli.command { - Commands::Pull { message_id, command } =3D> { + Commands::Pull { message_id, command, .. } =3D> { assert_eq!(message_id, "test@kernel.org"); assert_eq!(command, "llm"); // default } @@ -2224,7 +2300,7 @@ fn test_pull_command_with_custom_command() { "llminus", "pull", "test@kernel.org", "-c", "my-llm --model fa= ncy" ]).unwrap(); match cli.command { - Commands::Pull { message_id, command } =3D> { + Commands::Pull { message_id, command, .. } =3D> { assert_eq!(message_id, "test@kernel.org"); assert_eq!(command, "my-llm --model fancy"); } --=20 2.51.0 From nobody Sun Feb 8 06:54:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 56BC2298CC4 for ; Sun, 11 Jan 2026 21:29:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166966; cv=none; b=SgZ5xWyw8RfIMnAtRqzECSpVd+Ex5m0/bTFsqDKeIPBe9XMHeJgVmn2gJ0dDDI5OHA+Ar3bZqeW7niG+EmuGMZELQzg9Fk5yiNRJ7yOW6Q6/9sTHSMdABuMjln0N6KULjQApfOOl3zUHWlrqRLsubGoi4q4nHkJtMfDoFLBhMIM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1768166966; c=relaxed/simple; bh=5z/km3cPIT+wVNjJehk/MJfggkGIzoou+eaipgS4gpM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=vBGtRBHoag90B6TYgz5t8ZgqrM78eKVyqfly0oUHGSQmJUd5fpi2ItwlsBYsDLa76VuR25V43CjSHAP+5DahlQj0i+FltvVWFIshHxIg3rLMr35Z2YsHcdoGrH2aqIuOcG7e+XSoOzhawxFYVe2JWBtInnG6eMEM+WP48e48eGA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=DxfaygYe; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="DxfaygYe" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9B2A6C2BC86; Sun, 11 Jan 2026 21:29:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1768166964; bh=5z/km3cPIT+wVNjJehk/MJfggkGIzoou+eaipgS4gpM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=DxfaygYeqKkpMfapP18tCs3jReRweeukjJbcmbHyFPQD706kvpc8PcuHHTMfxCcla UOkVwWEfRjpZFQz1sLF0KiLHrVUoVmAZiYF9dn9hMfAjaWoORJ0H6JUEswnxizY1gB bmA9FJZGRVRHmqGUQr1/s6uC8hyqSmPORgQDLnJ54roNRYhagOk87bmg1bYtafALJi GdBWRGgV/72uuqvZ7beVvppKs8PkeqZvPMHspY1CsMFy2F5yph9+Z13qOU0LvJiPLm s2YEuKggx2xUwApn2cEL8VlSsw+TGbqbJlD/5asjXYRBhmoBh0y+jK+Qn+E2CgVv2J 3lZ6UsOczaoRw== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC v2 7/7] LLMinus: Add build test integration for semantic conflicts Date: Sun, 11 Jan 2026 16:29:15 -0500 Message-ID: <20260111212915.195056-8-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20260111212915.195056-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> <20260111212915.195056-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Detect and resolve build failures that occur after merges complete without textual conflicts. These semantic conflicts happen when changes from different branches are incompatible at build time. When no textual conflicts exist, llminus runs the build to check for issues. If the build fails, it generates a specialized prompt for the LLM to investigate and fix the errors. The resolve prompt now requires build verification before considering resolution complete. Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 236 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 224 insertions(+), 12 deletions(-) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index 5c469e23f09a..40aa25e83b3d 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -1355,13 +1355,15 @@ fn build_merge_commit_message(pull_req: &PullReques= t, summary: &str, resolution: } =20 /// Get current conflicts from the working directory +/// Returns an empty Vec if there are no conflicts (for build-only issues) fn get_current_conflicts() -> Result> { check_repo()?; =20 // Find current conflicts let conflict_paths =3D get_conflicted_files()?; if conflict_paths.is_empty() { - bail!("No conflicts detected. Run this command when you have activ= e merge conflicts."); + // No conflicts - this is fine, might be a build-only issue + return Ok(Vec::new()); } =20 // Parse all conflict regions @@ -1372,10 +1374,6 @@ fn get_current_conflicts() -> Result> { } } =20 - if all_conflicts.is_empty() { - bail!("Could not parse any conflict markers from the conflicted fi= les."); - } - Ok(all_conflicts) } =20 @@ -1436,6 +1434,184 @@ fn try_find_similar_resolutions(n: usize, conflicts= : &[ConflictFile]) -> Vec BuildTestResult { + use std::process::Stdio; + + println!("Running build test: stable build log"); + + let result =3D Command::new("stable") + .args(["build", "log"]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(); + + match result { + Ok(output) =3D> { + let stderr =3D String::from_utf8_lossy(&output.stderr).to_stri= ng(); + let stdout =3D String::from_utf8_lossy(&output.stdout).to_stri= ng(); + + // Combine stderr (warnings/errors) with any stdout for full p= icture + let combined =3D if stderr.is_empty() { + stdout + } else if stdout.is_empty() { + stderr + } else { + format!("{}\n{}", stderr, stdout) + }; + + // Check for actual build issues - look for warning/error patt= erns + let has_issues =3D combined.lines().any(|line| { + let lower =3D line.to_lowercase(); + lower.contains("error:") || lower.contains("warning:") + || lower.contains("error[") || lower.contains("undefin= ed reference") + || lower.contains("fatal error") || lower.contains("ma= ke[") + }); + + BuildTestResult { + success: output.status.success() && !has_issues, + output: combined.trim().to_string(), + } + } + Err(e) =3D> BuildTestResult { + success: false, + output: format!("Failed to run build test: {}", e), + }, + } +} + +/// Build an LLM prompt for fixing build errors (when no merge conflicts e= xist) +fn build_fix_prompt(build_output: &str, merge_ctx: &MergeContext) -> Strin= g { + let mut prompt =3D String::new(); + + // Header with high-stakes framing (matching the existing style) + prompt.push_str("# Linux Kernel Build Issue Resolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintainer. A= merge completed without "); + prompt.push_str("textual conflicts, but the build test revealed warnin= gs or errors that need to be fixed.\n\n"); + + prompt.push_str("**Important:** Incorrect fixes have historically intr= oduced subtle bugs "); + prompt.push_str("that affected millions of users and took months to di= agnose. A fix that "); + prompt.push_str("silences a warning but has semantic errors is worse t= han no fix at all.\n\n"); + + prompt.push_str("Take the time to fully understand the build failure b= efore attempting "); + prompt.push_str("any fix. If after investigation you're not confident,= say so - it's "); + prompt.push_str("better to escalate to a human than to introduce a sub= tle bug.\n\n"); + + // Merge context + prompt.push_str("## Merge Context\n\n"); + if let Some(ref source) =3D merge_ctx.merge_source { + prompt.push_str(&format!("**Merged:** `{}`\n", source)); + } + if let Some(ref head) =3D merge_ctx.head_branch { + prompt.push_str(&format!("**Into:** `{}`\n", head)); + } + prompt.push_str("\nThe merge itself completed without textual conflict= s, but the build has issues.\n\n"); + + // Build output + prompt.push_str("## Build Output\n\n"); + prompt.push_str("The following warnings/errors were produced by `stabl= e build log` (allmodconfig build):\n\n"); + prompt.push_str("```\n"); + prompt.push_str(build_output); + prompt.push_str("\n```\n\n"); + + // Investigation requirement (matching existing style) + prompt.push_str("## Investigation Required\n\n"); + prompt.push_str("Before attempting any fix, you must conduct thorough = research. "); + prompt.push_str("Rushing to fix without understanding is how subtle bu= gs get introduced. "); + prompt.push_str("Work through each phase below IN ORDER and document y= our findings.\n\n"); + + // Phase 1: Search lore.kernel.org + prompt.push_str("### Phase 1: Search lore.kernel.org for Guidance (DO = THIS FIRST)\n\n"); + prompt.push_str("**CRITICAL:** Before doing ANY other research, search= lore.kernel.org for existing guidance.\n"); + prompt.push_str("Maintainers often discuss build issues when they know= they will occur after merges.\n\n"); + + if let Some(ref source) =3D merge_ctx.merge_source { + prompt.push_str(&format!("1. **Search for the merge itself:** `{}`= \n", source)); + prompt.push_str(&format!(" - URL: `https://lore.kernel.org/all/?= q=3D{}`\n", source.replace('/', "%2F"))); + } + prompt.push_str("2. **Search for similar build issues:**\n"); + prompt.push_str(" - Search for the specific error message or warning= text\n"); + prompt.push_str(" - `\"build error\"` + subsystem name\n\n"); + + // Phase 2: Understand the context + prompt.push_str("### Phase 2: Understand the Context\n\n"); + prompt.push_str("- **Identify the affected files** from the error/warn= ing messages\n"); + prompt.push_str("- **What subsystem is this?** Read the file and nearb= y files to understand its purpose.\n"); + prompt.push_str("- **Who maintains it?** Check `git log --oneline -20`= for recent authors.\n\n"); + + // Phase 3: Trace history + prompt.push_str("### Phase 3: Trace the History\n\n"); + prompt.push_str("**Understand what changed:**\n"); + prompt.push_str("- Run `git log --oneline HEAD~5..HEAD -- ` to s= ee recent changes\n"); + prompt.push_str("- Use `git show ` to read the full commit mes= sages\n"); + prompt.push_str("- Check both sides of the merge: `git diff HEAD^1..HE= AD^2 -- `\n\n"); + + prompt.push_str("**Common causes of post-merge build issues:**\n"); + prompt.push_str("- Missing includes when code from one branch uses typ= es/functions defined in another\n"); + prompt.push_str("- Renamed or removed functions/macros that the merged= code references\n"); + prompt.push_str("- Conflicting definitions (same symbol defined differ= ently in merged branches)\n"); + prompt.push_str("- Changes to function signatures that weren't propaga= ted to all callers\n"); + prompt.push_str("- Linker script or section changes that affect symbol= placement\n\n"); + + // Resolution + prompt.push_str("## Resolution\n\n"); + prompt.push_str("Once you understand the issue:\n\n"); + prompt.push_str("1. Fix the build issues in the affected files\n"); + prompt.push_str("2. Stage your changes with `git add`\n"); + prompt.push_str("3. **COMMIT BEFORE BUILD TEST:** Create a temporary c= ommit to test the build:\n"); + prompt.push_str(" ```bash\n"); + prompt.push_str(" git commit -m \"WIP: testing build fix\"\n"); + prompt.push_str(" ```\n"); + prompt.push_str(" The build system requires all changes to be commit= ted to test them.\n"); + prompt.push_str("4. **MANDATORY BUILD TEST:** Run `stable build log` t= o verify the fix\n"); + prompt.push_str("5. If the build fails, try to fix the issues, amend t= he commit, and re-run the build test\n"); + prompt.push_str("6. **ALWAYS UNDO TEMPORARY COMMIT:** Whether build su= cceeded or failed, you MUST undo the temporary commit:\n"); + prompt.push_str(" ```bash\n"); + prompt.push_str(" git reset --soft HEAD~1\n"); + prompt.push_str(" ```\n"); + prompt.push_str(" This is MANDATORY - the calling script expects sta= ged changes, not a committed state.\n"); + prompt.push_str(" If the build still fails after your best effort, r= eport the failure but STILL do the soft reset.\n\n"); + + // If uncertain + prompt.push_str("## If Uncertain\n\n"); + prompt.push_str("If after investigation you're still uncertain about t= he correct fix:\n\n"); + prompt.push_str("- Explain what you've learned and what remains unclea= r\n"); + prompt.push_str("- Describe the possible fixes you see and their trade= offs\n"); + prompt.push_str("- Recommend whether a human maintainer should review\= n\n"); + prompt.push_str("It's better to flag uncertainty than to silently intr= oduce a bug.\n\n"); + + // Tools available + prompt.push_str("## Tools Available\n\n"); + prompt.push_str("You can use these to investigate:\n\n"); + prompt.push_str("```bash\n"); + prompt.push_str("# View recent changes to the affected file\n"); + prompt.push_str("git log --oneline HEAD~10..HEAD -- \n"); + prompt.push('\n'); + prompt.push_str("# Compare the merge parents\n"); + prompt.push_str("git diff HEAD^1..HEAD^2 -- \n"); + prompt.push('\n'); + prompt.push_str("# Understand a specific commit\n"); + prompt.push_str("git show \n"); + prompt.push('\n'); + prompt.push_str("# Re-run the build test\n"); + prompt.push_str("stable build log\n"); + prompt.push_str("```\n"); + + prompt +} + /// Build the LLM prompt for conflict resolution fn build_resolve_prompt( conflicts: &[ConflictFile], @@ -1612,16 +1788,31 @@ fn build_resolve_prompt( prompt.push_str("1. Edit the conflicted files to produce the correct m= erged result\n"); prompt.push_str("2. Remove all conflict markers (`<<<<<<<`, `=3D=3D=3D= =3D=3D=3D=3D`, `>>>>>>>`)\n"); prompt.push_str("3. Stage the resolved files with `git add`\n"); + prompt.push_str("4. **COMMIT BEFORE BUILD TEST:** Create a temporary c= ommit to test the build:\n"); + prompt.push_str(" ```bash\n"); + prompt.push_str(" git commit -m \"WIP: testing merge resolution\"\n"= ); + prompt.push_str(" ```\n"); + prompt.push_str(" The build system requires all changes to be commit= ted to test them.\n"); + prompt.push_str("5. **MANDATORY BUILD TEST:** Verify the build succeed= s with no warnings or errors:\n"); + prompt.push_str(" ```bash\n"); + prompt.push_str(" stable build log\n"); + prompt.push_str(" ```\n"); + prompt.push_str(" This runs an allmodconfig build and outputs only w= arnings/errors to stderr.\n"); + prompt.push_str(" Review the output for ANY warnings or errors. If t= he build fails or produces warnings,\n"); + prompt.push_str(" you MUST try to fix them, amend the commit, and re= -run the build test.\n"); + prompt.push_str("6. **ALWAYS UNDO TEMPORARY COMMIT:** Whether build su= cceeded or failed, you MUST undo the temporary commit:\n"); + prompt.push_str(" ```bash\n"); + prompt.push_str(" git reset --soft HEAD~1\n"); + prompt.push_str(" ```\n"); + prompt.push_str(" This is MANDATORY - the calling script expects sta= ged changes, not a committed state.\n"); + prompt.push_str(" If the build still fails after your best effort, r= eport the failure but STILL do the soft reset.\n"); if pull_req.is_some() { - prompt.push_str("4. **Do NOT commit** - The tool will handle the c= ommit\n"); - prompt.push_str("5. **IMPORTANT:** Write a detailed explanation of= your resolution to `.git/LLMINUS_RESOLUTION`\n"); + prompt.push_str("7. **IMPORTANT:** Write a detailed explanation of= your resolution to `.git/LLMINUS_RESOLUTION`\n"); prompt.push_str(" This file should contain:\n"); prompt.push_str(" - A summary of each conflict and how you resol= ved it\n"); prompt.push_str(" - The reasoning behind your choices\n"); prompt.push_str(" - Any improvements you made over suggested res= olutions\n"); prompt.push_str(" This will be included in the merge commit mess= age.\n\n"); - } else { - prompt.push_str("4. Commit with a detailed message explaining your= analysis and resolution\n\n"); } =20 // If uncertain @@ -1664,6 +1855,24 @@ fn resolve(command: &str, max_tokens: usize) -> Resu= lt<()> { // Get current conflicts first let conflicts =3D get_current_conflicts()?; =20 + if conflicts.is_empty() { + // No textual conflicts - check for build issues instead + println!("No textual conflicts detected."); + println!("Running build test to check for merge-related build issu= es...\n"); + + let build_result =3D run_build_test(); + + if build_result.success { + println!("Build test passed. No issues to resolve."); + return Ok(()); + } + + // Build failed - invoke LLM to fix the issues + println!("\nBuild test failed! Invoking LLM to resolve build issue= s...\n"); + let prompt =3D build_fix_prompt(&build_result.output, &merge_ctx); + return invoke_llm(command, &prompt); + } + println!("Found {} conflict(s)", conflicts.len()); =20 // Try to find similar historical resolutions (gracefully handles miss= ing database) @@ -2145,7 +2354,7 @@ fn test_resolution_store_roundtrip() { let dir =3D TempDir::new().unwrap(); let store_path =3D dir.path().join("resolutions.json"); =20 - let mut store =3D ResolutionStore { version: 3, resolutions: Vec::= new() }; + let mut store =3D ResolutionStore { version: 3, resolutions: Vec::= new(), processed_commits: Vec::new() }; store.resolutions.push(MergeResolution { commit_hash: "abc123".to_string(), commit_summary: "Test merge".to_string(), @@ -2197,6 +2406,7 @@ fn test_git_in_repo() { =20 #[test] fn test_get_merge_commits() { + let original_dir =3D std::env::current_dir().unwrap(); let dir =3D init_test_repo(); std::env::set_current_dir(dir.path()).unwrap(); =20 @@ -2209,6 +2419,8 @@ fn test_get_merge_commits() { =20 let merges =3D get_merge_commits(None).unwrap(); assert_eq!(merges.len(), 1); + + std::env::set_current_dir(original_dir).unwrap(); } =20 #[test] @@ -2436,7 +2648,7 @@ fn test_build_resolve_prompt_with_pull_request() { assert!(prompt.contains("test.c")); // conflict file assert!(prompt.contains("int ours;")); // ours content assert!(prompt.contains("int theirs;")); // theirs content - assert!(prompt.contains("Do NOT commit")); // pull request specific + assert!(prompt.contains("Write a detailed explanation")); // pull = request specific } =20 #[test] @@ -2461,6 +2673,6 @@ fn test_build_resolve_prompt_without_pull_request() { assert!(!prompt.contains("Pull Request Information")); assert!(prompt.contains("test.c")); assert!(prompt.contains("int ours;")); - assert!(prompt.contains("Commit with a detailed message")); // not= "Do NOT commit" + assert!(!prompt.contains("Write a detailed explanation")); // PR-s= pecific, not in standard prompt } } --=20 2.51.0