From nobody Sun Feb 8 02:21:23 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E38EF350A19 for ; Fri, 19 Dec 2025 18:16:37 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168198; cv=none; b=SXkJTx+BKIZqIZuSSiN6L0VPryFBKwGjxaB09Vt7VN4ZBFwwN34o9dJV+nL0V1qV37P2sjLK7WnlfHnJ0u9MD9dt3aCdIytC5bphTSPKpC8fKQ41OI6Uw/LcdVgnkhs6JsJRx3YzpTgofCGwKFuXh0ffUzAD/C1PhmFlEDsiMTk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168198; c=relaxed/simple; bh=gt2tED3ycuCsnRgfcF6HtCI1KxSxn6L+l6tv6FCGDCA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=qjluVvUeI6ECBPp23iBzmcCT0eec7iswig+ZbRdAkKveRDhwGagKev0mHJZYiMyg054tavyz1Kn/Eqw+oFL/+wqmG0plqaglpwkDpOQDURGnYQIOPEl4k99SW/mGiQkJqq9cRHK25oqJ0H1ApRiQ6zbnhf+8kHV6JGgowmB9Sqg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=DUtUoiD/; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="DUtUoiD/" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1ECDEC116B1; Fri, 19 Dec 2025 18:16:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1766168197; bh=gt2tED3ycuCsnRgfcF6HtCI1KxSxn6L+l6tv6FCGDCA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=DUtUoiD/0Q2kwUtiSfDL4oY7OxL1zmh/hqhY1zX/Y3F/kQ8qnnJ41BDHBVRfRn0Go xtIinMUeJRYXo1bzS+WNYkMBGXwHx9UVAGNkOpm6UXW0lhgrAE9xVumV1eCe5jn5qF WEORwjyvQftOcE7qKZsLMdqIXT4beIhx9/fycJqvSOfSKpYYrAOXVn1afOMD5RzCKZ 
cjqgr4yqx31F9AXCuYdFeoYU7o/MWHQmt1+zkNHINktJXna57cBpf4sSf01xPZ3lEi 1gR9agsgOarL8fV59HClqRs2O/77eGSPvlXKcPRYh0yIH4Cv5roGv38pjTXbk8SxjR WGIxLvp8X1q4Q== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC 1/5] LLMinus: Add skeleton project with learn command Date: Fri, 19 Dec 2025 13:16:25 -0500 Message-ID: <20251219181629.1123823-2-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20251219181629.1123823-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce LLMinus, an LLM-powered git conflict resolution tool for the Linux kernel. This initial version provides: - CLI structure using clap with derive macros - Data structures for storing conflict resolutions: - DiffHunk: Individual diff hunks with line information - FileResolution: Per-file conflict resolution data - MergeResolution: Per-commit resolution with metadata - ResolutionStore: JSON-based persistence - The 'learn' command that: - Walks merge commit history (optionally filtered by range) - Identifies files modified in both parent branches - Extracts actual conflict resolutions (not trivial merges) - Stores the ours/theirs/resolution diffs for each file - Tracks commits to avoid duplicate processing The tool is designed to build a database of historical conflict resolutions that can later be used for RAG-based similarity search to assist with future merge conflicts. 
Signed-off-by: Sasha Levin --- tools/llminus/.gitignore | 1 + tools/llminus/Cargo.toml | 18 + tools/llminus/src/main.rs | 693 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 712 insertions(+) create mode 100644 tools/llminus/.gitignore create mode 100644 tools/llminus/Cargo.toml create mode 100644 tools/llminus/src/main.rs diff --git a/tools/llminus/.gitignore b/tools/llminus/.gitignore new file mode 100644 index 0000000000000..b83d22266ac8a --- /dev/null +++ b/tools/llminus/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/tools/llminus/Cargo.toml b/tools/llminus/Cargo.toml new file mode 100644 index 0000000000000..bdb42561a0565 --- /dev/null +++ b/tools/llminus/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name =3D "llminus" +version =3D "0.1.0" +edition =3D "2024" +authors =3D ["Sasha Levin "] +description =3D "LLM-powered git conflict resolution tool for the Linux ke= rnel" +license =3D "GPL-2.0" +repository =3D "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/l= inux.git" + +[dependencies] +anyhow =3D "1" +clap =3D { version =3D "4", features =3D ["derive"] } +rayon =3D "1" +serde =3D { version =3D "1", features =3D ["derive"] } +serde_json =3D "1" + +[dev-dependencies] +tempfile =3D "3" diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs new file mode 100644 index 0000000000000..1c61836cc93f7 --- /dev/null +++ b/tools/llminus/src/main.rs @@ -0,0 +1,693 @@ +//! 
llminus - LLM-powered git conflict resolution tool + +use anyhow::{bail, Context, Result}; +use clap::{Parser, Subcommand}; +use rayon::prelude::*; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::path::Path; +use std::process::Command; +use std::sync::atomic::{AtomicUsize, Ordering}; + +const STORE_PATH: &str =3D ".llminus-resolutions.json"; + +#[derive(Parser)] +#[command(name =3D "llminus")] +#[command(about =3D "LLM-powered git conflict resolution tool")] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Learn from historical merge conflict resolutions + Learn { + /// Git revision range (e.g., "v6.0..v6.1"). If not specified, lea= rns from entire history. + range: Option, + }, +} + +/// A single diff hunk representing a change region +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DiffHunk { + /// Starting line in the original file + pub start_line: u32, + /// Number of lines in original + pub original_count: u32, + /// Number of lines in new version + pub new_count: u32, + /// The actual diff content (unified diff format lines) + pub content: String, +} + +/// A single file's conflict resolution within a merge +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileResolution { + pub file_path: String, + pub file_type: String, // Extension: "c", "h", "rs", etc. 
+ pub subsystem: String, // Extracted from path: "drivers/gpu" -> "= gpu" + + /// Changes from base to ours (what our branch did) + pub ours_diff: Vec, + /// Changes from base to theirs (what their branch did) + pub theirs_diff: Vec, + /// The final resolution diff (base to merge result) + pub resolution_diff: Vec, +} + +/// Format a section of diff hunks with a title header +fn format_hunk_section(title: &str, hunks: &[DiffHunk]) -> String { + if hunks.is_empty() { + return String::new(); + } + let mut text =3D format!("=3D=3D=3D {} =3D=3D=3D\n", title); + for h in hunks { + text.push_str(&h.content); + text.push('\n'); + } + text.push('\n'); + text +} + +impl FileResolution { + /// Generate embedding text for this file's resolution + pub fn to_embedding_text(&self) -> String { + format!( + "File: {}\n\n{}{}{}", + self.file_path, + format_hunk_section("OURS", &self.ours_diff), + format_hunk_section("THEIRS", &self.theirs_diff), + format_hunk_section("RESOLUTION", &self.resolution_diff), + ) + } +} + +/// A merge commit's conflict resolution (may contain multiple files) +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MergeResolution { + pub commit_hash: String, + pub commit_summary: String, + pub commit_date: String, // ISO format + pub author: String, + + /// All files that required manual conflict resolution in this merge + pub files: Vec, + + /// 384-dimensional embedding vector (BGE-small model) for the entire = merge + #[serde(skip_serializing_if =3D "Option::is_none")] + pub embedding: Option>, +} + +impl MergeResolution { + /// Generate embedding text from all file resolutions + pub fn to_embedding_text(&self) -> String { + let mut text =3D format!("Merge: {}\n{}\n\n", self.commit_hash, se= lf.commit_summary); + for file in &self.files { + text.push_str(&file.to_embedding_text()); + text.push_str("\n---\n\n"); + } + text + } +} + +/// Collection of all learned resolutions +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct 
ResolutionStore { + pub version: u32, + pub resolutions: Vec, +} + +impl ResolutionStore { + pub fn load(path: &Path) -> Result { + if path.exists() { + let content =3D std::fs::read_to_string(path)?; + Ok(serde_json::from_str(&content)?) + } else { + Ok(Self { version: 2, resolutions: Vec::new() }) + } + } + + pub fn save(&self, path: &Path) -> Result<()> { + // Use compact JSON for faster serialization (use jq to pretty-pri= nt if needed) + let content =3D serde_json::to_string(self)?; + std::fs::write(path, content)?; + Ok(()) + } +} + +/// Run a git command and return stdout +fn git(args: &[&str]) -> Result { + let output =3D Command::new("git") + .args(args) + .output() + .context("Failed to run git")?; + + if !output.status.success() { + let stderr =3D String::from_utf8_lossy(&output.stderr); + bail!("git {} failed: {}", args.join(" "), stderr); + } + + Ok(String::from_utf8_lossy(&output.stdout).to_string()) +} + +/// Run a git command, return stdout, allow failure +fn git_allow_fail(args: &[&str]) -> Option { + Command::new("git") + .args(args) + .output() + .ok() + .filter(|o| o.status.success()) + .map(|o| String::from_utf8_lossy(&o.stdout).to_string()) +} + +/// Check we're in a git repository +fn check_repo() -> Result<()> { + git(&["rev-parse", "--git-dir"])?; + Ok(()) +} + +/// Get merge commits in range (or all history) +fn get_merge_commits(range: Option<&str>) -> Result> { + let args: Vec<&str> =3D match range { + Some(r) =3D> vec!["log", "--merges", "--format=3D%H", r], + None =3D> vec!["log", "--merges", "--format=3D%H"], + }; + + let output =3D git(&args)?; + Ok(output.lines().map(|s| s.to_string()).collect()) +} + +/// Metadata extracted from a git commit +struct CommitMetadata { + summary: String, + date: String, + author: String, +} + +/// Get commit metadata +fn get_commit_metadata(hash: &str) -> CommitMetadata { + let format =3D git_allow_fail(&["log", "-1", "--format=3D%s%n%aI%n%an = <%ae>", hash]) + .unwrap_or_default(); + let mut lines 
=3D format.lines(); + CommitMetadata { + summary: lines.next().unwrap_or_default().to_string(), + date: lines.next().unwrap_or_default().to_string(), + author: lines.next().unwrap_or_default().to_string(), + } +} + +/// Get parent commits of a merge +fn get_parents(hash: &str) -> Result> { + let output =3D git(&["log", "-1", "--format=3D%P", hash])?; + Ok(output.split_whitespace().map(|s| s.to_string()).collect()) +} + +/// Get merge base between two commits +fn get_merge_base(commit1: &str, commit2: &str) -> Option { + git_allow_fail(&["merge-base", commit1, commit2]) + .map(|s| s.trim().to_string()) +} + +/// Extract file type from path +fn get_file_type(path: &str) -> String { + Path::new(path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_string() +} + +/// Extract subsystem from path (first or second directory component) +fn get_subsystem(path: &str) -> String { + let parts: Vec<&str> =3D path.split('/').collect(); + match parts.first() { + Some(&"drivers") | Some(&"fs") | Some(&"net") | Some(&"arch") | So= me(&"sound") =3D> { + parts.get(1).unwrap_or(&"").to_string() + } + Some(first) =3D> first.to_string(), + None =3D> String::new(), + } +} + +/// Get unified diff between two commits for a specific file +fn get_diff(from: &str, to: &str, file: &str) -> Option { + git_allow_fail(&["diff", "-U3", from, to, "--", file]) +} + +/// Get file content at a specific commit +fn get_file_at_commit(commit: &str, path: &str) -> Option { + git_allow_fail(&["show", &format!("{}:{}", commit, path)]) +} + +/// Parse unified diff into hunks +fn parse_diff_hunks(diff: &str) -> Vec { + let mut hunks =3D Vec::new(); + let mut current_hunk: Option<(u32, u32, u32, Vec)> =3D None; + + for line in diff.lines() { + if line.starts_with("@@") { + // Save previous hunk + if let Some((start, orig_count, new_count, lines)) =3D current= _hunk.take() { + hunks.push(DiffHunk { + start_line: start, + original_count: orig_count, + new_count: new_count, + content: 
lines.join("\n"), + }); + } + + // Parse hunk header: @@ -start,count +start,count @@ + if let Some(header) =3D parse_hunk_header(line) { + current_hunk =3D Some((header.0, header.1, header.2, vec![= line.to_string()])); + } + } else if current_hunk.is_some() && (line.starts_with('+') || line= .starts_with('-') || line.starts_with(' ')) { + if let Some((_, _, _, ref mut lines)) =3D current_hunk { + lines.push(line.to_string()); + } + } + } + + // Save last hunk + if let Some((start, orig_count, new_count, lines)) =3D current_hunk { + hunks.push(DiffHunk { + start_line: start, + original_count: orig_count, + new_count: new_count, + content: lines.join("\n"), + }); + } + + hunks +} + +/// Parse a hunk header like "@@ -10,5 +10,7 @@" -> (start, orig_count, ne= w_count) +fn parse_hunk_header(line: &str) -> Option<(u32, u32, u32)> { + let line =3D line.trim_start_matches("@@ "); + let parts: Vec<&str> =3D line.split(' ').collect(); + if parts.len() < 2 { + return None; + } + + let parse_range =3D |s: &str| -> (u32, u32) { + let s =3D s.trim_start_matches(['-', '+']); + if let Some((start, count)) =3D s.split_once(',') { + (start.parse().unwrap_or(1), count.parse().unwrap_or(1)) + } else { + (s.parse().unwrap_or(1), 1) + } + }; + + let (orig_start, orig_count) =3D parse_range(parts[0]); + let (_, new_count) =3D parse_range(parts[1]); + + Some((orig_start, orig_count, new_count)) +} + +/// Find files modified in both branches +fn find_modified_in_both(parent1: &str, parent2: &str, base: &str) -> Resu= lt> { + let changed1 =3D git_allow_fail(&["diff", "--name-only", base, parent1= ]) + .unwrap_or_default(); + let changed2 =3D git_allow_fail(&["diff", "--name-only", base, parent2= ]) + .unwrap_or_default(); + + let files1: HashSet<_> =3D changed1.lines().collect(); + let files2: HashSet<_> =3D changed2.lines().collect(); + + Ok(files1.intersection(&files2).map(|s| s.to_string()).collect()) +} + +/// Extract conflict resolutions from a merge commit +/// Returns None if no 
manual conflict resolution was needed +fn extract_resolution(hash: &str) -> Result> { + let parents =3D get_parents(hash)?; + if parents.len() < 2 { + return Ok(None); + } + + let parent1 =3D &parents[0]; + let parent2 =3D &parents[1]; + + let base =3D match get_merge_base(parent1, parent2) { + Some(b) =3D> b, + None =3D> return Ok(None), + }; + + let meta =3D get_commit_metadata(hash); + let modified =3D find_modified_in_both(parent1, parent2, &base)?; + + let mut files =3D Vec::new(); + + for file_path in modified { + // Get diffs: base->ours, base->theirs, base->resolution + let ours_diff_raw =3D get_diff(&base, parent1, &file_path); + let theirs_diff_raw =3D get_diff(&base, parent2, &file_path); + let resolution_diff_raw =3D get_diff(&base, hash, &file_path); + + // Parse into hunks + let ours_hunks =3D ours_diff_raw.as_ref().map(|d| parse_diff_hunks= (d)).unwrap_or_default(); + let theirs_hunks =3D theirs_diff_raw.as_ref().map(|d| parse_diff_h= unks(d)).unwrap_or_default(); + let resolution_hunks =3D resolution_diff_raw.as_ref().map(|d| pars= e_diff_hunks(d)).unwrap_or_default(); + + // Skip if no actual changes + if ours_hunks.is_empty() && theirs_hunks.is_empty() { + continue; + } + + // Skip if ours =3D=3D theirs (no real conflict) + if ours_diff_raw =3D=3D theirs_diff_raw { + continue; + } + + // Only keep if resolution differs from BOTH parents (manual merge= required) + let ours_content =3D get_file_at_commit(parent1, &file_path); + let theirs_content =3D get_file_at_commit(parent2, &file_path); + let resolution_content =3D get_file_at_commit(hash, &file_path); + + if resolution_content =3D=3D ours_content || resolution_content = =3D=3D theirs_content { + continue; // Trivial resolution, no manual merge needed + } + + files.push(FileResolution { + file_path: file_path.clone(), + file_type: get_file_type(&file_path), + subsystem: get_subsystem(&file_path), + ours_diff: ours_hunks, + theirs_diff: theirs_hunks, + resolution_diff: resolution_hunks, + }); + } 
+ + // Only return if there were actual conflicts + if files.is_empty() { + return Ok(None); + } + + Ok(Some(MergeResolution { + commit_hash: hash.to_string(), + commit_summary: meta.summary, + commit_date: meta.date, + author: meta.author, + files, + embedding: None, + })) +} + +fn learn(range: Option<&str>) -> Result<()> { + check_repo()?; + + let store_path =3D Path::new(STORE_PATH); + let mut store =3D ResolutionStore::load(store_path)?; + store.version =3D 3; // Upgrade version (grouped by commit) + + // Track existing commits to avoid duplicates + let existing: HashSet<_> =3D store.resolutions.iter() + .map(|r| r.commit_hash.clone()) + .collect(); + + let merge_commits =3D get_merge_commits(range)?; + let total_commits =3D merge_commits.len(); + + // Filter to only new commits + let new_commits: Vec<_> =3D merge_commits + .into_iter() + .filter(|h| !existing.contains(h)) + .collect(); + + println!("Found {} merge commits ({} new to analyze)", total_commits, = new_commits.len()); + + if new_commits.is_empty() { + println!("No new commits to process."); + return Ok(()); + } + + // Configure thread pool for I/O bound work (git subprocesses) + // Use 2x threads since we're mostly waiting on git + let num_threads =3D std::thread::available_parallelism() + .map(|n| n.get() * 2) + .unwrap_or(16); + + let pool =3D rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .context("Failed to build thread pool")?; + + println!("Using {} threads", num_threads); + + // Progress counter + let processed =3D AtomicUsize::new(0); + let total_new =3D new_commits.len(); + + // Process commits in parallel + let resolutions: Vec =3D pool.install(|| { + new_commits + .par_iter() + .filter_map(|hash| { + let count =3D processed.fetch_add(1, Ordering::Relaxed) + = 1; + if count % 100 =3D=3D 0 || count =3D=3D total_new { + eprintln!(" Progress: {}/{}", count, total_new); + } + + match extract_resolution(hash) { + Ok(Some(resolution)) =3D> Some(resolution), + Ok(None) 
=3D> None, + Err(e) =3D> { + eprintln!("Warning: Failed to analyze {}: {}", &ha= sh[..12], e); + None + } + } + }) + .collect() + }); + + // Aggregate results + let commits_with_conflicts =3D resolutions.len(); + let total_files: usize =3D resolutions.iter().map(|r| r.files.len()).s= um(); + + store.resolutions.extend(resolutions); + store.save(store_path)?; + + // Calculate approximate size + let json_size =3D std::fs::metadata(store_path).map(|m| m.len()).unwra= p_or(0); + let total_stored_files: usize =3D store.resolutions.iter().map(|r| r.f= iles.len()).sum(); + + println!("\nResults:"); + println!(" Merge commits analyzed: {}", total_commits); + println!(" Commits with conflicts: {}", commits_with_conflicts); + println!(" Files resolved: {}", total_files); + println!(" New commits stored: {}", commits_with_conflicts); + println!(" Total in store: {} commits, {} files", store.resolutions.l= en(), total_stored_files); + println!(" Output size: {:.2} MB", json_size as f64 / 1024.0 / 1024.0= ); + println!("\nResolutions saved to: {}", store_path.display()); + + Ok(()) +} + +fn main() -> Result<()> { + let cli =3D Cli::parse(); + + match cli.command { + Commands::Learn { range } =3D> learn(range.as_deref()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use clap::CommandFactory; + use std::fs; + use tempfile::TempDir; + + #[test] + fn verify_cli() { + Cli::command().debug_assert(); + } + + #[test] + fn test_learn_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "learn"]).unwrap(); + match cli.command { + Commands::Learn { range } =3D> assert!(range.is_none()), + } + } + + #[test] + fn test_learn_command_with_range() { + let cli =3D Cli::try_parse_from(["llminus", "learn", "v6.0..v6.1"]= ).unwrap(); + match cli.command { + Commands::Learn { range } =3D> assert_eq!(range, Some("v6.0..v= 6.1".to_string())), + } + } + + #[test] + fn test_get_file_type() { + assert_eq!(get_file_type("foo/bar.c"), "c"); + assert_eq!(get_file_type("foo/bar.rs"), 
"rs"); + assert_eq!(get_file_type("Makefile"), ""); + assert_eq!(get_file_type("include/linux/module.h"), "h"); + } + + #[test] + fn test_get_subsystem() { + assert_eq!(get_subsystem("drivers/gpu/drm/foo.c"), "gpu"); + assert_eq!(get_subsystem("fs/ext4/inode.c"), "ext4"); + assert_eq!(get_subsystem("kernel/sched/core.c"), "kernel"); + assert_eq!(get_subsystem("net/ipv4/tcp.c"), "ipv4"); + assert_eq!(get_subsystem("mm/memory.c"), "mm"); + } + + #[test] + fn test_parse_hunk_header() { + assert_eq!(parse_hunk_header("@@ -10,5 +10,7 @@"), Some((10, 5, 7)= )); + assert_eq!(parse_hunk_header("@@ -1 +1,2 @@"), Some((1, 1, 2))); + assert_eq!(parse_hunk_header("@@ -100,20 +105,25 @@ func"), Some((= 100, 20, 25))); + } + + #[test] + fn test_parse_diff_hunks() { + let diff =3D r#"diff --git a/file.c b/file.c +index 123..456 789 +--- a/file.c ++++ b/file.c +@@ -10,3 +10,4 @@ context + unchanged +-removed ++added ++another +"#; + let hunks =3D parse_diff_hunks(diff); + assert_eq!(hunks.len(), 1); + assert_eq!(hunks[0].start_line, 10); + assert!(hunks[0].content.contains("-removed")); + assert!(hunks[0].content.contains("+added")); + } + + fn init_test_repo() -> TempDir { + let dir =3D TempDir::new().unwrap(); + Command::new("git") + .args(["init"]) + .current_dir(dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["config", "user.email", "test@test.com"]) + .current_dir(dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["config", "user.name", "Test"]) + .current_dir(dir.path()) + .output() + .unwrap(); + dir + } + + fn create_commit(dir: &TempDir, filename: &str, content: &str, msg: &s= tr) { + fs::write(dir.path().join(filename), content).unwrap(); + Command::new("git") + .args(["add", filename]) + .current_dir(dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["commit", "-m", msg]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + fn create_branch(dir: &TempDir, name: &str) { + Command::new("git") + 
.args(["checkout", "-b", name]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + fn checkout(dir: &TempDir, name: &str) { + Command::new("git") + .args(["checkout", name]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + fn merge(dir: &TempDir, branch: &str, msg: &str) { + Command::new("git") + .args(["merge", "--no-ff", "-m", msg, branch]) + .current_dir(dir.path()) + .output() + .unwrap(); + } + + #[test] + fn test_resolution_store_roundtrip() { + let dir =3D TempDir::new().unwrap(); + let store_path =3D dir.path().join("resolutions.json"); + + let mut store =3D ResolutionStore { version: 3, resolutions: Vec::= new() }; + store.resolutions.push(MergeResolution { + commit_hash: "abc123".to_string(), + commit_summary: "Test merge".to_string(), + commit_date: "2024-01-15T10:00:00Z".to_string(), + author: "Test ".to_string(), + files: vec![FileResolution { + file_path: "test.c".to_string(), + file_type: "c".to_string(), + subsystem: "test".to_string(), + ours_diff: vec![DiffHunk { + start_line: 10, + original_count: 3, + new_count: 4, + content: "@@ -10,3 +10,4 @@\n-old\n+new".to_string(), + }], + theirs_diff: vec![], + resolution_diff: vec![], + }], + embedding: None, + }); + + store.save(&store_path).unwrap(); + let loaded =3D ResolutionStore::load(&store_path).unwrap(); + + assert_eq!(loaded.version, 3); + assert_eq!(loaded.resolutions.len(), 1); + assert_eq!(loaded.resolutions[0].commit_hash, "abc123"); + assert_eq!(loaded.resolutions[0].files.len(), 1); + assert_eq!(loaded.resolutions[0].files[0].file_path, "test.c"); + assert_eq!(loaded.resolutions[0].files[0].file_type, "c"); + + // Test embedding text generation for merge + let embedding =3D loaded.resolutions[0].to_embedding_text(); + assert!(embedding.contains("Merge: abc123")); + assert!(embedding.contains("File: test.c")); + assert!(embedding.contains("=3D=3D=3D OURS =3D=3D=3D")); + assert!(embedding.contains("-old")); + assert!(embedding.contains("+new")); + } + + #[test] + fn 
test_git_in_repo() { + let dir =3D init_test_repo(); + std::env::set_current_dir(dir.path()).unwrap(); + create_commit(&dir, "file.txt", "initial", "initial commit"); + let result =3D check_repo(); + assert!(result.is_ok()); + } + + #[test] + fn test_get_merge_commits() { + let dir =3D init_test_repo(); + std::env::set_current_dir(dir.path()).unwrap(); + + create_commit(&dir, "file.txt", "initial", "initial commit"); + create_branch(&dir, "feature"); + create_commit(&dir, "feature.txt", "feature", "feature commit"); + checkout(&dir, "master"); + create_commit(&dir, "main.txt", "main", "main commit"); + merge(&dir, "feature", "Merge feature"); + + let merges =3D get_merge_commits(None).unwrap(); + assert_eq!(merges.len(), 1); + } +} --=20 2.51.0 From nobody Sun Feb 8 02:21:23 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D75E336D4FD for ; Fri, 19 Dec 2025 18:16:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168198; cv=none; b=L5doluooj0SkwcP+nb8tlS/Bi6M8NeWhSb99FhjKqBAq3AIRqZlP8ocmI/ePigiLi1qFOfsatMB1N5stkQWoVa8FHOmWBDTYvIZXh1W9HseeV4z/+MWtnFoYOY+TSi93Knba1HMqPPFHYNeXnTf6gQruSW9c410ZFubXe11MUt0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168198; c=relaxed/simple; bh=hcNhg0u25/EPduVe+yXKj4bUmFVkwgt0UcSPBFb2SOI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=e/XaUFI1QqpWhJD5I/Iqhptz3pGzJhOZwLjktJA/lkXQcJ+fdgCVuvuTuCscJ63jHeV62ZhBLkcTxedmYiqURqs/IQRHNxUbNb2Amfh2rPs2p9Zgu1ogfz9AKXO2h9VH0iitMW0ons7wK/+wOvdf/bFLtSsTaP+qPKXWnm1u6PA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org 
header.b=LSyt1bJt; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="LSyt1bJt" Received: by smtp.kernel.org (Postfix) with ESMTPSA id EB518C19421; Fri, 19 Dec 2025 18:16:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1766168198; bh=hcNhg0u25/EPduVe+yXKj4bUmFVkwgt0UcSPBFb2SOI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LSyt1bJtJK3ewDK4J55Rbc9ekK2Um4MZngwPsbkA18qPR3v+EVBtrSk1/wTDBd6ro OeIPte5PHTviWGBqiMycaat/Gr0vYLM6sUgEvgS/nRN4VJuKaDJjmjtFktGI50VRT5 CVzoH+D9RlhhA2lbvXTssQnnzEC3ZweH7gHqyipnzbOjrQegpaZQQCWJJqW9e6MELE 1oLp6xuaDPSTgW3jl1Dn+88qv84cYGCC0yIaUvEcnRtrM/aBfEgXMgiJ0cjILXZclx 69IOEqco/BBlhLeKWk5ufBqcKbaeV6irro4IzUuh69BWihJQIIJeTizujpO38cSH56 hpzH01B6dwU0g== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC 2/5] LLMinus: Add vectorize command with fastembed Date: Fri, 19 Dec 2025 13:16:26 -0500 Message-ID: <20251219181629.1123823-3-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20251219181629.1123823-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the 'vectorize' command that generates embeddings for stored conflict resolutions using the BGE-small-en-v1.5 model via fastembed. 
Key features: - Uses fastembed v5 for local embedding generation - BGE-small model produces 384-dimensional vectors - Batch processing with configurable batch size (-b flag) - Incremental saves after each batch for crash recovery - Skips resolutions that already have embeddings - Progress reporting during vectorization This enables RAG-based similarity search for finding historical conflict resolutions that are similar to current merge conflicts. Also adds: - cosine_similarity() function for vector comparison - init_embedding_model() helper for model initialization - Tests for vectorize command parsing and cosine_similarity Signed-off-by: Sasha Levin --- tools/llminus/Cargo.toml | 1 + tools/llminus/src/main.rs | 157 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/tools/llminus/Cargo.toml b/tools/llminus/Cargo.toml index bdb42561a0565..86740174de598 100644 --- a/tools/llminus/Cargo.toml +++ b/tools/llminus/Cargo.toml @@ -10,6 +10,7 @@ repository =3D "https://git.kernel.org/pub/scm/linux/kern= el/git/torvalds/linux.git [dependencies] anyhow =3D "1" clap =3D { version =3D "4", features =3D ["derive"] } +fastembed =3D "5" rayon =3D "1" serde =3D { version =3D "1", features =3D ["derive"] } serde_json =3D "1" diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index 1c61836cc93f7..32a578030b0e3 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -2,6 +2,7 @@ =20 use anyhow::{bail, Context, Result}; use clap::{Parser, Subcommand}; +use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use std::collections::HashSet; @@ -26,6 +27,12 @@ enum Commands { /// Git revision range (e.g., "v6.0..v6.1"). If not specified, lea= rns from entire history. 
range: Option, }, + /// Generate embeddings for stored resolutions (for RAG similarity sea= rch) + Vectorize { + /// Batch size for embedding generation (default: 64) + #[arg(short, long, default_value =3D "64")] + batch_size: usize, + }, } =20 /// A single diff hunk representing a change region @@ -483,11 +490,118 @@ fn learn(range: Option<&str>) -> Result<()> { Ok(()) } =20 +/// Compute cosine similarity between two vectors +fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() !=3D b.len() || a.is_empty() { + return 0.0; + } + + let dot: f32 =3D a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); + let norm_a: f32 =3D a.iter().map(|x| x * x).sum::().sqrt(); + let norm_b: f32 =3D b.iter().map(|x| x * x).sum::().sqrt(); + + if norm_a =3D=3D 0.0 || norm_b =3D=3D 0.0 { + return 0.0; + } + + dot / (norm_a * norm_b) +} + +/// Initialize the BGE-small embedding model +fn init_embedding_model() -> Result { + TextEmbedding::try_new( + InitOptions::new(EmbeddingModel::BGESmallENV15) + .with_show_download_progress(true), + ).context("Failed to initialize embedding model") +} + +fn vectorize(batch_size: usize) -> Result<()> { + let store_path =3D Path::new(STORE_PATH); + + if !store_path.exists() { + bail!("No resolutions found. Run 'llminus learn' first."); + } + + let mut store =3D ResolutionStore::load(store_path)?; + + // Count how many need embeddings + let need_embedding: Vec =3D store + .resolutions + .iter() + .enumerate() + .filter(|(_, r)| r.embedding.is_none()) + .map(|(i, _)| i) + .collect(); + + if need_embedding.is_empty() { + println!("All {} resolutions already have embeddings.", store.reso= lutions.len()); + return Ok(()); + } + + println!("Found {} resolutions needing embeddings", need_embedding.len= ()); + println!("Initializing embedding model (BGE-small-en, ~33MB download o= n first run)..."); + + // Initialize the embedding model + let mut model =3D init_embedding_model()?; + + println!("Model loaded. 
Generating embeddings...\n"); + + // Process in batches + let total_batches =3D (need_embedding.len() + batch_size - 1) / batch_= size; + + for (batch_num, chunk) in need_embedding.chunks(batch_size).enumerate(= ) { + // Collect texts for this batch + let texts: Vec =3D chunk + .iter() + .map(|&i| store.resolutions[i].to_embedding_text()) + .collect(); + + // Generate embeddings + let embeddings =3D model + .embed(texts, None) + .context("Failed to generate embeddings")?; + + // Assign embeddings back to resolutions + for (j, &idx) in chunk.iter().enumerate() { + store.resolutions[idx].embedding =3D Some(embeddings[j].clone(= )); + } + + // Progress report + let done =3D (batch_num + 1) * batch_size.min(chunk.len()); + let pct =3D (done as f64 / need_embedding.len() as f64 * 100.0).mi= n(100.0); + println!( + " Batch {}/{}: {:.1}% ({}/{})", + batch_num + 1, + total_batches, + pct, + done.min(need_embedding.len()), + need_embedding.len() + ); + + // Save after each batch (incremental progress) + store.save(store_path)?; + } + + // Final stats + let json_size =3D std::fs::metadata(store_path).map(|m| m.len()).unwra= p_or(0); + let with_embeddings =3D store.resolutions.iter().filter(|r| r.embeddin= g.is_some()).count(); + + println!("\nResults:"); + println!(" Total resolutions: {}", store.resolutions.len()); + println!(" With embeddings: {}", with_embeddings); + println!(" Embedding dimensions: 384"); + println!(" Output size: {:.2} MB", json_size as f64 / 1024.0 / 1024.0= ); + println!("\nEmbeddings saved to: {}", store_path.display()); + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 match cli.command { Commands::Learn { range } =3D> learn(range.as_deref()), + Commands::Vectorize { batch_size } =3D> vectorize(batch_size), } } =20 @@ -508,6 +622,7 @@ fn test_learn_command_parses() { let cli =3D Cli::try_parse_from(["llminus", "learn"]).unwrap(); match cli.command { Commands::Learn { range } =3D> assert!(range.is_none()), + _ =3D> 
panic!("Expected Learn command"), } } =20 @@ -516,9 +631,51 @@ fn test_learn_command_with_range() { let cli =3D Cli::try_parse_from(["llminus", "learn", "v6.0..v6.1"]= ).unwrap(); match cli.command { Commands::Learn { range } =3D> assert_eq!(range, Some("v6.0..v= 6.1".to_string())), + _ =3D> panic!("Expected Learn command"), } } =20 + #[test] + fn test_vectorize_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "vectorize"]).unwrap(); + match cli.command { + Commands::Vectorize { batch_size } =3D> assert_eq!(batch_size,= 64), + _ =3D> panic!("Expected Vectorize command"), + } + } + + #[test] + fn test_vectorize_command_with_batch_size() { + let cli =3D Cli::try_parse_from(["llminus", "vectorize", "-b", "12= 8"]).unwrap(); + match cli.command { + Commands::Vectorize { batch_size } =3D> assert_eq!(batch_size,= 128), + _ =3D> panic!("Expected Vectorize command"), + } + } + + #[test] + fn test_cosine_similarity() { + // Identical vectors should have similarity 1.0 + let a =3D vec![1.0, 0.0, 0.0]; + let b =3D vec![1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 1.0).abs() < 0.0001); + + // Orthogonal vectors should have similarity 0.0 + let a =3D vec![1.0, 0.0, 0.0]; + let b =3D vec![0.0, 1.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 0.0).abs() < 0.0001); + + // Opposite vectors should have similarity -1.0 + let a =3D vec![1.0, 0.0, 0.0]; + let b =3D vec![-1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - (-1.0)).abs() < 0.0001); + + // Different length vectors return 0 + let a =3D vec![1.0, 0.0]; + let b =3D vec![1.0, 0.0, 0.0]; + assert_eq!(cosine_similarity(&a, &b), 0.0); + } + #[test] fn test_get_file_type() { assert_eq!(get_file_type("foo/bar.c"), "c"); --=20 2.51.0 From nobody Sun Feb 8 02:21:23 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) 
with ESMTPS id AAE9136D510 for ; Fri, 19 Dec 2025 18:16:39 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168199; cv=none; b=s+90FHYoLwV3IXDoyZvaPHL0SEfm/uaHlGzAw43Sgh4KvQrDDoiq5iCHb2e+7/OZnD5Y8qCzzCRCKr9SElMzzFoPwATuiLIJN2ELdbgU+jrAWe1ZKHjC18vX14o7WYuVDIPiiNg7ObjvRsP0yRFQO71rrPO2aSRbboXLv2CRypI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168199; c=relaxed/simple; bh=dABhBs3dlbEFE//IBJcIrzNJ3GkGdZ/MIprGDkWl7UY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=IL5S2nrkRGHD9ESGQPTI4FAbIqpsLmNQyS2pwolpFsRoFc8VrWVJ8ArYgLyjpS4ukOD4arZdtPfZJOCWgkI505NM6Q0MwBbrUk7mzUmFuaiWOhzoK5V4lnVmKf2UVZQ02etKAbVgSFJWYpTZYqeZbEkFE7XdxaANk9S4MuZ8lRw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Go/tMDqw; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Go/tMDqw" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C2663C16AAE; Fri, 19 Dec 2025 18:16:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1766168199; bh=dABhBs3dlbEFE//IBJcIrzNJ3GkGdZ/MIprGDkWl7UY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Go/tMDqw/rs3ShikchclrwxVWuz9rcl5+op9PKHqS4dmQJDRCGBaf95GGrvLi1HIc vyFvodx9wK8rgcFgc5IADFVuzBxr3M8bUXyCaOqJTWs/OVk5Ucya7VceAjb0sWESmM qpa3HuocohsE/1ncZZcy7aNZO4r3yvmrMGw5gL4WZsWOya1jsVL9dDvvrNrqgtsK9L Rs6e0HuZR7rOUMjY0Oq0DMHMeFFqzM4lfhSqPP2OPmOOtraqFdUiE86GEyY8KCYgG9 D3kKHxC1gFbYfqmY1iQbMLHp+cWLYEFKAp/PaB29DpoagEYeppobW1kTYNiSpgr050 8Bevcnj84ZUtw== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC 3/5] LLMinus: Add find command for 
similarity search Date: Fri, 19 Dec 2025 13:16:27 -0500 Message-ID: <20251219181629.1123823-4-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20251219181629.1123823-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the 'find' command that searches for historical conflict resolutions similar to the current merge conflicts using vector similarity. Key features: - Detects current conflicts via git diff --diff-filter=3DU - Parses conflict markers including diff3 style (with base content) - Generates embeddings for current conflicts - Computes cosine similarity against stored resolutions - Displays top N most similar historical resolutions New functionality: - ConflictFile struct for representing active conflicts - State machine parser for conflict markers (<<<<<<<, =3D=3D=3D=3D=3D=3D=3D= , >>>>>>>) - Support for diff3 style markers (||||||| for base content) - find_similar_resolutions() for core search logic - Configurable number of results via positional argument This enables developers to quickly find relevant examples of how similar conflicts were resolved in the past. 
Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 327 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index 32a578030b0e3..1a045fa3174ea 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -33,6 +33,12 @@ enum Commands { #[arg(short, long, default_value =3D "64")] batch_size: usize, }, + /// Find similar historical conflict resolutions for current conflicts + Find { + /// Number of similar resolutions to show (default: 1) + #[arg(default_value =3D "1")] + n: usize, + }, } =20 /// A single diff hunk representing a change region @@ -596,12 +602,244 @@ fn vectorize(batch_size: usize) -> Result<()> { Ok(()) } =20 +/// A file with active conflict markers +#[derive(Debug)] +struct ConflictFile { + path: String, + ours_content: String, + theirs_content: String, + base_content: Option, +} + +impl ConflictFile { + /// Generate embedding text for this conflict + fn to_embedding_text(&self) -> String { + let mut text =3D format!("File: {}\n\n", self.path); + + text.push_str("=3D=3D=3D OURS =3D=3D=3D\n"); + text.push_str(&self.ours_content); + text.push_str("\n\n"); + + text.push_str("=3D=3D=3D THEIRS =3D=3D=3D\n"); + text.push_str(&self.theirs_content); + text.push('\n'); + + if let Some(ref base) =3D self.base_content { + text.push_str("\n=3D=3D=3D BASE =3D=3D=3D\n"); + text.push_str(base); + text.push('\n'); + } + + text + } +} + +/// Get list of files with unmerged conflicts +fn get_conflicted_files() -> Result> { + // git diff --name-only --diff-filter=3DU shows unmerged files + let output =3D git(&["diff", "--name-only", "--diff-filter=3DU"])?; + Ok(output.lines().map(|s| s.to_string()).filter(|s| !s.is_empty()).col= lect()) +} + +/// State machine for parsing conflict markers +enum ConflictParseState { + Outside, + InOurs, + InBase, + InTheirs, +} + +/// Append a line to a string, adding newline separator if non-empty +fn append_line(s: &mut String, 
line: &str) { + if !s.is_empty() { + s.push('\n'); + } + s.push_str(line); +} + +/// Parse conflict markers from a file and extract ours/theirs/base content +fn parse_conflict_file(path: &str) -> Result> { + let content =3D std::fs::read_to_string(path) + .with_context(|| format!("Failed to read {}", path))?; + + let mut conflicts =3D Vec::new(); + let mut state =3D ConflictParseState::Outside; + let mut current_ours =3D String::new(); + let mut current_theirs =3D String::new(); + let mut current_base: Option =3D None; + + for line in content.lines() { + if line.starts_with("<<<<<<<") { + state =3D ConflictParseState::InOurs; + current_ours.clear(); + current_theirs.clear(); + current_base =3D None; + } else if line.starts_with("|||||||") { + // diff3 style - base content follows + state =3D ConflictParseState::InBase; + current_base =3D Some(String::new()); + } else if line.starts_with("=3D=3D=3D=3D=3D=3D=3D") { + state =3D ConflictParseState::InTheirs; + } else if line.starts_with(">>>>>>>") { + // End of conflict block - save it + conflicts.push(ConflictFile { + path: path.to_string(), + ours_content: std::mem::take(&mut current_ours), + theirs_content: std::mem::take(&mut current_theirs), + base_content: current_base.take(), + }); + state =3D ConflictParseState::Outside; + } else { + match state { + ConflictParseState::InOurs =3D> append_line(&mut current_o= urs, line), + ConflictParseState::InBase =3D> { + if let Some(ref mut base) =3D current_base { + append_line(base, line); + } + } + ConflictParseState::InTheirs =3D> append_line(&mut current= _theirs, line), + ConflictParseState::Outside =3D> {} + } + } + } + + Ok(conflicts) +} + +/// Result of a similarity search +struct SimilarResolution { + resolution: MergeResolution, + similarity: f32, +} + +/// Find similar resolutions (shared logic for find and resolve) +fn find_similar_resolutions(n: usize) -> Result<(Vec, Vec)> { + check_repo()?; + + let store_path =3D Path::new(STORE_PATH); + if 
!store_path.exists() { + bail!("No resolutions database found. Run 'llminus learn' first."); + } + + // Find current conflicts + let conflict_paths =3D get_conflicted_files()?; + if conflict_paths.is_empty() { + bail!("No conflicts detected. Run this command when you have activ= e merge conflicts."); + } + + // Parse all conflict regions + let mut all_conflicts =3D Vec::new(); + for path in &conflict_paths { + if let Ok(conflicts) =3D parse_conflict_file(path) { + all_conflicts.extend(conflicts); + } + } + + if all_conflicts.is_empty() { + bail!("Could not parse any conflict markers from the conflicted fi= les."); + } + + // Load the resolution store + let store =3D ResolutionStore::load(store_path)?; + let with_embeddings: Vec<_> =3D store.resolutions.iter() + .filter(|r| r.embedding.is_some()) + .collect(); + + if with_embeddings.is_empty() { + bail!("No embeddings in database. Run 'llminus vectorize' first."); + } + + // Initialize embedding model + let mut model =3D init_embedding_model()?; + + // Generate embedding for current conflicts + let conflict_text: String =3D all_conflicts.iter() + .map(|c| c.to_embedding_text()) + .collect::>() + .join("\n---\n\n"); + + let query_embeddings =3D model + .embed(vec![conflict_text], None) + .context("Failed to generate embedding for current conflict")?; + let query_embedding =3D &query_embeddings[0]; + + // Compute similarities and take top N (clone resolutions to own them) + let mut similarities: Vec<_> =3D with_embeddings.iter() + .map(|r| { + let sim =3D cosine_similarity(query_embedding, r.embedding.as_= ref().unwrap()); + (r, sim) + }) + .collect(); + + similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::= Ordering::Equal)); + + let top_n: Vec =3D similarities.into_iter() + .take(n) + .map(|(r, sim)| SimilarResolution { + resolution: (*r).clone(), + similarity: sim, + }) + .collect(); + + Ok((all_conflicts, top_n)) +} + +fn find(n: usize) -> Result<()> { + // Use find_similar_resolutions for core 
search logic + let (_conflicts, top_n) =3D find_similar_resolutions(n)?; + + // Display results + println!("\n{}", "=3D".repeat(80)); + println!("Top {} similar historical conflict resolution(s):", top_n.le= n()); + println!("{}", "=3D".repeat(80)); + + for (i, result) in top_n.iter().enumerate() { + let r =3D &result.resolution; + println!("\n{}. [similarity: {:.4}]", i + 1, result.similarity); + println!(" Commit: {}", r.commit_hash); + println!(" Summary: {}", r.commit_summary); + println!(" Author: {}", r.author); + println!(" Date: {}", r.commit_date); + println!(" Files ({}):", r.files.len()); + for file in &r.files { + println!(" - {} ({})", file.file_path, file.subsystem); + } + + // Show the resolution diffs for each file + println!("\n Resolution details:"); + for file in &r.files { + println!(" --- {} ---", file.file_path); + if !file.resolution_diff.is_empty() { + for hunk in &file.resolution_diff { + // Indent and print the diff + for line in hunk.content.lines() { + println!(" {}", line); + } + } + } else { + println!(" (no diff hunks recorded)"); + } + } + println!(); + } + + // Provide git show command for easy access + if let Some(top) =3D top_n.first() { + println!("{}", "-".repeat(80)); + println!("To see the full commit:"); + println!(" git show {}", top.resolution.commit_hash); + } + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 match cli.command { Commands::Learn { range } =3D> learn(range.as_deref()), Commands::Vectorize { batch_size } =3D> vectorize(batch_size), + Commands::Find { n } =3D> find(n), } } =20 @@ -653,6 +891,24 @@ fn test_vectorize_command_with_batch_size() { } } =20 + #[test] + fn test_find_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "find"]).unwrap(); + match cli.command { + Commands::Find { n } =3D> assert_eq!(n, 1), + _ =3D> panic!("Expected Find command"), + } + } + + #[test] + fn test_find_command_with_n() { + let cli =3D Cli::try_parse_from(["llminus", "find", "5"]).unwrap(); + 
match cli.command { + Commands::Find { n } =3D> assert_eq!(n, 5), + _ =3D> panic!("Expected Find command"), + } + } + #[test] fn test_cosine_similarity() { // Identical vectors should have similarity 1.0 @@ -847,4 +1103,75 @@ fn test_get_merge_commits() { let merges =3D get_merge_commits(None).unwrap(); assert_eq!(merges.len(), 1); } + + #[test] + fn test_parse_conflict_markers() { + let dir =3D TempDir::new().unwrap(); + let conflict_file =3D dir.path().join("conflict.c"); + let content =3D r#"int main() { +<<<<<<< HEAD + printf("ours"); +=3D=3D=3D=3D=3D=3D=3D + printf("theirs"); +>>>>>>> feature + return 0; +} +"#; + fs::write(&conflict_file, content).unwrap(); + + let conflicts =3D parse_conflict_file(conflict_file.to_str().unwra= p()).unwrap(); + assert_eq!(conflicts.len(), 1); + assert!(conflicts[0].ours_content.contains("ours")); + assert!(conflicts[0].theirs_content.contains("theirs")); + assert!(conflicts[0].base_content.is_none()); + } + + #[test] + fn test_parse_conflict_markers_diff3() { + let dir =3D TempDir::new().unwrap(); + let conflict_file =3D dir.path().join("conflict.c"); + // diff3 style with base content + let content =3D r#"int main() { +<<<<<<< HEAD + printf("ours"); +||||||| base + printf("base"); +=3D=3D=3D=3D=3D=3D=3D + printf("theirs"); +>>>>>>> feature + return 0; +} +"#; + fs::write(&conflict_file, content).unwrap(); + + let conflicts =3D parse_conflict_file(conflict_file.to_str().unwra= p()).unwrap(); + assert_eq!(conflicts.len(), 1); + assert!(conflicts[0].ours_content.contains("ours")); + assert!(conflicts[0].theirs_content.contains("theirs")); + assert!(conflicts[0].base_content.as_ref().unwrap().contains("base= ")); + } + + #[test] + fn test_parse_multiple_conflicts() { + let dir =3D TempDir::new().unwrap(); + let conflict_file =3D dir.path().join("conflict.c"); + let content =3D r#"<<<<<<< HEAD +first ours +=3D=3D=3D=3D=3D=3D=3D +first theirs +>>>>>>> feature +middle +<<<<<<< HEAD +second ours +=3D=3D=3D=3D=3D=3D=3D +second theirs 
+>>>>>>> feature +"#; + fs::write(&conflict_file, content).unwrap(); + + let conflicts =3D parse_conflict_file(conflict_file.to_str().unwra= p()).unwrap(); + assert_eq!(conflicts.len(), 2); + assert!(conflicts[0].ours_content.contains("first ours")); + assert!(conflicts[1].ours_content.contains("second ours")); + } } --=20 2.51.0 From nobody Sun Feb 8 02:21:23 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7CC94376BC7 for ; Fri, 19 Dec 2025 18:16:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168200; cv=none; b=uYQpP8KdNUufrxU6BYad/FhshVRW8Wbyih87YtfhzSifjF4wYvINlu3If4wsvnys433m5IPbiCRtXkcWZO+PqwDYkJM/h2MBrwyK7QSDzvCAdCMfZkH0sp6eRYzY0diQ49SP4wPfZ9k9ypfcS28G7iIZsQ83a0zM1pBgF6nVhr4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168200; c=relaxed/simple; bh=n0qDl4KomvHcC7qPdDhQvyvThXmFNHdTdaPzur2x0Rs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CVNO8jkAAAz5wKLB/IkANJvZfoHoISPi3xrw7lHVwUlDdLUGqZf+1nJOFdQkvh4izx3CGjokng/TAVeUH/YpxuoknLSFQUQa3RwuZyQ+vx/vte6wIWQCFMhkbv9Kjn1LIi+APlEB4IRqKoMwf0hI0tq83VazjojLjQjuWeBY0SM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Ycl+brY4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Ycl+brY4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 97AF6C116B1; Fri, 19 Dec 2025 18:16:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1766168200; 
bh=n0qDl4KomvHcC7qPdDhQvyvThXmFNHdTdaPzur2x0Rs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Ycl+brY4miRj5aZxxbVdIyccpIVHdxgyGidqL4ggXuKozOYTvByks21Ep4vqhzs6Y dOYKtwIsayV/l9NiiGfSBWjrBO0qrSJbM43E1lu9TyKXEUmyaO5KOZASpWgSIZwMm/ xFcpIhSvMNCdVHiJNtdFu7jiGY2GiqyJSjes+ifIxo7S2DezVeIXG1nWjGuC48iob+ L+uI0NfziGsAfrQdDSUtKQT6M6MpUo9oTCuJk27EadHuf/j1275UCs2muPCOr3U346 VUjQhmXDVwe6P0cri3IDnxSPRCMbxM4HlkHf0m/SnzI4dS3FYo1JuKna1tIdN6u0kq JKz0MH3dCTLkg== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC 4/5] LLMinus: Add resolve command for LLM-assisted conflict resolution Date: Fri, 19 Dec 2025 13:16:28 -0500 Message-ID: <20251219181629.1123823-5-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20251219181629.1123823-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add the 'resolve' command that invokes an external LLM to assist with resolving current merge conflicts. This command: 1. Detects merge context: - Parses .git/MERGE_MSG for merge source (branch/tag/commit) - Identifies target branch from HEAD - Extracts merge message 2. Gathers conflict information: - Parses all conflicted files with markers - Supports both standard and diff3 style markers 3. Finds similar historical resolutions (when database available): - Uses graceful degradation if no database exists - Includes top 3 most similar historical resolutions 4. Builds a prompt that includes: - High-stakes framing about merge quality - Current conflict content (ours/theirs/base) - Similar historical resolutions with diffs - Investigation requirements (lore.kernel.org search, git history) - Resolution instructions 5. 
Invokes the specified command with prompt via stdin: - Supports commands like "claude" or "llm -m gpt-4" - Streams output directly to terminal The prompt design emphasizes: - Understanding before acting - Searching lore.kernel.org for maintainer guidance - Tracing git history to understand both sides - Flagging uncertainty rather than guessing Usage: llminus resolve "claude" llminus resolve "llm -m my-llm-x" Signed-off-by: Sasha Levin --- tools/llminus/src/main.rs | 477 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 477 insertions(+) diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index 1a045fa3174ea..c00f958a238f8 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -39,6 +39,11 @@ enum Commands { #[arg(default_value =3D "1")] n: usize, }, + /// Resolve current conflicts using an LLM + Resolve { + /// Command to invoke. The prompt will be passed via stdin. + command: String, + }, } =20 /// A single diff hunk representing a change region @@ -833,6 +838,423 @@ fn find(n: usize) -> Result<()> { Ok(()) } =20 +/// Context about the current merge operation +#[derive(Debug, Default)] +struct MergeContext { + /// The branch/tag/ref being merged (from MERGE_HEAD or MERGE_MSG) + merge_source: Option, + /// The target branch (HEAD) + head_branch: Option, + /// The merge message (from .git/MERGE_MSG) + merge_message: Option, +} + +/// Extract context about the current merge operation +fn get_merge_context() -> MergeContext { + let mut ctx =3D MergeContext::default(); + + // Get current branch name + ctx.head_branch =3D git_allow_fail(&["rev-parse", "--abbrev-ref", "HEA= D"]) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty() && s !=3D "HEAD"); + + // Try to read MERGE_MSG for merge context + if let Ok(merge_msg) =3D std::fs::read_to_string(".git/MERGE_MSG") { + ctx.merge_message =3D Some(merge_msg.clone()); + + // Parse merge source from MERGE_MSG + // Common formats: + // "Merge branch 'feature-branch'" + // "Merge 
tag 'v6.1'" + // "Merge remote-tracking branch 'origin/main'" + // "Merge commit 'abc123'" + let first_line =3D merge_msg.lines().next().unwrap_or(""); + if let Some(source) =3D parse_merge_source(first_line) { + ctx.merge_source =3D Some(source); + } + } + + // If no merge source found from MERGE_MSG, try to describe MERGE_HEAD + if ctx.merge_source.is_none() { + // Try to get a tag name for MERGE_HEAD + if let Some(tag) =3D git_allow_fail(&["describe", "--tags", "--exa= ct-match", "MERGE_HEAD"]) { + ctx.merge_source =3D Some(tag.trim().to_string()); + } else if let Some(branch) =3D git_allow_fail(&["name-rev", "--nam= e-only", "MERGE_HEAD"]) { + let branch =3D branch.trim(); + if !branch.is_empty() && branch !=3D "undefined" { + ctx.merge_source =3D Some(branch.to_string()); + } + } + } + + ctx +} + +/// Parse merge source from a merge message first line +fn parse_merge_source(line: &str) -> Option { + // "Merge branch 'feature'" -> "feature" + // "Merge tag 'v6.1'" -> "v6.1" + // "Merge remote-tracking branch 'origin/main'" -> "origin/main" + // "Merge commit 'abc123'" -> "abc123" + + let line =3D line.trim(); + + // Look for quoted source + if let Some(start) =3D line.find('\'') { + if let Some(end) =3D line[start + 1..].find('\'') { + return Some(line[start + 1..start + 1 + end].to_string()); + } + } + + // Look for "Merge X into Y" pattern without quotes + if line.starts_with("Merge ") { + let rest =3D &line[6..]; + // Skip "branch ", "tag ", "commit ", "remote-tracking branch " + let rest =3D rest + .strip_prefix("remote-tracking branch ") + .or_else(|| rest.strip_prefix("branch ")) + .or_else(|| rest.strip_prefix("tag ")) + .or_else(|| rest.strip_prefix("commit ")) + .unwrap_or(rest); + + // Take until " into " or end of line + if let Some(into_pos) =3D rest.find(" into ") { + return Some(rest[..into_pos].trim().to_string()); + } + let word =3D rest.split_whitespace().next()?; + if !word.is_empty() { + return Some(word.to_string()); + } + } + + None +} + 
+/// Get current conflicts from the working directory +fn get_current_conflicts() -> Result> { + check_repo()?; + + // Find current conflicts + let conflict_paths =3D get_conflicted_files()?; + if conflict_paths.is_empty() { + bail!("No conflicts detected. Run this command when you have activ= e merge conflicts."); + } + + // Parse all conflict regions + let mut all_conflicts =3D Vec::new(); + for path in &conflict_paths { + if let Ok(conflicts) =3D parse_conflict_file(path) { + all_conflicts.extend(conflicts); + } + } + + if all_conflicts.is_empty() { + bail!("Could not parse any conflict markers from the conflicted fi= les."); + } + + Ok(all_conflicts) +} + +/// Try to find similar resolutions, returns empty vec if no database or e= mbeddings +fn try_find_similar_resolutions(n: usize, conflicts: &[ConflictFile]) -> V= ec { + let store_path =3D Path::new(STORE_PATH); + if !store_path.exists() { + return Vec::new(); + } + + let store =3D match ResolutionStore::load(store_path) { + Ok(s) =3D> s, + Err(_) =3D> return Vec::new(), + }; + + let with_embeddings: Vec<_> =3D store.resolutions.iter() + .filter(|r| r.embedding.is_some()) + .collect(); + + if with_embeddings.is_empty() { + return Vec::new(); + } + + // Initialize embedding model + let mut model =3D match init_embedding_model() { + Ok(m) =3D> m, + Err(_) =3D> return Vec::new(), + }; + + // Generate embedding for current conflicts + let conflict_text: String =3D conflicts.iter() + .map(|c| c.to_embedding_text()) + .collect::>() + .join("\n---\n\n"); + + let query_embeddings =3D match model.embed(vec![conflict_text], None) { + Ok(e) =3D> e, + Err(_) =3D> return Vec::new(), + }; + let query_embedding =3D &query_embeddings[0]; + + // Compute similarities and take top N + let mut similarities: Vec<_> =3D with_embeddings.iter() + .map(|r| { + let sim =3D cosine_similarity(query_embedding, r.embedding.as_= ref().unwrap()); + (r, sim) + }) + .collect(); + + similarities.sort_by(|a, b| 
b.1.partial_cmp(&a.1).unwrap_or(std::cmp::= Ordering::Equal)); + + similarities.into_iter() + .take(n) + .map(|(r, sim)| SimilarResolution { + resolution: (*r).clone(), + similarity: sim, + }) + .collect() +} + +/// Build the LLM prompt for conflict resolution +fn build_resolve_prompt( + conflicts: &[ConflictFile], + similar: &[SimilarResolution], + merge_ctx: &MergeContext, +) -> String { + let mut prompt =3D String::new(); + + // Header with high-stakes framing + prompt.push_str("# Linux Kernel Merge Conflict Resolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintainer re= solving a merge conflict.\n\n"); + prompt.push_str("**Important:** Incorrect merge resolutions have histo= rically introduced subtle bugs "); + prompt.push_str("that affected millions of users and took months to di= agnose. A resolution that "); + prompt.push_str("compiles but has semantic errors is worse than no res= olution at all.\n\n"); + prompt.push_str("Take the time to fully understand both sides of the c= onflict before attempting "); + prompt.push_str("any resolution. 
If after investigation you're not con= fident, say so - it's "); + prompt.push_str("better to escalate to a human than to introduce a sub= tle bug.\n\n"); + + // Merge context + prompt.push_str("## Merge Context\n\n"); + if let Some(ref source) =3D merge_ctx.merge_source { + prompt.push_str(&format!("**Merging:** `{}`\n", source)); + } + if let Some(ref head) =3D merge_ctx.head_branch { + prompt.push_str(&format!("**Into:** `{}`\n", head)); + } + if let Some(ref msg) =3D merge_ctx.merge_message { + let first_line =3D msg.lines().next().unwrap_or(""); + prompt.push_str(&format!("**Merge message:** {}\n", first_line)); + } + prompt.push_str("\n"); + + // Current conflicts + prompt.push_str("## Current Conflicts\n\n"); + + for conflict in conflicts { + prompt.push_str(&format!("### File: {}\n\n", conflict.path)); + prompt.push_str("**Our version (HEAD):**\n```\n"); + prompt.push_str(&conflict.ours_content); + prompt.push_str("\n```\n\n"); + prompt.push_str("**Their version (being merged):**\n```\n"); + prompt.push_str(&conflict.theirs_content); + prompt.push_str("\n```\n\n"); + if let Some(ref base) =3D conflict.base_content { + prompt.push_str("**Base version (common ancestor):**\n```\n"); + prompt.push_str(base); + prompt.push_str("\n```\n\n"); + } + } + + // Similar historical resolutions (only if available) + if !similar.is_empty() { + prompt.push_str("## Similar Historical Resolutions\n\n"); + prompt.push_str("These conflicts were previously resolved in the L= inux kernel. 
Use `git show ` "); + prompt.push_str("to examine the full commit message and context - = maintainers often explain "); + prompt.push_str("their resolution rationale there.\n\n"); + + for (i, result) in similar.iter().enumerate() { + let r =3D &result.resolution; + prompt.push_str(&format!("### Historical Resolution {} (simila= rity: {:.1}%)\n\n", i + 1, result.similarity * 100.0)); + prompt.push_str(&format!("- **Commit:** `{}`\n", r.commit_hash= )); + prompt.push_str(&format!("- **Summary:** {}\n", r.commit_summa= ry)); + prompt.push_str(&format!("- **Author:** {}\n", r.author)); + prompt.push_str(&format!("- **Date:** {}\n", r.commit_date)); + prompt.push_str(&format!("- **Files:** {}\n\n", r.files.iter()= .map(|f| f.file_path.as_str()).collect::>().join(", "))); + + for file in &r.files { + prompt.push_str(&format!("#### {}\n\n", file.file_path)); + + if !file.ours_diff.is_empty() { + prompt.push_str("**Ours changed:**\n```diff\n"); + for hunk in &file.ours_diff { + prompt.push_str(&hunk.content); + prompt.push_str("\n"); + } + prompt.push_str("```\n\n"); + } + + if !file.theirs_diff.is_empty() { + prompt.push_str("**Theirs changed:**\n```diff\n"); + for hunk in &file.theirs_diff { + prompt.push_str(&hunk.content); + prompt.push_str("\n"); + } + prompt.push_str("```\n\n"); + } + + if !file.resolution_diff.is_empty() { + prompt.push_str("**Final resolution:**\n```diff\n"); + for hunk in &file.resolution_diff { + prompt.push_str(&hunk.content); + prompt.push_str("\n"); + } + prompt.push_str("```\n\n"); + } + } + } + } + + // Investigation requirement + prompt.push_str("## Investigation Required\n\n"); + prompt.push_str("Before attempting any resolution, you must conduct th= orough research. "); + prompt.push_str("Rushing to resolve without understanding is how subtl= e bugs get introduced. 
"); + prompt.push_str("Work through each phase below IN ORDER and document y= our findings.\n\n"); + + // Phase 1: Search lore.kernel.org + prompt.push_str("### Phase 1: Search lore.kernel.org for Maintainer Gu= idance (DO THIS FIRST)\n\n"); + prompt.push_str("**CRITICAL:** Before doing ANY other research, search= lore.kernel.org for existing guidance.\n"); + prompt.push_str("Maintainers often post merge resolution instructions = when they know conflicts will occur.\n\n"); + + if let Some(ref source) =3D merge_ctx.merge_source { + prompt.push_str(&format!("1. **Search for the merge itself:** `{}`= \n", source)); + prompt.push_str(&format!(" - URL: `https://lore.kernel.org/all/?= q=3D{}`\n", source.replace('/', "%2F"))); + } + prompt.push_str("2. **Search for conflict discussions:**\n"); + prompt.push_str(" - `\"merge conflict\"` + subsystem name\n"); + prompt.push_str(" - `\"conflicts with\"` + branch/tag name\n\n"); + + // Phase 2: Context + prompt.push_str("### Phase 2: Understand the Context\n\n"); + prompt.push_str("- **What subsystem is this?** Read the file and nearb= y files to understand its purpose.\n"); + prompt.push_str("- **Who maintains it?** Check `git log --oneline -20`= for recent authors.\n"); + prompt.push_str("- **What's the file's role?** Is it a driver, core su= bsystem, header, config?\n\n"); + + // Phase 3: Trace history + prompt.push_str("### Phase 3: Trace Each Side's History\n\n"); + prompt.push_str("**For 'ours' (HEAD):**\n"); + prompt.push_str("- Run `git log --oneline HEAD -- ` to see recen= t changes\n"); + prompt.push_str("- Find the commit that introduced our version of the = conflicted code\n"); + prompt.push_str("- Run `git show ` to read the full commit mes= sage\n\n"); + prompt.push_str("**For 'theirs' (MERGE_HEAD):**\n"); + prompt.push_str("- Run `git log --oneline MERGE_HEAD -- ` to see= their changes\n"); + prompt.push_str("- Find the commit that introduced their version\n"); + prompt.push_str("- Run `git show ` to read the 
full commit mes= sage\n\n"); + + // Resolution + prompt.push_str("## Resolution\n\n"); + prompt.push_str("Once you understand the conflict:\n\n"); + prompt.push_str("1. Edit the conflicted files to produce the correct m= erged result\n"); + prompt.push_str("2. Remove all conflict markers (`<<<<<<<`, `=3D=3D=3D= =3D=3D=3D=3D`, `>>>>>>>`)\n"); + prompt.push_str("3. Stage the resolved files with `git add`\n"); + prompt.push_str("4. Commit with a detailed message explaining your ana= lysis and resolution\n\n"); + + // If uncertain + prompt.push_str("## If Uncertain\n\n"); + prompt.push_str("If after investigation you're still uncertain about t= he correct resolution:\n\n"); + prompt.push_str("- Explain what you've learned and what remains unclea= r\n"); + prompt.push_str("- Describe the possible resolutions you see and their= tradeoffs\n"); + prompt.push_str("- Recommend whether a human maintainer should review\= n\n"); + prompt.push_str("It's better to flag uncertainty than to silently intr= oduce a bug.\n\n"); + + // Tools available + prompt.push_str("## Tools Available\n\n"); + prompt.push_str("You can use these to investigate:\n\n"); + prompt.push_str("```bash\n"); + if !similar.is_empty() { + prompt.push_str("# Examine historical resolution commits\n"); + for result in similar { + prompt.push_str(&format!("git show {}\n", result.resolution.co= mmit_hash)); + } + prompt.push_str("\n"); + } + prompt.push_str("# Understand merge parents\n"); + prompt.push_str("git show ^1 # ours\n"); + prompt.push_str("git show ^2 # theirs\n"); + prompt.push_str("```\n"); + + prompt +} + +fn resolve(command: &str) -> Result<()> { + use std::io::Write; + use std::process::Stdio; + + // Get merge context (what branch/tag is being merged) + let merge_ctx =3D get_merge_context(); + if let Some(ref source) =3D merge_ctx.merge_source { + println!("Merging: {}", source); + } + if let Some(ref head) =3D merge_ctx.head_branch { + println!("Into: {}", head); + } + + // Get current conflicts 
first + let conflicts =3D get_current_conflicts()?; + + println!("Found {} conflict(s)", conflicts.len()); + + // Try to find similar historical resolutions (gracefully handles miss= ing database) + println!("Looking for similar historical conflicts..."); + let similar =3D try_find_similar_resolutions(3, &conflicts); + + if similar.is_empty() { + println!("No historical resolution database found (run 'llminus le= arn' and 'llminus vectorize' to build one)"); + println!("Proceeding without historical examples..."); + } else { + println!("Found {} similar historical resolutions", similar.len()); + } + + // Build the prompt + println!("Building resolution prompt..."); + let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx); + + println!("Prompt size: {} bytes", prompt.len()); + println!("\nInvoking: {}", command); + println!("{}", "=3D".repeat(80)); + + // Parse command (handle arguments) + let parts: Vec<&str> =3D command.split_whitespace().collect(); + if parts.is_empty() { + bail!("Empty command specified"); + } + + let cmd =3D parts[0]; + let args =3D &parts[1..]; + + // Spawn the command + let mut child =3D Command::new(cmd) + .args(args) + .stdin(Stdio::piped()) + .spawn() + .with_context(|| format!("Failed to spawn command: {}", command))?; + + // Write prompt to stdin + if let Some(mut stdin) =3D child.stdin.take() { + stdin.write_all(prompt.as_bytes()) + .context("Failed to write prompt to command stdin")?; + } + + // Wait for completion + let status =3D child.wait().context("Failed to wait for command")?; + + println!("{}", "=3D".repeat(80)); + + if status.success() { + println!("\nCommand completed successfully."); + } else { + eprintln!("\nCommand exited with status: {}", status); + } + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 @@ -840,6 +1262,7 @@ fn main() -> Result<()> { Commands::Learn { range } =3D> learn(range.as_deref()), Commands::Vectorize { batch_size } =3D> vectorize(batch_size), Commands::Find { n } 
=3D> find(n), + Commands::Resolve { command } =3D> resolve(&command), } } =20 @@ -909,6 +1332,60 @@ fn test_find_command_with_n() { } } =20 + #[test] + fn test_resolve_command_parses() { + let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm"]).= unwrap(); + match cli.command { + Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= "), + _ =3D> panic!("Expected Resolve command"), + } + } + + #[test] + fn test_resolve_command_with_args() { + let cli =3D Cli::try_parse_from(["llminus", "resolve", "my-llm --m= odel fancy"]).unwrap(); + match cli.command { + Commands::Resolve { command } =3D> assert_eq!(command, "my-llm= --model fancy"), + _ =3D> panic!("Expected Resolve command"), + } + } + + #[test] + fn test_parse_merge_source() { + // Standard branch merge + assert_eq!( + parse_merge_source("Merge branch 'feature-branch'"), + Some("feature-branch".to_string()) + ); + + // Tag merge + assert_eq!( + parse_merge_source("Merge tag 'v6.1'"), + Some("v6.1".to_string()) + ); + + // Remote tracking branch + assert_eq!( + parse_merge_source("Merge remote-tracking branch 'origin/main'= "), + Some("origin/main".to_string()) + ); + + // Commit merge + assert_eq!( + parse_merge_source("Merge commit 'abc123def'"), + Some("abc123def".to_string()) + ); + + // Branch with "into" target + assert_eq!( + parse_merge_source("Merge branch 'feature' into master"), + Some("feature".to_string()) + ); + + // Non-merge line + assert_eq!(parse_merge_source("Fix bug in foo"), None); + } + #[test] fn test_cosine_similarity() { // Identical vectors should have similarity 1.0 --=20 2.51.0 From nobody Sun Feb 8 02:21:23 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5C475376BDD for ; Fri, 19 Dec 2025 18:16:41 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; 
arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168201; cv=none; b=UkoN9ENllOrJJBOPqKvUvqq2wiyChiGbkb5idjVvtDVgyrnDYurKamYvdnP6pVCfMmTltX2h8FGItGjechG1nGJRQmCRX5Rv8ENNX5frpW2tt5pPB/j5b1EF6SREmykJrcdJ2q1t0UtajDf8C/8T4h46PduhigyNJHlq/62SfD4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1766168201; c=relaxed/simple; bh=44kbhUIpomoSQfjsADINxq93SBDe/3k1t2bLTPw43Cs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=QMYmTsz0T1hE1zRT20pyqgPhz/PwfXdU+MH9qzMPxrGnX3oIDzYwY2ecci5+xQnNjcHSt2pcfTKu0VOOCASxZuz2gcScHkxNJ+im0W7nb7HqwhHxIEmOh25ZMNW/j0VNoY3TS1hpKBYyveHJqLWkA+h1T9jLnbot3uqA3C2nT3Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FhHBVvlI; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FhHBVvlI" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6C3BEC4CEF1; Fri, 19 Dec 2025 18:16:40 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1766168201; bh=44kbhUIpomoSQfjsADINxq93SBDe/3k1t2bLTPw43Cs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FhHBVvlIjq7mWk44hsTef/ZXrKM0RgaQ/4lV/INN0vX38QyYcgbScjNxGTxNTS5FH CVO1Vrao0yUILoLcbfbLqMbswe4dpd9jxWjDHb++nommuMfwnz9dGv8FPRyFeOxRgM 2PW3u6eavB/gtcnB93xft7o8RkcIF0LESo9u8lXt5A0vpiqIz2295AWZamjwO4kJhk CfC7nSioGUykakSm5W0nI2wLNW4y63tUNZ6vKcXM/a0xuUDHVOyP+ae5ClEL17fodz 3Iv+cVO1REY+nVdr+9SEQLxXSHs8p27zBVFbFBmbUk/8h+U1ZzeU0L6kGdKlzvzLjp YG3jXbv3HVQ2A== From: Sasha Levin To: tools@kernel.org Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, broonie@kernel.org, Sasha Levin Subject: [RFC 5/5] LLMinus: Add pull command for LLM-assisted kernel pull request merging Date: Fri, 19 Dec 2025 13:16:29 -0500 Message-ID: 
<20251219181629.1123823-6-sashal@kernel.org> X-Mailer: git-send-email 2.51.0 In-Reply-To: <20251219181629.1123823-1-sashal@kernel.org> References: <20251219181629.1123823-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new 'llminus pull' command that automates merging kernel pull requests from lore.kernel.org with LLM-assisted conflict resolution. Usage: llminus pull <message-id> [-c <command>] The command: - Fetches the pull request email from lore.kernel.org - Parses the email to extract the git repository URL and ref - Executes 'git pull' to fetch and merge - If conflicts occur, invokes the LLM with full context including: - The complete pull request email (so the LLM understands both the summary and any conflict resolution instructions from the maintainer) - Current conflict markers with ours/theirs/base content - Similar historical resolutions from the RAG database (if available) - The LLM resolves conflicts and writes its reasoning to .git/LLMINUS_RESOLUTION - Commits with a message containing the PR summary, resolution explanation, and a link to the original email The prompt instructs the LLM to critically evaluate any conflict resolution suggestions from the maintainer, looking for opportunities to produce cleaner or more efficient code. Adds ureq dependency for HTTP fetching from lore.kernel.org. 
Signed-off-by: Sasha Levin --- tools/llminus/Cargo.toml | 1 + tools/llminus/src/main.rs | 663 +++++++++++++++++++++++++++++++++++++- 2 files changed, 650 insertions(+), 14 deletions(-) diff --git a/tools/llminus/Cargo.toml b/tools/llminus/Cargo.toml index 86740174de598..af7af112a20d7 100644 --- a/tools/llminus/Cargo.toml +++ b/tools/llminus/Cargo.toml @@ -14,6 +14,7 @@ fastembed =3D "5" rayon =3D "1" serde =3D { version =3D "1", features =3D ["derive"] } serde_json =3D "1" +ureq =3D "2" =20 [dev-dependencies] tempfile =3D "3" diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs index c00f958a238f8..e65f6d0f836ec 100644 --- a/tools/llminus/src/main.rs +++ b/tools/llminus/src/main.rs @@ -44,6 +44,14 @@ enum Commands { /// Command to invoke. The prompt will be passed via stdin. command: String, }, + /// Pull a kernel patch/pull request from lore.kernel.org and merge it + Pull { + /// Message ID from lore.kernel.org (e.g., "98b74397-05bc-dbee-cab= 4-3f40d643eaac@kernel.org") + message_id: String, + /// Command to invoke for LLM assistance + #[arg(short, long, default_value =3D "llm")] + command: String, + }, } =20 /// A single diff hunk representing a change region @@ -930,6 +938,302 @@ fn parse_merge_source(line: &str) -> Option { None } =20 +/// Information parsed from a lore.kernel.org pull request email +#[derive(Debug, Default)] +#[allow(dead_code)] +struct PullRequest { + /// Message ID + message_id: String, + /// Subject line of the email + subject: String, + /// Author name and email + from: String, + /// Date of the email + date: String, + /// Git repository URL to pull from + git_url: String, + /// Git ref (tag or branch) to pull + git_ref: String, + /// The full raw email body (LLM extracts summary and conflict instruc= tions from this) + body: String, +} + +/// Fetch raw email from lore.kernel.org +fn fetch_lore_email(message_id: &str) -> Result { + // Clean up message ID (remove < > if present) + let clean_id =3D message_id + 
.trim_start_matches('<') + .trim_end_matches('>') + .trim(); + + let url =3D format!("https://lore.kernel.org/all/{}/raw", clean_id); + println!("Fetching: {}", url); + + let response =3D ureq::get(&url) + .call() + .with_context(|| format!("Failed to fetch {}", url))?; + + if response.status() !=3D 200 { + bail!("HTTP error {}: {}", response.status(), response.status_text= ()); + } + + response.into_string() + .context("Failed to read response body") +} + +/// Parse email headers from raw email text +fn parse_email_headers(raw: &str) -> (String, String, String, String, &str= ) { + let mut from =3D String::new(); + let mut subject =3D String::new(); + let mut date =3D String::new(); + let mut message_id =3D String::new(); + + // Find the blank line separating headers from body + let (headers_section, body) =3D raw.split_once("\n\n") + .unwrap_or((raw, "")); + + // Parse headers (handle multi-line headers) + let mut current_header =3D String::new(); + let mut current_value =3D String::new(); + + for line in headers_section.lines() { + if line.starts_with(' ') || line.starts_with('\t') { + // Continuation of previous header + current_value.push(' '); + current_value.push_str(line.trim()); + } else if let Some((name, value)) =3D line.split_once(':') { + // New header - save previous if any + if !current_header.is_empty() { + match current_header.to_lowercase().as_str() { + "from" =3D> from =3D current_value.clone(), + "subject" =3D> subject =3D current_value.clone(), + "date" =3D> date =3D current_value.clone(), + "message-id" =3D> message_id =3D current_value.clone(), + _ =3D> {} + } + } + current_header =3D name.to_string(); + current_value =3D value.trim().to_string(); + } + } + + // Don't forget last header + if !current_header.is_empty() { + match current_header.to_lowercase().as_str() { + "from" =3D> from =3D current_value, + "subject" =3D> subject =3D current_value, + "date" =3D> date =3D current_value, + "message-id" =3D> message_id =3D current_value, + _ =3D> 
{} + } + } + + (from, subject, date, message_id, body) +} + +/// Extract git pull URL and ref from email body +fn extract_git_info(body: &str) -> Option<(String, String)> { + // Look for patterns like: + // "git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux tags/ris= cv-for-linus-6.19-mw2" + // "https://git.kernel.org/pub/scm/linux/kernel/git/foo/bar.git branch= -name" + + for line in body.lines() { + let line =3D line.trim(); + + // Skip empty lines and common non-URL prefixes + if line.is_empty() { + continue; + } + + // Check for git:// or https:// URLs + let url_start =3D if let Some(pos) =3D line.find("git://") { + pos + } else if let Some(pos) =3D line.find("https://git.") { + pos + } else { + continue; + }; + + let url_part =3D &line[url_start..]; + + // Split into URL and ref + let parts: Vec<&str> =3D url_part.split_whitespace().collect(); + if parts.len() >=3D 2 { + let url =3D parts[0].to_string(); + let git_ref =3D parts[1].to_string(); + + // Validate it looks like a kernel git URL + if url.contains("kernel.org") || url.contains("git.") { + return Some((url, git_ref)); + } + } + } + + None +} + +/// Use LLM to extract the maintainer's summary from the email body +/// Returns None if extraction fails (caller can fall back to other method= s) +fn extract_summary_with_llm(body: &str, command: &str) -> Option { + use std::io::Write; + use std::process::Stdio; + + let prompt =3D format!(r#"Extract ONLY the technical summary from this= kernel pull request email. +The summary describes what changes are included (usually as bullet points). +Do NOT include: +- Personal messages to Linus +- Git URLs or repository information +- Merge/conflict resolution instructions +- Diffstat or file change listings +- Sign-offs or signatures + +Output ONLY the summary text, nothing else. No preamble, no explanation. 
+ +Email body: +{} +"#, body); + + let parts: Vec<&str> =3D command.split_whitespace().collect(); + if parts.is_empty() { + return None; + } + + println!("Extracting summary from pull request..."); + + let mut child =3D match Command::new(parts[0]) + .args(&parts[1..]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() { + Ok(c) =3D> c, + Err(_) =3D> return None, + }; + + if let Some(mut stdin) =3D child.stdin.take() { + if stdin.write_all(prompt.as_bytes()).is_err() { + return None; + } + } + + let output =3D match child.wait_with_output() { + Ok(o) =3D> o, + Err(_) =3D> return None, + }; + + if !output.status.success() { + return None; + } + + let summary =3D String::from_utf8_lossy(&output.stdout).trim().to_stri= ng(); + if summary.is_empty() { + None + } else { + Some(summary) + } +} + +/// Parse a pull request email from lore.kernel.org +fn parse_pull_request(message_id: &str, raw: &str) -> Result { + let (from, subject, date, parsed_id, body) =3D parse_email_headers(raw= ); + + let (git_url, git_ref) =3D extract_git_info(body) + .ok_or_else(|| anyhow::anyhow!("Could not find git repository URL = in email"))?; + + Ok(PullRequest { + message_id: if parsed_id.is_empty() { message_id.to_string() } els= e { parsed_id }, + subject, + from, + date, + git_url, + git_ref, + body: body.to_string(), + }) +} + +/// Execute git pull and return whether there are conflicts +fn git_pull(url: &str, git_ref: &str) -> Result { + println!("Executing: git pull {} {}", url, git_ref); + + let output =3D Command::new("git") + .args(["pull", url, git_ref]) + .output() + .context("Failed to run git pull")?; + + let stdout =3D String::from_utf8_lossy(&output.stdout); + let stderr =3D String::from_utf8_lossy(&output.stderr); + + if !stdout.is_empty() { + println!("{}", stdout); + } + if !stderr.is_empty() { + eprintln!("{}", stderr); + } + + // Check if there are conflicts + if output.status.success() { + return Ok(false); // No conflicts + } + + // 
Check for merge conflicts specifically + let conflict_markers =3D ["CONFLICT", "Automatic merge failed", "fix c= onflicts"]; + let output_text =3D format!("{}{}", stdout, stderr); + + for marker in conflict_markers { + if output_text.contains(marker) { + return Ok(true); // Has conflicts + } + } + + // Some other error + bail!("git pull failed: {}", stderr); +} + +/// Check if there are unmerged files (active merge conflicts) +fn has_merge_conflicts() -> bool { + get_conflicted_files() + .map(|files| !files.is_empty()) + .unwrap_or(false) +} + +/// Build a merge commit message using the pull request information, summa= ry, and resolution +fn build_merge_commit_message(pull_req: &PullRequest, summary: &str, resol= ution: &str) -> String { + let mut msg =3D String::new(); + + // Use the subject line as the merge message header + if !pull_req.subject.is_empty() { + // Clean up subject - remove [GIT PULL] prefix if present + let subject =3D pull_req.subject + .replace("[GIT PULL]", "") + .replace("[git pull]", "") + .trim() + .to_string(); + msg.push_str(&format!("Merge {} {}\n", pull_req.git_ref, &subject)= ); + } else { + msg.push_str(&format!("Merge {}\n", pull_req.git_ref)); + } + msg.push('\n'); + + // Add maintainer's summary (extracted by LLM) + if !summary.is_empty() { + msg.push_str(summary); + msg.push_str("\n\n"); + } + + // Add resolution explanation (written by LLM during conflict resoluti= on) + if !resolution.is_empty() { + msg.push_str("Merge conflict resolution:\n\n"); + msg.push_str(resolution); + msg.push_str("\n\n"); + } + + // Add link to lore + msg.push_str(&format!("Link: https://lore.kernel.org/all/{}/\n", + pull_req.message_id.trim_start_matches('<').trim_end_matches('>'))= ); + + msg +} + /// Get current conflicts from the working directory fn get_current_conflicts() -> Result> { check_repo()?; @@ -1017,18 +1321,53 @@ fn build_resolve_prompt( conflicts: &[ConflictFile], similar: &[SimilarResolution], merge_ctx: &MergeContext, + pull_req: 
Option<&PullRequest>, ) -> String { let mut prompt =3D String::new(); =20 // Header with high-stakes framing - prompt.push_str("# Linux Kernel Merge Conflict Resolution\n\n"); - prompt.push_str("You are acting as an experienced kernel maintainer re= solving a merge conflict.\n\n"); + if pull_req.is_some() { + prompt.push_str("# Linux Kernel Pull Request Merge with Conflict R= esolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintaine= r resolving conflicts "); + prompt.push_str("from a pull request submission on lore.kernel.org= .\n\n"); + } else { + prompt.push_str("# Linux Kernel Merge Conflict Resolution\n\n"); + prompt.push_str("You are acting as an experienced kernel maintaine= r resolving a merge conflict.\n\n"); + } prompt.push_str("**Important:** Incorrect merge resolutions have histo= rically introduced subtle bugs "); prompt.push_str("that affected millions of users and took months to di= agnose. A resolution that "); prompt.push_str("compiles but has semantic errors is worse than no res= olution at all.\n\n"); - prompt.push_str("Take the time to fully understand both sides of the c= onflict before attempting "); - prompt.push_str("any resolution. If after investigation you're not con= fident, say so - it's "); - prompt.push_str("better to escalate to a human than to introduce a sub= tle bug.\n\n"); + + // Pull request specific: critical evaluation note + if pull_req.is_some() { + prompt.push_str("**CRITICAL:** You have access to the pull request= email which may contain "); + prompt.push_str("conflict resolution instructions from the maintai= ner. Use these as guidance, "); + prompt.push_str("but ALWAYS evaluate them critically - there may b= e better, cleaner, or more "); + prompt.push_str("efficient solutions than what was suggested.\n\n"= ); + } else { + prompt.push_str("Take the time to fully understand both sides of t= he conflict before attempting "); + prompt.push_str("any resolution. 
If after investigation you're not= confident, say so - it's "); + prompt.push_str("better to escalate to a human than to introduce a= subtle bug.\n\n"); + } + + // Pull request information (if present) + if let Some(pr) =3D pull_req { + prompt.push_str("## Pull Request Information\n\n"); + prompt.push_str(&format!("- **Subject:** {}\n", pr.subject)); + prompt.push_str(&format!("- **From:** {}\n", pr.from)); + prompt.push_str(&format!("- **Date:** {}\n", pr.date)); + prompt.push_str(&format!("- **Git URL:** {} {}\n", pr.git_url, pr.= git_ref)); + prompt.push_str(&format!("- **Message ID:** {}\n\n", pr.message_id= )); + + // Full email body - LLM will understand summary and conflict inst= ructions from this + prompt.push_str("### Pull Request Email\n\n"); + prompt.push_str("Read this email carefully. It contains the mainta= iner's description of the changes "); + prompt.push_str("and may include conflict resolution instructions.= Evaluate any suggested "); + prompt.push_str("resolutions critically - there may be cleaner or = more efficient solutions.\n\n"); + prompt.push_str("```\n"); + prompt.push_str(&pr.body); + prompt.push_str("\n```\n\n"); + } =20 // Merge context prompt.push_str("## Merge Context\n\n"); @@ -1153,7 +1492,17 @@ fn build_resolve_prompt( prompt.push_str("1. Edit the conflicted files to produce the correct m= erged result\n"); prompt.push_str("2. Remove all conflict markers (`<<<<<<<`, `=3D=3D=3D= =3D=3D=3D=3D`, `>>>>>>>`)\n"); prompt.push_str("3. Stage the resolved files with `git add`\n"); - prompt.push_str("4. Commit with a detailed message explaining your ana= lysis and resolution\n\n"); + if pull_req.is_some() { + prompt.push_str("4. **Do NOT commit** - The tool will handle the c= ommit\n"); + prompt.push_str("5. 
**IMPORTANT:** Write a detailed explanation of= your resolution to `.git/LLMINUS_RESOLUTION`\n"); + prompt.push_str(" This file should contain:\n"); + prompt.push_str(" - A summary of each conflict and how you resol= ved it\n"); + prompt.push_str(" - The reasoning behind your choices\n"); + prompt.push_str(" - Any improvements you made over suggested res= olutions\n"); + prompt.push_str(" This will be included in the merge commit mess= age.\n\n"); + } else { + prompt.push_str("4. Commit with a detailed message explaining your= analysis and resolution\n\n"); + } =20 // If uncertain prompt.push_str("## If Uncertain\n\n"); @@ -1183,9 +1532,6 @@ fn build_resolve_prompt( } =20 fn resolve(command: &str) -> Result<()> { - use std::io::Write; - use std::process::Stdio; - // Get merge context (what branch/tag is being merged) let merge_ctx =3D get_merge_context(); if let Some(ref source) =3D merge_ctx.merge_source { @@ -1211,12 +1557,17 @@ fn resolve(command: &str) -> Result<()> { println!("Found {} similar historical resolutions", similar.len()); } =20 - // Build the prompt - println!("Building resolution prompt..."); - let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx); + // Build the prompt and invoke LLM + let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = None); + invoke_llm(command, &prompt) +} + +/// Invoke an LLM command with a prompt via stdin +fn invoke_llm(command: &str, prompt: &str) -> Result<()> { + use std::io::Write; + use std::process::Stdio; =20 - println!("Prompt size: {} bytes", prompt.len()); - println!("\nInvoking: {}", command); + println!("Invoking: {} (prompt: {} bytes)", command, prompt.len()); println!("{}", "=3D".repeat(80)); =20 // Parse command (handle arguments) @@ -1255,6 +1606,107 @@ fn resolve(command: &str) -> Result<()> { Ok(()) } =20 +/// Pull a kernel pull request from lore.kernel.org +fn pull(message_id: &str, command: &str) -> Result<()> { + check_repo()?; + + // Step 1: Fetch and parse the pull 
request email + println!("=3D=3D=3D Fetching Pull Request =3D=3D=3D\n"); + let raw_email =3D fetch_lore_email(message_id)?; + let pull_req =3D parse_pull_request(message_id, &raw_email)?; + + println!("Subject: {}", pull_req.subject); + println!("From: {}", pull_req.from); + println!("Date: {}", pull_req.date); + println!("Git URL: {} {}", pull_req.git_url, pull_req.git_ref); + + // Step 2: Execute git pull + println!("\n=3D=3D=3D Executing Git Pull =3D=3D=3D\n"); + let has_conflicts =3D git_pull(&pull_req.git_url, &pull_req.git_ref)?; + + if !has_conflicts { + // No conflicts - merge succeeded automatically + println!("\n=3D=3D=3D Merge Completed Successfully =3D=3D=3D"); + println!("No conflicts detected. The merge was completed automatic= ally."); + return Ok(()); + } + + // Step 3: Handle conflicts + println!("\n=3D=3D=3D Merge Conflicts Detected =3D=3D=3D\n"); + + // Get merge context + let merge_ctx =3D get_merge_context(); + + // Parse the conflicts + let conflicts =3D get_current_conflicts()?; + println!("Found {} conflict region(s) to resolve", conflicts.len()); + + // Try to find similar historical resolutions + println!("Looking for similar historical conflicts..."); + let similar =3D try_find_similar_resolutions(3, &conflicts); + + if similar.is_empty() { + println!("No historical resolution database found (this is optiona= l)"); + } else { + println!("Found {} similar historical resolutions", similar.len()); + } + + // Build the prompt with pull request context and invoke LLM + let prompt =3D build_resolve_prompt(&conflicts, &similar, &merge_ctx, = Some(&pull_req)); + println!("\n=3D=3D=3D Invoking LLM for Conflict Resolution =3D=3D=3D"); + invoke_llm(command, &prompt)?; + + // Step 5: Check if conflicts are resolved + if has_merge_conflicts() { + println!("\nWarning: Conflicts still remain in the working directo= ry."); + println!("Please resolve any remaining conflicts manually and comm= it."); + return Ok(()); + } + + // Step 6: Commit the merge 
with pull request information + println!("\n=3D=3D=3D Committing Merge =3D=3D=3D\n"); + + // Extract summary using LLM (falls back to empty if it fails) + let summary =3D extract_summary_with_llm(&pull_req.body, command) + .unwrap_or_else(|| { + println!("Note: Could not extract summary automatically"); + String::new() + }); + + // Read resolution explanation written by LLM + let resolution =3D std::fs::read_to_string(".git/LLMINUS_RESOLUTION") + .unwrap_or_else(|_| { + println!("Note: No resolution explanation found in .git/LLMINU= S_RESOLUTION"); + String::new() + }); + + // Clean up the resolution file + let _ =3D std::fs::remove_file(".git/LLMINUS_RESOLUTION"); + + let commit_msg =3D build_merge_commit_message(&pull_req, &summary, &re= solution); + println!("Commit message:\n{}", commit_msg); + + // Create a temporary file for the commit message (to handle multi-lin= e) + let commit_result =3D Command::new("git") + .args(["commit", "-m", &commit_msg]) + .output() + .context("Failed to run git commit")?; + + if commit_result.status.success() { + println!("\n=3D=3D=3D Merge Committed Successfully =3D=3D=3D"); + let stdout =3D String::from_utf8_lossy(&commit_result.stdout); + if !stdout.is_empty() { + println!("{}", stdout); + } + } else { + let stderr =3D String::from_utf8_lossy(&commit_result.stderr); + eprintln!("Commit failed: {}", stderr); + bail!("Failed to commit merge"); + } + + Ok(()) +} + fn main() -> Result<()> { let cli =3D Cli::parse(); =20 @@ -1263,6 +1715,7 @@ fn main() -> Result<()> { Commands::Vectorize { batch_size } =3D> vectorize(batch_size), Commands::Find { n } =3D> find(n), Commands::Resolve { command } =3D> resolve(&command), + Commands::Pull { message_id, command } =3D> pull(&message_id, &com= mand), } } =20 @@ -1651,4 +2104,186 @@ fn test_parse_multiple_conflicts() { assert!(conflicts[0].ours_content.contains("first ours")); assert!(conflicts[1].ours_content.contains("second ours")); } + + #[test] + fn test_pull_command_parses() { + let 
cli =3D Cli::try_parse_from(["llminus", "pull", "test@kernel.o= rg"]).unwrap(); + match cli.command { + Commands::Pull { message_id, command } =3D> { + assert_eq!(message_id, "test@kernel.org"); + assert_eq!(command, "llm"); // default + } + _ =3D> panic!("Expected Pull command"), + } + } + + #[test] + fn test_pull_command_with_custom_command() { + let cli =3D Cli::try_parse_from([ + "llminus", "pull", "test@kernel.org", "-c", "my-llm --model fa= ncy" + ]).unwrap(); + match cli.command { + Commands::Pull { message_id, command } =3D> { + assert_eq!(message_id, "test@kernel.org"); + assert_eq!(command, "my-llm --model fancy"); + } + _ =3D> panic!("Expected Pull command"), + } + } + + #[test] + fn test_parse_email_headers() { + let raw =3D r#"From: Paul Walmsley +Subject: [GIT PULL] RISC-V updates for v6.19 +Date: Thu, 11 Dec 2025 19:36:00 -0700 +Message-ID: + +This is the body of the email. +"#; + let (from, subject, date, msg_id, body) =3D parse_email_headers(ra= w); + assert_eq!(from, "Paul Walmsley "); + assert_eq!(subject, "[GIT PULL] RISC-V updates for v6.19"); + assert_eq!(date, "Thu, 11 Dec 2025 19:36:00 -0700"); + assert_eq!(msg_id, ""); + assert!(body.contains("This is the body")); + } + + #[test] + fn test_parse_email_headers_multiline() { + let raw =3D r#"From: Paul Walmsley +Subject: [GIT PULL] RISC-V updates + for v6.19 merge window +Date: Thu, 11 Dec 2025 19:36:00 -0700 + +Body here. +"#; + let (_, subject, _, _, _) =3D parse_email_headers(raw); + assert!(subject.contains("RISC-V updates")); + assert!(subject.contains("for v6.19 merge window")); + } + + #[test] + fn test_extract_git_info() { + let body =3D r#"Please pull this set of changes. + +The following changes are available in the Git repository at: + + git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux tags/riscv-for= -linus-6.19 + +for you to fetch changes up to abc123. 
+"#; + let result =3D extract_git_info(body); + assert!(result.is_some()); + let (url, git_ref) =3D result.unwrap(); + assert_eq!(url, "git://git.kernel.org/pub/scm/linux/kernel/git/ris= cv/linux"); + assert_eq!(git_ref, "tags/riscv-for-linus-6.19"); + } + + #[test] + fn test_extract_git_info_https() { + let body =3D r#"Available at: + + https://git.kernel.org/pub/scm/linux/kernel/git/foo/bar.git feature-bran= ch + +Thanks! +"#; + let result =3D extract_git_info(body); + assert!(result.is_some()); + let (url, git_ref) =3D result.unwrap(); + assert!(url.starts_with("https://git.kernel.org")); + assert_eq!(git_ref, "feature-branch"); + } + + #[test] + fn test_extract_git_info_none() { + let body =3D "This email has no git URL in it."; + let result =3D extract_git_info(body); + assert!(result.is_none()); + } + + #[test] + fn test_build_merge_commit_message() { + let pull_req =3D PullRequest { + message_id: "test123@kernel.org".to_string(), + subject: "[GIT PULL] Important updates for v6.19".to_string(), + from: "Maintainer ".to_string(), + date: "2025-12-11".to_string(), + git_url: "git://git.kernel.org/pub/scm/foo".to_string(), + git_ref: "tags/foo-for-v6.19".to_string(), + body: String::new(), + }; + + let summary =3D "This is the maintainer's summary of changes."; + let resolution =3D "Resolved by keeping both changes."; + let msg =3D build_merge_commit_message(&pull_req, summary, resolut= ion); + assert!(msg.contains("Merge tags/foo-for-v6.19")); + assert!(msg.contains("Important updates")); // subject without [GI= T PULL] + assert!(msg.contains("maintainer's summary")); + assert!(msg.contains("conflict resolution")); + assert!(msg.contains("keeping both changes")); + assert!(msg.contains("https://lore.kernel.org/all/test123@kernel.o= rg/")); + } + + #[test] + fn test_build_resolve_prompt_with_pull_request() { + let conflicts =3D vec![ConflictFile { + path: "test.c".to_string(), + ours_content: "int ours;".to_string(), + theirs_content: "int theirs;".to_string(), 
+ base_content: Some("int base;".to_string()), + }]; + + let pull_req =3D PullRequest { + message_id: "test@kernel.org".to_string(), + subject: "Test PR".to_string(), + from: "Author ".to_string(), + date: "2025-12-11".to_string(), + git_url: "git://test".to_string(), + git_ref: "tags/test".to_string(), + body: "Test summary\n\nResolve by keeping both.".to_string(), + }; + + let merge_ctx =3D MergeContext { + merge_source: Some("tags/test".to_string()), + head_branch: Some("master".to_string()), + merge_message: Some("Merge tags/test".to_string()), + }; + + let prompt =3D build_resolve_prompt(&conflicts, &[], &merge_ctx, S= ome(&pull_req)); + + // Check that key sections are present + assert!(prompt.contains("Pull Request Information")); + assert!(prompt.contains("Test PR")); // subject + assert!(prompt.contains("Test summary")); // body includes summary + assert!(prompt.contains("Resolve by keeping both")); // body inclu= des this + assert!(prompt.contains("test.c")); // conflict file + assert!(prompt.contains("int ours;")); // ours content + assert!(prompt.contains("int theirs;")); // theirs content + assert!(prompt.contains("Do NOT commit")); // pull request specific + } + + #[test] + fn test_build_resolve_prompt_without_pull_request() { + let conflicts =3D vec![ConflictFile { + path: "test.c".to_string(), + ours_content: "int ours;".to_string(), + theirs_content: "int theirs;".to_string(), + base_content: None, + }]; + + let merge_ctx =3D MergeContext { + merge_source: Some("feature-branch".to_string()), + head_branch: Some("master".to_string()), + merge_message: None, + }; + + let prompt =3D build_resolve_prompt(&conflicts, &[], &merge_ctx, N= one); + + // Check standard resolve sections + assert!(prompt.contains("Linux Kernel Merge Conflict Resolution")); + assert!(!prompt.contains("Pull Request Information")); + assert!(prompt.contains("test.c")); + assert!(prompt.contains("int ours;")); + assert!(prompt.contains("Commit with a detailed message")); // not= 
"Do NOT commit" + } } --=20 2.51.0