From nobody Sat Oct 4 14:35:17 2025 Received: from smtpbgsg2.qq.com (smtpbgsg2.qq.com [54.254.200.128]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9C6BE29AB1B for ; Fri, 15 Aug 2025 07:58:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=54.254.200.128 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1755244725; cv=none; b=M/RDZhFohr2+auuoK0TFbAzIKhveZtrlA6ynDSx8+FhPMwR7u1bI+PEaqLDUD1ou4VJgg52Rp/dlAd5xyQatGmLNyf58mg5RaurUkH48u1Y5qnYS6BdoJclP5ymtB7KpdonRtNQQK8IwSeImK+QDqLxVifIyDiAj2bY4a8/+dmE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1755244725; c=relaxed/simple; bh=FKp9qrGyxPb1wOmh93bDMiSLybMwM1BJkxyNYFEjBeY=; h=From:To:Cc:Subject:Date:Message-Id:MIME-Version:Content-Type; b=Y6tvXv1YTVThoHyQ9FcSi8VHOSGfVYyHj7aFOZMxOzJ9mZTlzjugxb+EX1uaKl9KXhlhONp+s1YgCdhI4lCiUVzby8rlgJkGM2+z9iesU4JEgLYw+WY5R06siGzILK4IEMCwG1ILdZ+26WlO0eBcyDPXWyHd8yAytzVApm0kkCc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=uniontech.com; spf=pass smtp.mailfrom=uniontech.com; dkim=pass (1024-bit key) header.d=uniontech.com header.i=@uniontech.com header.b=QenBqeol; arc=none smtp.client-ip=54.254.200.128 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=uniontech.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=uniontech.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=uniontech.com header.i=@uniontech.com header.b="QenBqeol" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=uniontech.com; s=onoh2408; t=1755244690; bh=8AjUIk/Zqh55PDDXuVmj0hJvuhDpc6oxkZ4dfmxlBmY=; h=From:To:Subject:Date:Message-Id:MIME-Version; b=QenBqeolsuCykJcKoZ4Bit4KhpFXDSynbCFwtGhx74J5FF6nz0eZlajT/IaObgxUj 3PDwowO2fA4Q66vc1KNt0YNjeqH7BRwNN61bIj+Ajf56pewd28Y9oXltHIr/Sp9H9s TRx0gULiVkaKrum35vMYA5RXJHk11yklcIquVltY= X-QQ-mid: zesmtpip3t1755244650t9a6cc0a4 X-QQ-Originating-IP: D3mhnDDD++5phpOcEBSfJQQPlTejr7s/2VX4/YJcayM= Received: from localhost.localdomain ( [localhost]) by bizesmtp.qq.com (ESMTP) with id ; Fri, 15 Aug 2025 15:57:29 +0800 (CST) X-QQ-SSF: 0000000000000000000000000000000 X-QQ-GoodBg: 1 X-BIZMAIL-ID: 2021490923635856652 EX-QQ-RecipientCnt: 9 From: Morduang Zang To: apw@canonical.com, joe@perches.com, dwaipayanray1@gmail.com, lukas.bulwahn@gmail.com Cc: linux-kernel@vger.kernel.org, wangyuli@uniontech.com, zhanjun@uniontech.com, niecheng1@uniontech.com, Morduang Zang Subject: [PATCH RESEND] checkpatch: Add full-width character detection Add comprehensive detection and automatic fixing capability for full-width (Unicode) characters that are commonly mistaken for ASCII punctuation marks. This helps catch input method editor artifacts that can cause compilation errors or formatting issues. Date: Fri, 15 Aug 2025 15:57:26 +0800 Message-Id: <20250815075726.135806-1-zhangdandan@uniontech.com> X-Mailer: git-send-email 2.20.1 Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable X-QQ-SENDSIZE: 520 Feedback-ID: zesmtpip:uniontech.com:qybglogicsvrgz:qybglogicsvrgz6b-0 X-QQ-XMAILINFO: MfMKp/VE+ZXdI4n4V1+mEUyIXN5Ki9d+mwY/OJr1gq1LUFj2TTS3RELm yjo4W5ER5b3NrAI4q/I00OJUr+8YR4azU35sxbglW6vrkACCYrrRsQUFXyKVTl4aA2IVmbC yhfN9BhxlGUJZ1kbMW2a9fwo9A8o1q/f4InSPMIbqHJNqddlj4TcuEs4lVm4zNGaUO/mzyu FDRjpWwnhMLkFZCKs3OsVcXixINlXve3FhtFQMvs4FUHd1o/du9a4MWijg2MKXT15QUayKl dKe9bygMmt1yTQZ81HBoht8llTKsZvLdrJQUv3NZvR/BPbiLyPLPpBYjkU7eKpvj05uVOys IbWeKj5Fi7k2Vyh0FU+eslU9Hjsu6iM7P3FyJQGbzXfUoG0p44lengTk72JBywjVbGJ82mH a+P9pk5Q1RHk7LUaSALPSuyT1jg71ithF4rYe6IcCztevlGnfNzqTk3/kwR5MyNSyEF2by2 6pBiyrDZLxNcGl/LYH1hrCzE6+Z9j4eimCMreDvEBLvee64ZjNb5/AQS/w/AFJ58VCqXg9j Lkmv5x8K/JOSrCa8F0aPosn9OYO3/1tjdH7Lbh2CQ2SOblSs2bjOsMonRLiVLFPbEkyqJ6n IQRzV3Z4g2Vocdz1w28p0PosugR5MqNAkhYrMVRWD5WzJgnEdRQ9kUDsW85xg2z0RDZtWCv mcXNz2UwOKBi01TDLumi97oYG3ir6THGjhjibUtihgshsCegZpIwFUCQT8U26dwxqPKIh1R Q2diRtKeC/qhDcio+bhhdNA2CJcVdak3gxdGHytTYcvupJ++LHvXwwpYapmOtGU2KN5xyQl 5cLRxXUwGRWUJtd+/9r5VHd1aVX5jQFylZVic+JTjhxwU5OUCzkuBSoaS0qyLLNrStqf/fv ylfoLkb3aPtSosuq3GybxGtvyZ93uMhtmbXzkIdpGDvavKPHBvaRd6juwVfGfIwK27+F+0n K/Z9+iqQ2o0Wyukaix71YBYFYFJ2EE9Jl62DeK/Fdmi2zWj2svOWLFZTH0b2dJ77FRDkqCP Kf4uOT0hVQ3t3INetwPmR9R+YPS9gGCHt2ziuzxsXQCZRXVlUckePzXMpRlLLw90gNMJafs wKWKEskZK7oTFi+Zt2a+pX0qdamHJ00ejXHTswkdfa02jhqz804NAwfPJW78Tl+Zg== X-QQ-XMRINFO: MSVp+SPm3vtS1Vd6Y4Mggwc= X-QQ-RECHKSPAM: 0 The implementation detects 25 types of full-width characters: - Basic punctuation: =EF=BC=9B=EF=BC=8C=E3=80=82=EF=BC=88=EF=BC=89=EF=BC=81= =EF=BC=9F=EF=BC=9A=E3=80=80 - Programming brackets: =EF=BC=BB=EF=BC=BD=EF=BD=9B=EF=BD=9D=EF=BC=9C=EF=BC= =9E - Assignment and comparison: =EF=BC=9D - Arithmetic operators: =EF=BC=8B=EF=BC=8D=EF=BC=8A=EF=BC=8F=EF=BC=BC - Other programming symbols: =EF=BC=85=EF=BC=83=EF=BC=86=EF=BD=9C Detection covers three areas: 1. Code lines (lines starting with '+') - FULLWIDTH_CHARS 2. Commit messages - FULLWIDTH_CHARS_COMMIT 3. Subject lines - FULLWIDTH_CHARS_SUBJECT Example usage: ./scripts/checkpatch.pl my_patch.patch ./scripts/checkpatch.pl --fix my_patch.patch ./scripts/checkpatch.pl --fix-inplace my_source.c Signed-off-by: Morduang Zang Signed-off-by: Wangyuli --- scripts/checkpatch.pl | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e722dd6fa8ef..f4cb547a470b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -75,6 +75,41 @@ my $git_command =3D'export LANGUAGE=3Den_US.UTF-8; git'; my $tabsize =3D 8; my ${CONFIG_} =3D "CONFIG_"; =20 +# Full-width character mappings (UTF-8 byte sequences to ASCII) +my %fullwidth_chars =3D ( + # Basic punctuation + "\xef\xbc\x9b" =3D> [";", "semicolon", "=EF=BC=9B"], + "\xef\xbc\x8c" =3D> [",", "comma", "=EF=BC=8C"], + "\xe3\x80\x82" =3D> [".", "period", "=E3=80=82"], + "\xef\xbc\x88" =3D> ["(", "opening parenthesis", "=EF=BC=88"], + "\xef\xbc\x89" =3D> [")", "closing parenthesis", "=EF=BC=89"], + "\xef\xbc\x81" =3D> ["!", "exclamation mark", "=EF=BC=81"], + "\xef\xbc\x9f" =3D> ["?", "question mark", "=EF=BC=9F"], + "\xef\xbc\x9a" =3D> [":", "colon", "=EF=BC=9A"], + "\xe3\x80\x80" =3D> [" ", "space", "=E3=80=80"], + # Programming brackets + "\xef\xbc\xbb" =3D> ["[", "left square bracket", "=EF=BC=BB"], + "\xef\xbc\xbd" =3D> ["]", "right square bracket", "=EF=BC=BD"], + "\xef\xbd\x9b" =3D> ["{", "left curly bracket", "=EF=BD=9B"], + "\xef\xbd\x9d" =3D> ["}", "right curly bracket", "=EF=BD=9D"], + "\xef\xbc\x9c" =3D> ["<", "less-than sign", "=EF=BC=9C"], + "\xef\xbc\x9e" =3D> [">", "greater-than sign", "=EF=BC=9E"], + # Assignment and comparison + "\xef\xbc\x9d" =3D> ["=3D", "equals sign", "=EF=BC=9D"], + # Arithmetic operators + "\xef\xbc\x8b" =3D> ["+", "plus sign", "=EF=BC=8B"], + "\xef\xbc\x8d" =3D> ["-", "minus sign", "=EF=BC=8D"], + "\xef\xbc\x8a" =3D> ["*", "asterisk", "=EF=BC=8A"], + "\xef\xbc\x8f" =3D> ["/", "solidus", "=EF=BC=8F"], + "\xef\xbc\xbc" =3D> ["\\", "reverse solidus", "=EF=BC=BC"], + # Other programming symbols + "\xef\xbc\x85" =3D> ["%", "percent sign", "=EF=BC=85"], + "\xef\xbc\x83" =3D> ["#", "number sign", "=EF=BC=83"], + "\xef\xbc\x86" =3D> ["&", "ampersand", "=EF=BC=86"], + "\xef\xbd\x9c" =3D> ["|", "vertical line", "=EF=BD=9C"], +); +my $fullwidth_pattern =3D join('|', map { quotemeta($_) } keys %fullwidth_= chars); + my %maybe_linker_symbol; # for externs in c exceptions, when seen in *vmli= nux.lds.h =20 sub help { @@ -1019,6 +1054,40 @@ sub read_words { return 0; } =20 +# Check for full-width characters and optionally fix them +sub check_fullwidth_chars { + my ($line, $context, $warning_type, $apply_fix, $fixlinenr, $fixed_ref, $= herecurr) =3D @_; + my @found_chars =3D (); + my $fixed_line =3D $line; + my $has_fixes =3D 0; + + return 0 unless $line =3D~ /$fullwidth_pattern/o; + + if ($apply_fix) { + $fixed_line =3D~ s/($fullwidth_pattern)/$fullwidth_chars{$1}[0]/ge; + $has_fixes =3D ($fixed_line ne $line); + } + + while ($line =3D~ /($fullwidth_pattern)/go) { + my $fullwidth_byte_seq =3D $1; + if (exists $fullwidth_chars{$fullwidth_byte_seq}) { + my ($ascii_char, $name, $fullwidth_char) =3D @{$fullwidth_chars{$fullwi= dth_byte_seq}}; + push @found_chars, "Full-width $name ($fullwidth_char) found$context, u= se ASCII $name ($ascii_char) instead"; + } + } + + if (@found_chars) { + foreach my $msg (@found_chars) { + WARN($warning_type, $msg . "\n" . $herecurr); + } + if ($apply_fix && $has_fixes && defined $fixed_ref) { + $fixed_ref->[$fixlinenr] =3D $fixed_line; + } + } + + return scalar @found_chars; +} + my $const_structs; if (show_type("CONST_STRUCT")) { read_words(\$const_structs, $conststructsfile) @@ -2961,6 +3030,11 @@ sub process { $commit_log_has_diff =3D 1; } =20 +# Check for full-width characters in commit message + if ($in_commit_log && show_type("FULLWIDTH_CHARS_COMMIT")) { + check_fullwidth_chars($rawline, " in commit message", "FULLWIDTH_CHARS_= COMMIT", 0, 0, undef, $herecurr); + } + # Check for incorrect file permissions if ($line =3D~ /^new (file )?mode.*[7531]\d{0,2}$/) { my $permhere =3D $here . "FILE: $realfile\n"; @@ -3266,6 +3340,11 @@ sub process { "A patch subject line should describe the change not the tool that= found it\n" . $herecurr); } =20 +# Check for full-width characters in Subject line + if ($in_header_lines && $line =3D~ /^Subject:/i && show_type("FULLWIDTH_= CHARS_SUBJECT")) { + check_fullwidth_chars($rawline, " in subject line", "FULLWIDTH_CHARS_SU= BJECT", 0, 0, undef, $herecurr); + } + # Check for Gerrit Change-Ids not in any patch context if ($realfile eq '' && !$has_patch_separator && $line =3D~ /^\s*change-i= d:/i) { if (ERROR("GERRIT_CHANGE_ID", @@ -3974,6 +4053,11 @@ sub process { } } =20 +# check for full-width characters (full-width punctuation marks, etc.) + if ($rawline =3D~ /^\+/ && show_type("FULLWIDTH_CHARS")) { + check_fullwidth_chars($rawline, "", "FULLWIDTH_CHARS", $fix, $fixlinenr= , \@fixed, $herecurr); + } + # check multi-line statement indentation matches previous line if ($perl_version_ok && $prevline =3D~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declar= e\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=3D\s*$Ident\s*= )\(.*(\&\&|\|\||,)\s*$/) { --=20 2.20.1