From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6EBA02D9ED9; Sat, 14 Jun 2025 13:49:02 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908942; cv=none; b=pXXDHMnPiT0hMB5uOmme40xvvFASHw1FrRdTixKmMsFIM0aN/5fWDtwlDyttmxBf8+YqtxDv+wFJJbXCz1wpAOePOIKtVGy8vD74f3gbTemeIfUBfleqb9PrzlYng9p2hwgW7Bx7yjNCVolR0V5aguGbHwyxZ/JzwP3eKvOnH7Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908942; c=relaxed/simple; bh=O+1OzBrm5md4mNoCVi4XOBBdMV6xDhIhhMYZresewY8=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version:Content-Type; b=IMmCYhDDmP7jaXnyeMoBdmOTlkAzn3Sl+IGMFZui2lmDA1TJKRU4yeaCJPf714ovEQwDK3+eXJTc5Ldl86gr5NL240btIEDUrMp8+rR4npwTITSaHhuKuXZwB2p38jWkzljbXzyhi440OgoqCCEFiIkh9AwabGx58gvSiUpBRQo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=rFtDpitE; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="rFtDpitE" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1FEEEC4CEF1; Sat, 14 Jun 2025 13:49:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908942; bh=O+1OzBrm5md4mNoCVi4XOBBdMV6xDhIhhMYZresewY8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=rFtDpitEmBsceY/hYuXhcm+ndWCpraqKL9O71f4+fzWE12USD5/iZSAYSu6QOKTnR Sae31zvitLTvgz0DqHua6fRWjewTw7K5zIYN0BaI7HrzdQPZ4O8bQLIpupCrxeMH2q q+jGrGEaGvAPHjRwE9fSIDweVXeb2puea5dhUNbvaj7tQas6mUMC26sZYKIjTKpBB9 
DmnL+yjWAOzBseMRozL9B8rnh+1SyZ1CVZqyKwJKubHSRoLLuyBjZtxVYbetG2no7J ak1OTqUK2Q3pRuaJ9bQjC5i6zLNvabcFgfyu5c/pK1hF28HwlX4/MdZ2/MM2RobykD 50mxqGlGbxeig== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 01/19] kernel/api: introduce kernel API specification framework Date: Sat, 14 Jun 2025 09:48:40 -0400 Message-Id: <20250614134858.790460-2-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Add a comprehensive framework for formally documenting kernel APIs with inline specifications. This framework provides: - Structured API documentation with parameter specifications, return values, error conditions, and execution context requirements - Runtime validation capabilities for debugging (CONFIG_KAPI_RUNTIME_CHECKS) - Export of specifications via debugfs for tooling integration - Support for both internal kernel APIs and system calls The framework stores specifications in a dedicated ELF section and provides infrastructure for: - Compile-time validation of specifications - Runtime querying of API documentation - Machine-readable export formats - Integration with existing SYSCALL_DEFINE macros This commit introduces the core infrastructure without modifying any existing APIs. Subsequent patches will add specifications to individual subsystems. 
Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-api-spec.rst | 507 +++++++ MAINTAINERS | 9 + arch/um/kernel/dyn.lds.S | 3 + arch/um/kernel/uml.lds.S | 3 + arch/x86/kernel/vmlinux.lds.S | 3 + include/asm-generic/vmlinux.lds.h | 20 + include/linux/kernel_api_spec.h | 942 +++++++++++++ include/linux/syscall_api_spec.h | 341 +++++ include/linux/syscalls.h | 1 + init/Kconfig | 2 + kernel/Makefile | 1 + kernel/api/Kconfig | 35 + kernel/api/Makefile | 7 + kernel/api/kernel_api_spec.c | 1169 +++++++++++++++++ 14 files changed, 3043 insertions(+) create mode 100644 Documentation/admin-guide/kernel-api-spec.rst create mode 100644 include/linux/kernel_api_spec.h create mode 100644 include/linux/syscall_api_spec.h create mode 100644 kernel/api/Kconfig create mode 100644 kernel/api/Makefile create mode 100644 kernel/api/kernel_api_spec.c diff --git a/Documentation/admin-guide/kernel-api-spec.rst b/Documentation/= admin-guide/kernel-api-spec.rst new file mode 100644 index 0000000000000..3a63f6711e27b --- /dev/null +++ b/Documentation/admin-guide/kernel-api-spec.rst @@ -0,0 +1,507 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D +Kernel API Specification Framework +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +:Author: Sasha Levin +:Date: June 2025 + +.. contents:: Table of Contents + :depth: 3 + :local: + +Introduction +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +The Kernel API Specification Framework (KAPI) provides a comprehensive sys= tem for +formally documenting, validating, and introspecting kernel APIs. This fram= ework +addresses the long-standing challenge of maintaining accurate, machine-rea= dable +documentation for the thousands of internal kernel APIs and system calls. + +Purpose and Goals +----------------- + +The framework aims to: + +1. 
**Improve API Documentation**: Provide structured, inline documentation= that + lives alongside the code and is maintained as part of the development p= rocess. + +2. **Enable Runtime Validation**: Optionally validate API usage at runtime= to catch + common programming errors during development and testing. + +3. **Support Tooling**: Export API specifications in machine-readable form= ats for + use by static analyzers, documentation generators, and development tool= s. + +4. **Enhance Debugging**: Provide detailed API information at runtime thro= ugh debugfs + for debugging and introspection. + +5. **Formalize Contracts**: Explicitly document API contracts including pa= rameter + constraints, execution contexts, locking requirements, and side effects. + +Architecture Overview +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Components +---------- + +The framework consists of several key components: + +1. **Core Framework** (``kernel/api/kernel_api_spec.c``) + + - API specification registration and storage + - Runtime validation engine + - Specification lookup and querying + +2. **DebugFS Interface** (``kernel/api/kapi_debugfs.c``) + + - Runtime introspection via ``/sys/kernel/debug/kapi/`` + - JSON and XML export formats + - Per-API detailed information + +3. **IOCTL Support** (``kernel/api/ioctl_validation.c``) + + - Extended framework for IOCTL specifications + - Automatic validation wrappers + - Structure field validation + +4. 
**Specification Macros** (``include/linux/kernel_api_spec.h``) + + - Declarative macros for API documentation + - Type-safe parameter specifications + - Context and constraint definitions + +Data Model +---------- + +The framework uses a hierarchical data model:: + + kernel_api_spec + =E2=94=9C=E2=94=80=E2=94=80 Basic Information + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 name (API function name) + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 version (specification version) + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 description (human-readable de= scription) + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 kernel_version (when API was i= ntroduced) + =E2=94=82 + =E2=94=9C=E2=94=80=E2=94=80 Parameters (up to 16) + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 kapi_param_spec + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 name + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 type (int, pointer, string= , etc.) + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 direction (in, out, inout) + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 constraints (range, mask, = enum values) + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 validation rules + =E2=94=82 + =E2=94=9C=E2=94=80=E2=94=80 Return Value + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 kapi_return_spec + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 type + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 success conditions + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 validation rules + =E2=94=82 + =E2=94=9C=E2=94=80=E2=94=80 Error Conditions (up to 32) + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 kapi_error_spec + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 error code + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 condition description + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 recovery advice + =E2=94=82 + =E2=94=9C=E2=94=80=E2=94=80 Execution Context + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 allowed contexts (process, int= errupt, etc.) 
+ =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 locking requirements + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 preemption/interrupt state + =E2=94=82 + =E2=94=94=E2=94=80=E2=94=80 Side Effects + =E2=94=9C=E2=94=80=E2=94=80 memory allocation + =E2=94=9C=E2=94=80=E2=94=80 state changes + =E2=94=94=E2=94=80=E2=94=80 signal handling + +Usage Guide +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Basic API Specification +----------------------- + +To document a kernel API, use the specification macros in the implementati= on file: + +.. code-block:: c + + #include <linux/kernel_api_spec.h> + + KAPI_DEFINE_SPEC(kmalloc_spec, kmalloc, "3.0") + KAPI_DESCRIPTION("Allocate kernel memory") + KAPI_PARAM(0, size, KAPI_TYPE_SIZE_T, KAPI_DIR_IN, + "Number of bytes to allocate") + KAPI_PARAM_RANGE(0, 0, KMALLOC_MAX_SIZE) + KAPI_PARAM(1, flags, KAPI_TYPE_FLAGS, KAPI_DIR_IN, + "Allocation flags (GFP_*)") + KAPI_PARAM_MASK(1, __GFP_BITS_MASK) + KAPI_RETURN(KAPI_TYPE_POINTER, "Pointer to allocated memory or NULL") + KAPI_ERROR(ENOMEM, "Out of memory") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SOFTIRQ | KAPI_CTX_HARDIRQ) + KAPI_SIDE_EFFECT("Allocates memory from kernel heap") + KAPI_LOCK_NOT_REQUIRED("Any lock") + KAPI_END_SPEC + + void *kmalloc(size_t size, gfp_t flags) + { + /* Implementation */ + } + +System Call Specification +------------------------- + +System calls use specialized macros: + +.. 
code-block:: c + + KAPI_DEFINE_SYSCALL_SPEC(open_spec, open, "1.0") + KAPI_DESCRIPTION("Open a file") + KAPI_PARAM(0, pathname, KAPI_TYPE_USER_STRING, KAPI_DIR_IN, + "Path to file") + KAPI_PARAM_PATH(0, PATH_MAX) + KAPI_PARAM(1, flags, KAPI_TYPE_FLAGS, KAPI_DIR_IN, + "Open flags (O_*)") + KAPI_PARAM(2, mode, KAPI_TYPE_MODE_T, KAPI_DIR_IN, + "File permissions (if creating)") + KAPI_RETURN(KAPI_TYPE_INT, "File descriptor or -1") + KAPI_ERROR(EACCES, "Permission denied") + KAPI_ERROR(ENOENT, "File does not exist") + KAPI_ERROR(EMFILE, "Too many open files") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + KAPI_SIGNAL(EINTR, "Open can be interrupted by signal") + KAPI_END_SYSCALL_SPEC + +IOCTL Specification +------------------- + +IOCTLs have extended support for structure validation: + +.. code-block:: c + + KAPI_DEFINE_IOCTL_SPEC(vidioc_querycap_spec, VIDIOC_QUERYCAP, + "VIDIOC_QUERYCAP", + sizeof(struct v4l2_capability), + sizeof(struct v4l2_capability), + "video_fops") + KAPI_DESCRIPTION("Query device capabilities") + KAPI_IOCTL_FIELD(driver, KAPI_TYPE_CHAR_ARRAY, KAPI_DIR_OUT, + "Driver name", 16) + KAPI_IOCTL_FIELD(card, KAPI_TYPE_CHAR_ARRAY, KAPI_DIR_OUT, + "Device name", 32) + KAPI_IOCTL_FIELD(version, KAPI_TYPE_U32, KAPI_DIR_OUT, + "Driver version") + KAPI_IOCTL_FIELD(capabilities, KAPI_TYPE_FLAGS, KAPI_DIR_OUT, + "Device capabilities") + KAPI_END_IOCTL_SPEC + +Runtime Validation +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Enabling Validation +------------------- + +Runtime validation is controlled by kernel configuration: + +1. Enable ``CONFIG_KAPI_SPEC`` to build the framework +2. Enable ``CONFIG_KAPI_RUNTIME_CHECKS`` for runtime validation +3. Optionally enable ``CONFIG_KAPI_SPEC_DEBUGFS`` for debugfs interface + +Validation Modes +---------------- + +The framework supports several validation modes: + +.. 
code-block:: c + + /* Enable validation for specific API */ + kapi_enable_validation("kmalloc"); + + /* Enable validation for all APIs */ + kapi_enable_all_validation(); + + /* Set validation level */ + kapi_set_validation_level(KAPI_VALIDATE_FULL); + +Validation Levels: + +- ``KAPI_VALIDATE_NONE``: No validation +- ``KAPI_VALIDATE_BASIC``: Type and NULL checks only +- ``KAPI_VALIDATE_NORMAL``: Basic + range and constraint checks +- ``KAPI_VALIDATE_FULL``: All checks including custom validators + +Custom Validators +----------------- + +APIs can register custom validation functions: + +.. code-block:: c + + static bool validate_buffer_size(const struct kapi_param_spec *spec, + const void *value, void *context) + { + size_t size =3D *(size_t *)value; + struct my_context *ctx =3D context; + + return size > 0 && size <=3D ctx->max_buffer_size; + } + + KAPI_PARAM_CUSTOM_VALIDATOR(0, validate_buffer_size) + +DebugFS Interface +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +The debugfs interface provides runtime access to API specifications: + +Directory Structure +------------------- + +:: + + /sys/kernel/debug/kapi/ + =E2=94=9C=E2=94=80=E2=94=80 apis/ # All registered = APIs + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 kmalloc/ + =E2=94=82 =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 specification # = Human-readable spec + =E2=94=82 =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 json # J= SON format + =E2=94=82 =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 xml # X= ML format + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 open/ + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 ... 
+ =E2=94=9C=E2=94=80=E2=94=80 summary # Overview of all= APIs + =E2=94=9C=E2=94=80=E2=94=80 validation/ # Validation cont= rols + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 enabled # Global e= nable/disable + =E2=94=82 =E2=94=9C=E2=94=80=E2=94=80 level # Validati= on level + =E2=94=82 =E2=94=94=E2=94=80=E2=94=80 stats # Validati= on statistics + =E2=94=94=E2=94=80=E2=94=80 export/ # Bulk export opt= ions + =E2=94=9C=E2=94=80=E2=94=80 all.json # All specs in JSON + =E2=94=94=E2=94=80=E2=94=80 all.xml # All specs in XML + +Usage Examples +-------------- + +Query specific API:: + + $ cat /sys/kernel/debug/kapi/apis/kmalloc/specification + API: kmalloc + Version: 3.0 + Description: Allocate kernel memory + + Parameters: + [0] size (size_t, in): Number of bytes to allocate + Range: 0 - 4194304 + [1] flags (flags, in): Allocation flags (GFP_*) + Mask: 0x1ffffff + + Returns: pointer - Pointer to allocated memory or NULL + + Errors: + ENOMEM: Out of memory + + Context: process, softirq, hardirq + + Side Effects: + - Allocates memory from kernel heap + +Export all specifications:: + + $ cat /sys/kernel/debug/kapi/export/all.json > kernel-apis.json + +Enable validation for specific API:: + + $ echo 1 > /sys/kernel/debug/kapi/apis/kmalloc/validate + +Performance Considerations +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D + +Memory Overhead +--------------- + +Each API specification consumes approximately 2-4KB of memory. With thousa= nds +of kernel APIs, this can add up to several megabytes. Consider: + +1. Building with ``CONFIG_KAPI_SPEC=3Dn`` for production kernels +2. Using ``__init`` annotations for APIs only used during boot +3. 
Implementing lazy loading for rarely used specifications + +Runtime Overhead +---------------- + +When ``CONFIG_KAPI_RUNTIME_CHECKS`` is enabled: + +- Each validated API call adds 50-200ns overhead +- Complex validations (custom validators) may add more +- Use validation only in development/testing kernels + +Optimization Strategies +----------------------- + +1. **Compile-time optimization**: When validation is disabled, all + validation code is optimized away by the compiler. + +2. **Selective validation**: Enable validation only for specific APIs + or subsystems under test. + +3. **Caching**: The framework caches validation results for repeated + calls with identical parameters. + +Documentation Generation +------------------------ + +The framework exports specifications via debugfs that can be used +to generate documentation. Tools for automatic documentation generation +from specifications are planned for future development. + +IDE Integration +--------------- + +Modern IDEs can use the JSON export for: + +- Parameter hints +- Type checking +- Context validation +- Error code documentation + +Testing Framework +----------------- + +The framework includes test helpers:: + + #ifdef CONFIG_KAPI_TESTING + /* Verify API behaves according to specification */ + kapi_test_api("kmalloc", test_cases); + #endif + +Best Practices +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Writing Specifications +---------------------- + +1. **Be Comprehensive**: Document all parameters, errors, and side effects +2. **Keep Updated**: Update specs when API behavior changes +3. **Use Examples**: Include usage examples in descriptions +4. **Validate Constraints**: Define realistic constraints for parameters +5. **Document Context**: Clearly specify allowed execution contexts + +Maintenance +----------- + +1. **Version Specifications**: Increment version when API changes +2. **Deprecation**: Mark deprecated APIs and suggest replacements +3. 
**Cross-reference**: Link related APIs in descriptions +4. **Test Specifications**: Verify specs match implementation + +Common Patterns +--------------- + +**Optional Parameters**:: + + KAPI_PARAM(2, optional_arg, KAPI_TYPE_POINTER, KAPI_DIR_IN, + "Optional argument (may be NULL)") + KAPI_PARAM_OPTIONAL(2) + +**Variable Arguments**:: + + KAPI_PARAM(1, fmt, KAPI_TYPE_FORMAT_STRING, KAPI_DIR_IN, + "Printf-style format string") + KAPI_PARAM_VARIADIC(2, "Format arguments") + +**Callback Functions**:: + + KAPI_PARAM(1, callback, KAPI_TYPE_FUNCTION_PTR, KAPI_DIR_IN, + "Callback function") + KAPI_PARAM_CALLBACK(1, "int (*)(void *data)", "data") + +Troubleshooting +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Common Issues +------------- + +**Specification Not Found**:: + + kernel: KAPI: Specification for 'my_api' not found + + Solution: Ensure KAPI_DEFINE_SPEC is in the same translation unit + as the function implementation. + +**Validation Failures**:: + + kernel: KAPI: Validation failed for kmalloc parameter 'size': + value 5242880 exceeds maximum 4194304 + + Solution: Check parameter constraints or adjust specification if + the constraint is incorrect. + +**Build Errors**:: + + error: 'KAPI_TYPE_UNKNOWN' undeclared + + Solution: Include <linux/kernel_api_spec.h> and ensure + CONFIG_KAPI_SPEC is enabled. + +Debug Options +------------- + +Enable verbose debugging:: + + echo 8 > /proc/sys/kernel/printk + echo 1 > /sys/kernel/debug/kapi/debug/verbose + +Future Directions +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Planned Features +---------------- + +1. **Automatic Extraction**: Tool to extract specifications from existing + kernel-doc comments + +2. **Contract Verification**: Static analysis to verify implementation + matches specification + +3. **Performance Profiling**: Measure actual API performance against + documented expectations + +4. **Fuzzing Integration**: Use specifications to guide intelligent + fuzzing of kernel APIs + +5. 
**Version Compatibility**: Track API changes across kernel versions + +Research Areas +-------------- + +1. **Formal Verification**: Use specifications for mathematical proofs + of correctness + +2. **Runtime Monitoring**: Detect specification violations in production + with minimal overhead + +3. **API Evolution**: Analyze how kernel APIs change over time + +4. **Security Applications**: Use specifications for security policy + enforcement + +Contributing +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Submitting Specifications +------------------------- + +1. Add specifications to the same file as the API implementation +2. Follow existing patterns and naming conventions +3. Test with CONFIG_KAPI_RUNTIME_CHECKS enabled +4. Verify debugfs output is correct +5. Run scripts/checkpatch.pl on your changes + +Review Criteria +--------------- + +Specifications will be reviewed for: + +1. **Completeness**: All parameters and errors documented +2. **Accuracy**: Specification matches implementation +3. **Clarity**: Descriptions are clear and helpful +4. **Consistency**: Follows framework conventions +5. 
**Performance**: No unnecessary runtime overhead + +Contact +------- + +- Maintainer: Sasha Levin diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa163..7a2cb663131bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13047,6 +13047,15 @@ W: https://linuxtv.org T: git git://linuxtv.org/media.git F: drivers/media/radio/radio-keene* =20 +KERNEL API SPECIFICATION FRAMEWORK (KAPI) +M: Sasha Levin +L: linux-api@vger.kernel.org +S: Maintained +F: Documentation/admin-guide/kernel-api-spec.rst +F: include/linux/kernel_api_spec.h +F: kernel/api/ +F: scripts/extract-kapi-spec.sh + KERNEL AUTOMOUNTER M: Ian Kent L: autofs@vger.kernel.org diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index a36b7918a011a..283ab11788d8c 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -102,6 +102,9 @@ SECTIONS init.data : { INIT_DATA } __init_end =3D .; =20 + /* Kernel API specifications in dedicated section */ + KAPI_SPECS_SECTION() + /* Ensure the __preinit_array_start label is properly aligned. 
We could instead move the label definition inside the section, but the linker would then create the section even if it turns out to diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index a409d4b66114f..e3850d8293436 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -74,6 +74,9 @@ SECTIONS init.data : { INIT_DATA } __init_end =3D .; =20 + /* Kernel API specifications in dedicated section */ + KAPI_SPECS_SECTION() + .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4fa0be732af10..8cc508adc9d51 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -173,6 +173,9 @@ SECTIONS RO_DATA(PAGE_SIZE) X86_ALIGN_RODATA_END =20 + /* Kernel API specifications in dedicated section */ + KAPI_SPECS_SECTION() + /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Start of data section */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinu= x.lds.h index fa5f19b8d53a0..7b47736057e01 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -279,6 +279,26 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPEL= LER_CLANG) #define TRACE_SYSCALLS() #endif =20 +#ifdef CONFIG_KAPI_SPEC +#define KAPI_SPECS() \ + . =3D ALIGN(8); \ + __start_kapi_specs =3D .; \ + KEEP(*(.kapi_specs)) \ + __stop_kapi_specs =3D .; + +/* For placing KAPI specs in a dedicated section */ +#define KAPI_SPECS_SECTION() \ + .kapi_specs : AT(ADDR(.kapi_specs) - LOAD_OFFSET) { \ + . 
=3D ALIGN(8); \ + __start_kapi_specs =3D .; \ + KEEP(*(.kapi_specs)) \ + __stop_kapi_specs =3D .; \ + } +#else +#define KAPI_SPECS() +#define KAPI_SPECS_SECTION() +#endif + #ifdef CONFIG_BPF_EVENTS #define BPF_RAW_TP() STRUCT_ALIGN(); \ BOUNDED_SECTION_BY(__bpf_raw_tp_map, __bpf_raw_tp) diff --git a/include/linux/kernel_api_spec.h b/include/linux/kernel_api_spe= c.h new file mode 100644 index 0000000000000..04df5892bc6d6 --- /dev/null +++ b/include/linux/kernel_api_spec.h @@ -0,0 +1,942 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kernel_api_spec.h - Kernel API Formal Specification Framework + * + * This framework provides structures and macros to formally specify kerne= l APIs + * in both human and machine-readable formats. It supports comprehensive d= ocumentation + * of function signatures, parameters, return values, error conditions, an= d constraints. + */ + +#ifndef _LINUX_KERNEL_API_SPEC_H +#define _LINUX_KERNEL_API_SPEC_H + +#include +#include +#include + +#define KAPI_MAX_PARAMS 16 +#define KAPI_MAX_ERRORS 32 +#define KAPI_MAX_CONSTRAINTS 16 +#define KAPI_MAX_SIGNALS 32 +#define KAPI_MAX_NAME_LEN 128 +#define KAPI_MAX_DESC_LEN 512 + +/** + * enum kapi_param_type - Parameter type classification + * @KAPI_TYPE_VOID: void type + * @KAPI_TYPE_INT: Integer types (int, long, etc.) 
+ * @KAPI_TYPE_UINT: Unsigned integer types + * @KAPI_TYPE_PTR: Pointer types + * @KAPI_TYPE_STRUCT: Structure types + * @KAPI_TYPE_UNION: Union types + * @KAPI_TYPE_ENUM: Enumeration types + * @KAPI_TYPE_FUNC_PTR: Function pointer types + * @KAPI_TYPE_ARRAY: Array types + * @KAPI_TYPE_FD: File descriptor - validated in process context + * @KAPI_TYPE_USER_PTR: User space pointer - validated for access and size + * @KAPI_TYPE_PATH: Pathname - validated for access and path limits + * @KAPI_TYPE_CUSTOM: Custom/complex types + */ +enum kapi_param_type { + KAPI_TYPE_VOID =3D 0, + KAPI_TYPE_INT, + KAPI_TYPE_UINT, + KAPI_TYPE_PTR, + KAPI_TYPE_STRUCT, + KAPI_TYPE_UNION, + KAPI_TYPE_ENUM, + KAPI_TYPE_FUNC_PTR, + KAPI_TYPE_ARRAY, + KAPI_TYPE_FD, /* File descriptor - validated in process context */ + KAPI_TYPE_USER_PTR, /* User space pointer - validated for access and size= */ + KAPI_TYPE_PATH, /* Pathname - validated for access and path limits */ + KAPI_TYPE_CUSTOM, +}; + +/** + * enum kapi_param_flags - Parameter attribute flags + * @KAPI_PARAM_IN: Input parameter + * @KAPI_PARAM_OUT: Output parameter + * @KAPI_PARAM_INOUT: Input/output parameter + * @KAPI_PARAM_OPTIONAL: Optional parameter (can be NULL) + * @KAPI_PARAM_CONST: Const qualified parameter + * @KAPI_PARAM_VOLATILE: Volatile qualified parameter + * @KAPI_PARAM_USER: User space pointer + * @KAPI_PARAM_DMA: DMA-capable memory required + * @KAPI_PARAM_ALIGNED: Alignment requirements + */ +enum kapi_param_flags { + KAPI_PARAM_IN =3D (1 << 0), + KAPI_PARAM_OUT =3D (1 << 1), + KAPI_PARAM_INOUT =3D (1 << 2), + KAPI_PARAM_OPTIONAL =3D (1 << 3), + KAPI_PARAM_CONST =3D (1 << 4), + KAPI_PARAM_VOLATILE =3D (1 << 5), + KAPI_PARAM_USER =3D (1 << 6), + KAPI_PARAM_DMA =3D (1 << 7), + KAPI_PARAM_ALIGNED =3D (1 << 8), +}; + +/** + * enum kapi_context_flags - Function execution context flags + * @KAPI_CTX_PROCESS: Can be called from process context + * @KAPI_CTX_SOFTIRQ: Can be called from softirq context + * @KAPI_CTX_HARDIRQ: 
Can be called from hardirq context + * @KAPI_CTX_NMI: Can be called from NMI context + * @KAPI_CTX_ATOMIC: Must be called in atomic context + * @KAPI_CTX_SLEEPABLE: May sleep + * @KAPI_CTX_PREEMPT_DISABLED: Requires preemption disabled + * @KAPI_CTX_IRQ_DISABLED: Requires interrupts disabled + */ +enum kapi_context_flags { + KAPI_CTX_PROCESS =3D (1 << 0), + KAPI_CTX_SOFTIRQ =3D (1 << 1), + KAPI_CTX_HARDIRQ =3D (1 << 2), + KAPI_CTX_NMI =3D (1 << 3), + KAPI_CTX_ATOMIC =3D (1 << 4), + KAPI_CTX_SLEEPABLE =3D (1 << 5), + KAPI_CTX_PREEMPT_DISABLED =3D (1 << 6), + KAPI_CTX_IRQ_DISABLED =3D (1 << 7), +}; + +/** + * enum kapi_lock_type - Lock types used/required by the function + * @KAPI_LOCK_NONE: No locking requirements + * @KAPI_LOCK_MUTEX: Mutex lock + * @KAPI_LOCK_SPINLOCK: Spinlock + * @KAPI_LOCK_RWLOCK: Read-write lock + * @KAPI_LOCK_SEQLOCK: Sequence lock + * @KAPI_LOCK_RCU: RCU lock + * @KAPI_LOCK_SEMAPHORE: Semaphore + * @KAPI_LOCK_CUSTOM: Custom locking mechanism + */ +enum kapi_lock_type { + KAPI_LOCK_NONE =3D 0, + KAPI_LOCK_MUTEX, + KAPI_LOCK_SPINLOCK, + KAPI_LOCK_RWLOCK, + KAPI_LOCK_SEQLOCK, + KAPI_LOCK_RCU, + KAPI_LOCK_SEMAPHORE, + KAPI_LOCK_CUSTOM, +}; + +/** + * enum kapi_constraint_type - Types of parameter constraints + * @KAPI_CONSTRAINT_NONE: No constraint + * @KAPI_CONSTRAINT_RANGE: Numeric range constraint + * @KAPI_CONSTRAINT_MASK: Bitmask constraint + * @KAPI_CONSTRAINT_ENUM: Enumerated values constraint + * @KAPI_CONSTRAINT_CUSTOM: Custom validation function + */ +enum kapi_constraint_type { + KAPI_CONSTRAINT_NONE =3D 0, + KAPI_CONSTRAINT_RANGE, + KAPI_CONSTRAINT_MASK, + KAPI_CONSTRAINT_ENUM, + KAPI_CONSTRAINT_CUSTOM, +}; + +/** + * struct kapi_param_spec - Parameter specification + * @name: Parameter name + * @type_name: Type name as string + * @type: Parameter type classification + * @flags: Parameter attribute flags + * @size: Size in bytes (for arrays/buffers) + * @alignment: Required alignment + * @min_value: Minimum valid value (for numeric 
types) + * @max_value: Maximum valid value (for numeric types) + * @valid_mask: Valid bits mask (for flag parameters) + * @enum_values: Array of valid enumerated values + * @enum_count: Number of valid enumerated values + * @constraint_type: Type of constraint applied + * @validate: Custom validation function + * @description: Human-readable description + * @constraints: Additional constraints description + * @size_param_idx: Index of parameter that determines size (-1 if fixed s= ize) + * @size_multiplier: Multiplier for size calculation (e.g., sizeof(struct)) + */ +struct kapi_param_spec { + char name[KAPI_MAX_NAME_LEN]; + char type_name[KAPI_MAX_NAME_LEN]; + enum kapi_param_type type; + u32 flags; + size_t size; + size_t alignment; + s64 min_value; + s64 max_value; + u64 valid_mask; + const s64 *enum_values; + u32 enum_count; + enum kapi_constraint_type constraint_type; + bool (*validate)(s64 value); + char description[KAPI_MAX_DESC_LEN]; + char constraints[KAPI_MAX_DESC_LEN]; + int size_param_idx; /* Index of param that determines size, -1 if N/A */ + size_t size_multiplier; /* Size per unit (e.g., sizeof(struct epoll_event= )) */ +} __attribute__((packed)); + +/** + * struct kapi_error_spec - Error condition specification + * @error_code: Error code value + * @name: Error code name (e.g., "EINVAL") + * @condition: Condition that triggers this error + * @description: Detailed error description + */ +struct kapi_error_spec { + int error_code; + char name[KAPI_MAX_NAME_LEN]; + char condition[KAPI_MAX_DESC_LEN]; + char description[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + +/** + * enum kapi_return_check_type - Return value check types + * @KAPI_RETURN_EXACT: Success is an exact value + * @KAPI_RETURN_RANGE: Success is within a range + * @KAPI_RETURN_ERROR_CHECK: Success is when NOT in error list + * @KAPI_RETURN_FD: Return value is a file descriptor (>=3D 0 is success) + * @KAPI_RETURN_CUSTOM: Custom validation function + */ +enum kapi_return_check_type { 
+ KAPI_RETURN_EXACT, + KAPI_RETURN_RANGE, + KAPI_RETURN_ERROR_CHECK, + KAPI_RETURN_FD, + KAPI_RETURN_CUSTOM, +}; + +/** + * struct kapi_return_spec - Return value specification + * @type_name: Return type name + * @type: Return type classification + * @check_type: Type of success check to perform + * @success_value: Exact value indicating success (for EXACT) + * @success_min: Minimum success value (for RANGE) + * @success_max: Maximum success value (for RANGE) + * @error_values: Array of error values (for ERROR_CHECK) + * @error_count: Number of error values + * @is_success: Custom function to check success + * @description: Return value description + */ +struct kapi_return_spec { + char type_name[KAPI_MAX_NAME_LEN]; + enum kapi_param_type type; + enum kapi_return_check_type check_type; + s64 success_value; + s64 success_min; + s64 success_max; + const s64 *error_values; + u32 error_count; + bool (*is_success)(s64 retval); + char description[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + +/** + * struct kapi_lock_spec - Lock requirement specification + * @lock_name: Name of the lock + * @lock_type: Type of lock + * @acquired: Whether function acquires this lock + * @released: Whether function releases this lock + * @held_on_entry: Whether lock must be held on entry + * @held_on_exit: Whether lock is held on exit + * @description: Additional lock requirements + */ +struct kapi_lock_spec { + char lock_name[KAPI_MAX_NAME_LEN]; + enum kapi_lock_type lock_type; + bool acquired; + bool released; + bool held_on_entry; + bool held_on_exit; + char description[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + +/** + * struct kapi_constraint_spec - Additional constraint specification + * @name: Constraint name + * @description: Constraint description + * @expression: Formal expression (if applicable) + */ +struct kapi_constraint_spec { + char name[KAPI_MAX_NAME_LEN]; + char description[KAPI_MAX_DESC_LEN]; + char expression[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + 
+/** + * enum kapi_signal_direction - Signal flow direction + * @KAPI_SIGNAL_RECEIVE: Function may receive this signal + * @KAPI_SIGNAL_SEND: Function may send this signal + * @KAPI_SIGNAL_HANDLE: Function handles this signal specially + * @KAPI_SIGNAL_BLOCK: Function blocks this signal + * @KAPI_SIGNAL_IGNORE: Function ignores this signal + */ +enum kapi_signal_direction { + KAPI_SIGNAL_RECEIVE =3D (1 << 0), + KAPI_SIGNAL_SEND =3D (1 << 1), + KAPI_SIGNAL_HANDLE =3D (1 << 2), + KAPI_SIGNAL_BLOCK =3D (1 << 3), + KAPI_SIGNAL_IGNORE =3D (1 << 4), +}; + +/** + * enum kapi_signal_action - What the function does with the signal + * @KAPI_SIGNAL_ACTION_DEFAULT: Default signal action applies + * @KAPI_SIGNAL_ACTION_TERMINATE: Causes termination + * @KAPI_SIGNAL_ACTION_COREDUMP: Causes termination with core dump + * @KAPI_SIGNAL_ACTION_STOP: Stops the process + * @KAPI_SIGNAL_ACTION_CONTINUE: Continues a stopped process + * @KAPI_SIGNAL_ACTION_CUSTOM: Custom handling described in notes + * @KAPI_SIGNAL_ACTION_RETURN: Returns from syscall with EINTR + * @KAPI_SIGNAL_ACTION_RESTART: Restarts the syscall + */ +enum kapi_signal_action { + KAPI_SIGNAL_ACTION_DEFAULT =3D 0, + KAPI_SIGNAL_ACTION_TERMINATE, + KAPI_SIGNAL_ACTION_COREDUMP, + KAPI_SIGNAL_ACTION_STOP, + KAPI_SIGNAL_ACTION_CONTINUE, + KAPI_SIGNAL_ACTION_CUSTOM, + KAPI_SIGNAL_ACTION_RETURN, + KAPI_SIGNAL_ACTION_RESTART, +}; + +/** + * struct kapi_signal_spec - Signal specification + * @signal_num: Signal number (e.g., SIGKILL, SIGTERM) + * @signal_name: Signal name as string + * @direction: Direction flags (OR of kapi_signal_direction) + * @action: What happens when signal is received + * @target: Description of target process/thread for sent signals + * @condition: Condition under which signal is sent/received/handled + * @description: Detailed description of signal handling + * @restartable: Whether syscall is restartable after this signal + */ +struct kapi_signal_spec { + int signal_num; + char signal_name[32]; + u32 
direction; + enum kapi_signal_action action; + char target[KAPI_MAX_DESC_LEN]; + char condition[KAPI_MAX_DESC_LEN]; + char description[KAPI_MAX_DESC_LEN]; + bool restartable; +} __attribute__((packed)); + +/** + * struct kapi_signal_mask_spec - Signal mask specification + * @mask_name: Name of the signal mask + * @signals: Array of signal numbers in the mask + * @signal_count: Number of signals in the mask + * @description: Description of what this mask represents + */ +struct kapi_signal_mask_spec { + char mask_name[KAPI_MAX_NAME_LEN]; + int signals[KAPI_MAX_SIGNALS]; + u32 signal_count; + char description[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + +/** + * struct kapi_struct_field - Structure field specification + * @name: Field name + * @type: Field type classification + * @type_name: Type name as string + * @offset: Offset within structure + * @size: Size of field in bytes + * @flags: Field attribute flags + * @constraint_type: Type of constraint applied + * @min_value: Minimum valid value (for numeric types) + * @max_value: Maximum valid value (for numeric types) + * @valid_mask: Valid bits mask (for flag fields) + * @description: Field description + */ +struct kapi_struct_field { + char name[KAPI_MAX_NAME_LEN]; + enum kapi_param_type type; + char type_name[KAPI_MAX_NAME_LEN]; + size_t offset; + size_t size; + u32 flags; + enum kapi_constraint_type constraint_type; + s64 min_value; + s64 max_value; + u64 valid_mask; + char description[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + +/** + * struct kapi_struct_spec - Structure type specification + * @name: Structure name + * @size: Total size of structure + * @alignment: Required alignment + * @field_count: Number of fields + * @fields: Field specifications + * @description: Structure description + */ +struct kapi_struct_spec { + char name[KAPI_MAX_NAME_LEN]; + size_t size; + size_t alignment; + u32 field_count; + struct kapi_struct_field fields[KAPI_MAX_PARAMS]; + char description[KAPI_MAX_DESC_LEN]; 
+} __attribute__((packed)); + +/** + * enum kapi_side_effect_type - Types of side effects + * @KAPI_EFFECT_NONE: No side effects + * @KAPI_EFFECT_ALLOC_MEMORY: Allocates memory + * @KAPI_EFFECT_FREE_MEMORY: Frees memory + * @KAPI_EFFECT_MODIFY_STATE: Modifies global/shared state + * @KAPI_EFFECT_SIGNAL_SEND: Sends signals + * @KAPI_EFFECT_FILE_POSITION: Modifies file position + * @KAPI_EFFECT_LOCK_ACQUIRE: Acquires locks + * @KAPI_EFFECT_LOCK_RELEASE: Releases locks + * @KAPI_EFFECT_RESOURCE_CREATE: Creates system resources (FDs, PIDs, etc) + * @KAPI_EFFECT_RESOURCE_DESTROY: Destroys system resources + * @KAPI_EFFECT_SCHEDULE: May cause scheduling/context switch + * @KAPI_EFFECT_HARDWARE: Interacts with hardware + * @KAPI_EFFECT_NETWORK: Network I/O operation + * @KAPI_EFFECT_FILESYSTEM: Filesystem modification + * @KAPI_EFFECT_PROCESS_STATE: Modifies process state + */ +enum kapi_side_effect_type { + KAPI_EFFECT_NONE =3D 0, + KAPI_EFFECT_ALLOC_MEMORY =3D (1 << 0), + KAPI_EFFECT_FREE_MEMORY =3D (1 << 1), + KAPI_EFFECT_MODIFY_STATE =3D (1 << 2), + KAPI_EFFECT_SIGNAL_SEND =3D (1 << 3), + KAPI_EFFECT_FILE_POSITION =3D (1 << 4), + KAPI_EFFECT_LOCK_ACQUIRE =3D (1 << 5), + KAPI_EFFECT_LOCK_RELEASE =3D (1 << 6), + KAPI_EFFECT_RESOURCE_CREATE =3D (1 << 7), + KAPI_EFFECT_RESOURCE_DESTROY =3D (1 << 8), + KAPI_EFFECT_SCHEDULE =3D (1 << 9), + KAPI_EFFECT_HARDWARE =3D (1 << 10), + KAPI_EFFECT_NETWORK =3D (1 << 11), + KAPI_EFFECT_FILESYSTEM =3D (1 << 12), + KAPI_EFFECT_PROCESS_STATE =3D (1 << 13), +}; + +/** + * struct kapi_side_effect - Side effect specification + * @type: Bitmask of effect types + * @target: What is affected (e.g., "process memory", "file descriptor tab= le") + * @condition: Condition under which effect occurs + * @description: Detailed description of the effect + * @reversible: Whether the effect can be undone + */ +struct kapi_side_effect { + u32 type; + char target[KAPI_MAX_NAME_LEN]; + char condition[KAPI_MAX_DESC_LEN]; + char 
description[KAPI_MAX_DESC_LEN]; + bool reversible; +} __attribute__((packed)); + +/** + * struct kapi_state_transition - State transition specification + * @from_state: Starting state description + * @to_state: Ending state description + * @condition: Condition for transition + * @object: Object whose state changes + * @description: Detailed description + */ +struct kapi_state_transition { + char from_state[KAPI_MAX_NAME_LEN]; + char to_state[KAPI_MAX_NAME_LEN]; + char condition[KAPI_MAX_DESC_LEN]; + char object[KAPI_MAX_NAME_LEN]; + char description[KAPI_MAX_DESC_LEN]; +} __attribute__((packed)); + +#define KAPI_MAX_STRUCT_SPECS 8 +#define KAPI_MAX_SIDE_EFFECTS 16 +#define KAPI_MAX_STATE_TRANS 8 + +/** + * struct kernel_api_spec - Complete kernel API specification + * @name: Function name + * @version: API version + * @description: Brief description + * @long_description: Detailed description + * @context_flags: Execution context flags + * @param_count: Number of parameters + * @params: Parameter specifications + * @return_spec: Return value specification + * @error_count: Number of possible errors + * @errors: Error specifications + * @lock_count: Number of lock specifications + * @locks: Lock requirement specifications + * @constraint_count: Number of additional constraints + * @constraints: Additional constraint specifications + * @examples: Usage examples + * @notes: Additional notes + * @since_version: Kernel version when introduced + * @deprecated: Whether API is deprecated + * @replacement: Replacement API if deprecated + * @signal_count: Number of signal specifications + * @signals: Signal handling specifications + * @signal_mask_count: Number of signal mask specifications + * @signal_masks: Signal mask specifications + * @struct_spec_count: Number of structure specifications + * @struct_specs: Structure type specifications + * @side_effect_count: Number of side effect specifications + * @side_effects: Side effect specifications + * @state_trans_count: 
Number of state transition specifications + * @state_transitions: State transition specifications + */ +struct kernel_api_spec { + char name[KAPI_MAX_NAME_LEN]; + u32 version; + char description[KAPI_MAX_DESC_LEN]; + char long_description[KAPI_MAX_DESC_LEN * 4]; + u32 context_flags; + + /* Parameters */ + u32 param_count; + struct kapi_param_spec params[KAPI_MAX_PARAMS]; + + /* Return value */ + struct kapi_return_spec return_spec; + + /* Errors */ + u32 error_count; + struct kapi_error_spec errors[KAPI_MAX_ERRORS]; + + /* Locking */ + u32 lock_count; + struct kapi_lock_spec locks[KAPI_MAX_CONSTRAINTS]; + + /* Constraints */ + u32 constraint_count; + struct kapi_constraint_spec constraints[KAPI_MAX_CONSTRAINTS]; + + /* Additional information */ + char examples[KAPI_MAX_DESC_LEN * 2]; + char notes[KAPI_MAX_DESC_LEN]; + char since_version[32]; + bool deprecated; + char replacement[KAPI_MAX_NAME_LEN]; + + /* Signal specifications */ + u32 signal_count; + struct kapi_signal_spec signals[KAPI_MAX_SIGNALS]; + + /* Signal mask specifications */ + u32 signal_mask_count; + struct kapi_signal_mask_spec signal_masks[KAPI_MAX_SIGNALS]; + + /* Structure specifications */ + u32 struct_spec_count; + struct kapi_struct_spec struct_specs[KAPI_MAX_STRUCT_SPECS]; + + /* Side effects */ + u32 side_effect_count; + struct kapi_side_effect side_effects[KAPI_MAX_SIDE_EFFECTS]; + + /* State transitions */ + u32 state_trans_count; + struct kapi_state_transition state_transitions[KAPI_MAX_STATE_TRANS]; +} __attribute__((packed)); + +/* Macros for defining API specifications */ + +/** + * DEFINE_KERNEL_API_SPEC - Define a kernel API specification + * @func_name: Function name to specify + */ +#define DEFINE_KERNEL_API_SPEC(func_name) \ + static struct kernel_api_spec __kapi_spec_##func_name \ + __used __section(".kapi_specs") =3D { \ + .name =3D __stringify(func_name), \ + .version =3D 1, + +#define KAPI_END_SPEC }; + +/** + * KAPI_DESCRIPTION - Set API description + * @desc: Description 
string + */ +#define KAPI_DESCRIPTION(desc) \ + .description =3D desc, + +/** + * KAPI_LONG_DESC - Set detailed API description + * @desc: Detailed description string + */ +#define KAPI_LONG_DESC(desc) \ + .long_description =3D desc, + +/** + * KAPI_CONTEXT - Set execution context flags + * @flags: Context flags (OR'ed KAPI_CTX_* values) + */ +#define KAPI_CONTEXT(flags) \ + .context_flags =3D flags, + +/** + * KAPI_PARAM - Define a parameter specification + * @idx: Parameter index (0-based) + * @pname: Parameter name + * @ptype: Type name string + * @pdesc: Parameter description + */ +#define KAPI_PARAM(idx, pname, ptype, pdesc) \ + .params[idx] =3D { \ + .name =3D pname, \ + .type_name =3D ptype, \ + .description =3D pdesc, \ + .size_param_idx =3D -1, /* Default: no dynamic sizing */ + +#define KAPI_PARAM_FLAGS(pflags) \ + .flags =3D pflags, + +#define KAPI_PARAM_SIZE(psize) \ + .size =3D psize, + +#define KAPI_PARAM_RANGE(pmin, pmax) \ + .min_value =3D pmin, \ + .max_value =3D pmax, + +#define KAPI_PARAM_END }, + +/** + * KAPI_RETURN - Define return value specification + * @rtype: Return type name + * @rdesc: Return value description + */ +#define KAPI_RETURN(rtype, rdesc) \ + .return_spec =3D { \ + .type_name =3D rtype, \ + .description =3D rdesc, + +#define KAPI_RETURN_SUCCESS(val) \ + .success_value =3D val, + +#define KAPI_RETURN_END }, + +/** + * KAPI_ERROR - Define an error condition + * @idx: Error index + * @ecode: Error code value + * @ename: Error name + * @econd: Error condition + * @edesc: Error description + */ +#define KAPI_ERROR(idx, ecode, ename, econd, edesc) \ + .errors[idx] =3D { \ + .error_code =3D ecode, \ + .name =3D ename, \ + .condition =3D econd, \ + .description =3D edesc, \ + }, + +/** + * KAPI_LOCK - Define a lock requirement + * @idx: Lock index + * @lname: Lock name + * @ltype: Lock type + */ +#define KAPI_LOCK(idx, lname, ltype) \ + .locks[idx] =3D { \ + .lock_name =3D lname, \ + .lock_type =3D ltype, + +#define KAPI_LOCK_ACQUIRED 
\ + .acquired =3D true, + +#define KAPI_LOCK_RELEASED \ + .released =3D true, + +#define KAPI_LOCK_HELD_ENTRY \ + .held_on_entry =3D true, + +#define KAPI_LOCK_HELD_EXIT \ + .held_on_exit =3D true, + +#define KAPI_LOCK_DESC(ldesc) \ + .description =3D ldesc, + +#define KAPI_LOCK_END }, + +/** + * KAPI_CONSTRAINT - Define an additional constraint + * @idx: Constraint index + * @cname: Constraint name + * @cdesc: Constraint description + * + * Opens the initializer for .constraints[@idx]; close it with + * KAPI_CONSTRAINT_END. + */ +#define KAPI_CONSTRAINT(idx, cname, cdesc) \ + .constraints[idx] =3D { \ + .name =3D cname, \ + .description =3D cdesc, + +#define KAPI_CONSTRAINT_EXPR(expr) \ + .expression =3D expr, + +#define KAPI_CONSTRAINT_END }, + +/** + * KAPI_SIGNAL - Define a signal specification + * @idx: Signal index + * @signum: Signal number (e.g., SIGKILL) + * @signame: Signal name string + * @dir: Direction flags + * @act: Action taken + * + * Opens the initializer for .signals[@idx]; the KAPI_SIGNAL_* setters + * below fill in optional members and KAPI_SIGNAL_END closes it. + */ +#define KAPI_SIGNAL(idx, signum, signame, dir, act) \ + .signals[idx] =3D { \ + .signal_num =3D signum, \ + .signal_name =3D signame, \ + .direction =3D dir, \ + .action =3D act, + +#define KAPI_SIGNAL_TARGET(tgt) \ + .target =3D tgt, + +#define KAPI_SIGNAL_CONDITION(cond) \ + .condition =3D cond, + +#define KAPI_SIGNAL_DESC(desc) \ + .description =3D desc, + +#define KAPI_SIGNAL_RESTARTABLE \ + .restartable =3D true, + +#define KAPI_SIGNAL_END }, + +/** + * KAPI_SIGNAL_MASK - Define a signal mask specification + * @idx: Mask index + * @name: Mask name + * @desc: Mask description + * + * Opens the initializer for .signal_masks[@idx]; close it with + * KAPI_SIGNAL_MASK_END. + * + * NOTE(review): KAPI_SIGNAL_MASK_ADD() expands to a designator of the + * form .signals[.signal_count++], which is not a valid C designated + * initializer (an array designator needs a constant index); confirm + * how mask members are meant to be listed before relying on it. + */ +#define KAPI_SIGNAL_MASK(idx, name, desc) \ + .signal_masks[idx] =3D { \ + .mask_name =3D name, \ + .description =3D desc, + +#define KAPI_SIGNAL_MASK_ADD(signum) \ + .signals[.signal_count++] =3D signum, + +#define KAPI_SIGNAL_MASK_END }, + +/** + * KAPI_STRUCT_SPEC - Define a structure specification + * @idx: Structure spec index + * @sname: Structure name; stringified by the macro, so pass it unquoted + * @sdesc: Structure description + */ +#define KAPI_STRUCT_SPEC(idx, sname, sdesc) \ + .struct_specs[idx] =3D { \ + .name =3D #sname, \ + .description =3D sdesc, + +#define 
KAPI_STRUCT_SIZE(ssize, salign) \ + .size =3D ssize, \ + .alignment =3D salign, + +#define KAPI_STRUCT_FIELD_COUNT(n) \ + .field_count =3D n, + +/** + * KAPI_STRUCT_FIELD - Define a structure field + * @fidx: Field index + * @fname: Field name + * @ftype: Field type (KAPI_TYPE_*) + * @ftype_name: Type name as string + * @fdesc: Field description + * + * Opens the initializer for .fields[@fidx]; the KAPI_FIELD_* setters + * below fill in optional members and KAPI_STRUCT_FIELD_END closes it. + * + * NOTE(review): KAPI_FIELD_CONSTRAINT_ENUM() initializes .enum_values + * and .enum_count, but struct kapi_struct_field as declared in this + * header has no members with those names, so any use of that macro + * will fail to compile unless the struct is extended. + */ +#define KAPI_STRUCT_FIELD(fidx, fname, ftype, ftype_name, fdesc) \ + .fields[fidx] =3D { \ + .name =3D fname, \ + .type =3D ftype, \ + .type_name =3D ftype_name, \ + .description =3D fdesc, + +#define KAPI_FIELD_OFFSET(foffset) \ + .offset =3D foffset, + +#define KAPI_FIELD_SIZE(fsize) \ + .size =3D fsize, + +#define KAPI_FIELD_FLAGS(fflags) \ + .flags =3D fflags, + +#define KAPI_FIELD_CONSTRAINT_RANGE(min, max) \ + .constraint_type =3D KAPI_CONSTRAINT_RANGE, \ + .min_value =3D min, \ + .max_value =3D max, + +#define KAPI_FIELD_CONSTRAINT_MASK(mask) \ + .constraint_type =3D KAPI_CONSTRAINT_MASK, \ + .valid_mask =3D mask, + +#define KAPI_FIELD_CONSTRAINT_ENUM(values, count) \ + .constraint_type =3D KAPI_CONSTRAINT_ENUM, \ + .enum_values =3D values, \ + .enum_count =3D count, + +#define KAPI_STRUCT_FIELD_END }, + +#define KAPI_STRUCT_SPEC_END }, + +/* Counter for structure specifications: sets .struct_spec_count to n */ +#define KAPI_STRUCT_SPEC_COUNT(n) \ + .struct_spec_count =3D n, + +/** + * KAPI_SIDE_EFFECT - Define a side effect + * @idx: Side effect index + * @etype: Effect type bitmask (OR'ed KAPI_EFFECT_* values) + * @etarget: What is affected + * @edesc: Effect description + * + * Opens the initializer for .side_effects[@idx] with .reversible set to + * false; KAPI_EFFECT_REVERSIBLE sets it to true and KAPI_SIDE_EFFECT_END + * closes the initializer. + */ +#define KAPI_SIDE_EFFECT(idx, etype, etarget, edesc) \ + .side_effects[idx] =3D { \ + .type =3D etype, \ + .target =3D etarget, \ + .description =3D edesc, \ + .reversible =3D false, /* Default to non-reversible */ + +#define KAPI_EFFECT_CONDITION(cond) \ + .condition =3D cond, + +#define KAPI_EFFECT_REVERSIBLE \ + .reversible =3D true, + +#define KAPI_SIDE_EFFECT_END }, + +/** + * KAPI_STATE_TRANS - Define a state transition + * @idx: State transition index + * 
@obj: Object whose state changes + * @from: From state + * @to: To state + * @desc: Transition description + */ +#define KAPI_STATE_TRANS(idx, obj, from, to, desc) \ + .state_transitions[idx] =3D { \ + .object =3D obj, \ + .from_state =3D from, \ + .to_state =3D to, \ + .description =3D desc, + +#define KAPI_STATE_TRANS_COND(cond) \ + .condition =3D cond, + +#define KAPI_STATE_TRANS_END }, + +/* Counters for side effects and state transitions */ +#define KAPI_SIDE_EFFECT_COUNT(n) \ + .side_effect_count =3D n, + +#define KAPI_STATE_TRANS_COUNT(n) \ + .state_trans_count =3D n, + +/* Helper macros for common side effect patterns (OR of KAPI_EFFECT_* bits) */ +#define KAPI_EFFECTS_MEMORY (KAPI_EFFECT_ALLOC_MEMORY | KAPI_EFFECT_FREE_M= EMORY) +#define KAPI_EFFECTS_LOCKING (KAPI_EFFECT_LOCK_ACQUIRE | KAPI_EFFECT_LOCK_= RELEASE) +#define KAPI_EFFECTS_RESOURCES (KAPI_EFFECT_RESOURCE_CREATE | KAPI_EFFECT_= RESOURCE_DESTROY) +#define KAPI_EFFECTS_IO (KAPI_EFFECT_NETWORK | KAPI_EFFECT_FILESYSTEM) + +/* + * Helper macros for common patterns + * + * NOTE(review): KAPI_PARAM_IN, KAPI_PARAM_OUT and KAPI_PARAM_OPTIONAL + * are defined in terms of themselves. The preprocessor does not + * re-expand a macro inside its own expansion, so each one resolves only + * if an identifier of the same name (presumably an enumerator declared + * earlier in this header) exists; confirm, and confirm that + * KAPI_PARAM_USER and KAPI_PARAM_PTR are declared as well. + */ + +#define KAPI_PARAM_IN (KAPI_PARAM_IN) +#define KAPI_PARAM_OUT (KAPI_PARAM_OUT) +#define KAPI_PARAM_INOUT (KAPI_PARAM_IN | KAPI_PARAM_OUT) +#define KAPI_PARAM_OPTIONAL (KAPI_PARAM_OPTIONAL) +#define KAPI_PARAM_USER_PTR (KAPI_PARAM_USER | KAPI_PARAM_PTR) + +/* + * Validation and runtime checking + * + * The prototypes below are declared only when CONFIG_KAPI_RUNTIME_CHECKS + * is set; the #else branch supplies inline stubs with the same names. + */ + +#ifdef CONFIG_KAPI_RUNTIME_CHECKS +bool kapi_validate_params(const struct kernel_api_spec *spec, ...); +bool kapi_validate_param(const struct kapi_param_spec *param_spec, s64 val= ue); +bool kapi_validate_param_with_context(const struct kapi_param_spec *param_= spec, + s64 value, const s64 *all_params, int param_count); +int kapi_validate_syscall_param(const struct kernel_api_spec *spec, + int param_idx, s64 value); +int kapi_validate_syscall_params(const struct kernel_api_spec *spec, + const s64 *params, int param_count); +bool kapi_check_return_success(const struct kapi_return_spec *return_spec,= s64 retval); +bool kapi_validate_return_value(const struct kernel_api_spec 
*spec, s64 re= tval); +int kapi_validate_syscall_return(const struct kernel_api_spec *spec, s64 r= etval); +void kapi_check_context(const struct kernel_api_spec *spec); +void kapi_check_locks(const struct kernel_api_spec *spec); +#else +static inline bool kapi_validate_params(const struct kernel_api_spec *spec= , ...) +{ + return true; +} +static inline bool kapi_validate_param(const struct kapi_param_spec *param= _spec, s64 value) +{ + return true; +} +static inline bool kapi_validate_param_with_context(const struct kapi_para= m_spec *param_spec, + s64 value, const s64 *all_params, int param_count) +{ + return true; +} +static inline int kapi_validate_syscall_param(const struct kernel_api_spec= *spec, + int param_idx, s64 value) +{ + return 0; +} +static inline int kapi_validate_syscall_params(const struct kernel_api_spe= c *spec, + const s64 *params, int param_count) +{ + return 0; +} +static inline bool kapi_check_return_success(const struct kapi_return_spec= *return_spec, s64 retval) +{ + return true; +} +static inline bool kapi_validate_return_value(const struct kernel_api_spec= *spec, s64 retval) +{ + return true; +} +static inline int kapi_validate_syscall_return(const struct kernel_api_spe= c *spec, s64 retval) +{ + return 0; +} +static inline void kapi_check_context(const struct kernel_api_spec *spec) = {} +static inline void kapi_check_locks(const struct kernel_api_spec *spec) {} +#endif + +/* Export/query functions */ +const struct kernel_api_spec *kapi_get_spec(const char *name); +int kapi_export_json(const struct kernel_api_spec *spec, char *buf, size_t= size); +int kapi_export_xml(const struct kernel_api_spec *spec, char *buf, size_t = size); +void kapi_print_spec(const struct kernel_api_spec *spec); + +/* Registration for dynamic APIs */ +int kapi_register_spec(struct kernel_api_spec *spec); +void kapi_unregister_spec(const char *name); + +/* Helper to get parameter constraint info */ +static inline bool kapi_get_param_constraint(const char 
*api_name, int par= am_idx, + enum kapi_constraint_type *type, + u64 *valid_mask, s64 *min_val, s64 *max_val) +{ + const struct kernel_api_spec *spec =3D kapi_get_spec(api_name); + + if (!spec || param_idx >=3D spec->param_count) + return false; + + if (type) + *type =3D spec->params[param_idx].constraint_type; + if (valid_mask) + *valid_mask =3D spec->params[param_idx].valid_mask; + if (min_val) + *min_val =3D spec->params[param_idx].min_value; + if (max_val) + *max_val =3D spec->params[param_idx].max_value; + + return true; +} + +#endif /* _LINUX_KERNEL_API_SPEC_H */ \ No newline at end of file diff --git a/include/linux/syscall_api_spec.h b/include/linux/syscall_api_s= pec.h new file mode 100644 index 0000000000000..48ad95647dd39 --- /dev/null +++ b/include/linux/syscall_api_spec.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_api_spec.h - System Call API Specification Integration + * + * This header extends the SYSCALL_DEFINEX macros to support inline API sp= ecifications, + * allowing syscall documentation to be written alongside the implementati= on in a + * human-readable and machine-parseable format. 
+ */ + +#ifndef _LINUX_SYSCALL_API_SPEC_H +#define _LINUX_SYSCALL_API_SPEC_H + +#include + +/* + * Extended SYSCALL_DEFINE macros with API specification support + * + * Usage example: + * + * SYSCALL_DEFINE_SPEC2(example, + * KAPI_DESCRIPTION("Example system call"), + * KAPI_LONG_DESC("This is a detailed description of the example sysca= ll"), + * KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE), + * + * KAPI_PARAM(0, "fd", "int", "File descriptor to operate on") + * KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + * KAPI_PARAM_RANGE(0, INT_MAX) + * KAPI_PARAM_END, + * + * KAPI_PARAM(1, "flags", "unsigned int", "Operation flags") + * KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + * KAPI_PARAM_END, + * + * KAPI_RETURN("long", "0 on success, negative error code on failure") + * KAPI_RETURN_SUCCESS(0, "=3D=3D 0") + * KAPI_RETURN_END, + * + * KAPI_ERROR(0, -EBADF, "EBADF", "fd is not a valid file descriptor", + * "The file descriptor is invalid or closed"), + * KAPI_ERROR(1, -EINVAL, "EINVAL", "flags contains invalid values", + * "Invalid flag combination specified"), + * + * .error_count =3D 2, + * .param_count =3D 2, + * + * int, fd, unsigned int, flags) + * { + * // Implementation here + * } + */ + +/* Helper to count parameters */ +#define __SYSCALL_PARAM_COUNT(...) __SYSCALL_PARAM_COUNT_I(__VA_ARGS__, 6,= 5, 4, 3, 2, 1, 0) +#define __SYSCALL_PARAM_COUNT_I(_1, _2, _3, _4, _5, _6, N, ...) N + +/* Extract syscall name from parameters */ +#define __SYSCALL_NAME(name, ...) name + +/* Generate API spec structure name */ +#define __SYSCALL_API_SPEC_NAME(name) __kapi_spec_sys_##name + +/* Helper to count syscall parameters (pairs of type, name) */ +#define __SYSCALL_ARG_COUNT(...) __SYSCALL_ARG_COUNT_I(__VA_ARGS__, 6, 6, = 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0) +#define __SYSCALL_ARG_COUNT_I(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11= , _12, N, ...) 
N + +/* Automatic syscall validation infrastructure */ +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + +/* Helper to inject validation at the beginning of syscall */ +#define __KAPI_SYSCALL_VALIDATE_0(name) +#define __KAPI_SYSCALL_VALIDATE_1(name, t1, a1) \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + s64 __params[1] =3D { (s64)(a1) }; \ + int __ret =3D kapi_validate_syscall_params(__spec, __params, 1); \ + if (__ret) return __ret; \ + } +#define __KAPI_SYSCALL_VALIDATE_2(name, t1, a1, t2, a2) \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + s64 __params[2] =3D { (s64)(a1), (s64)(a2) }; \ + int __ret =3D kapi_validate_syscall_params(__spec, __params, 2); \ + if (__ret) return __ret; \ + } +#define __KAPI_SYSCALL_VALIDATE_3(name, t1, a1, t2, a2, t3, a3) \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + s64 __params[3] =3D { (s64)(a1), (s64)(a2), (s64)(a3) }; \ + int __ret =3D kapi_validate_syscall_params(__spec, __params, 3); \ + if (__ret) return __ret; \ + } +#define __KAPI_SYSCALL_VALIDATE_4(name, t1, a1, t2, a2, t3, a3, t4, a4) \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + s64 __params[4] =3D { (s64)(a1), (s64)(a2), (s64)(a3), (s64)(a4) }; \ + int __ret =3D kapi_validate_syscall_params(__spec, __params, 4); \ + if (__ret) return __ret; \ + } +#define __KAPI_SYSCALL_VALIDATE_5(name, t1, a1, t2, a2, t3, a3, t4, a4, t5= , a5) \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + s64 __params[5] =3D { (s64)(a1), (s64)(a2), (s64)(a3), (s64)(a4), (s64)(= a5) }; \ + int __ret =3D kapi_validate_syscall_params(__spec, __params, 5); \ + if (__ret) return __ret; \ + } +#define __KAPI_SYSCALL_VALIDATE_6(name, t1, a1, t2, a2, t3, a3, t4, a4, t5= , a5, t6, a6) \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + 
s64 __params[6] =3D { (s64)(a1), (s64)(a2), (s64)(a3), (s64)(a4), (s64)(= a5), (s64)(a6) }; \ + int __ret =3D kapi_validate_syscall_params(__spec, __params, 6); \ + if (__ret) return __ret; \ + } + +#else /* !CONFIG_KAPI_RUNTIME_CHECKS */ + +#define __KAPI_SYSCALL_VALIDATE_0(name) +#define __KAPI_SYSCALL_VALIDATE_1(name, t1, a1) +#define __KAPI_SYSCALL_VALIDATE_2(name, t1, a1, t2, a2) +#define __KAPI_SYSCALL_VALIDATE_3(name, t1, a1, t2, a2, t3, a3) +#define __KAPI_SYSCALL_VALIDATE_4(name, t1, a1, t2, a2, t3, a3, t4, a4) +#define __KAPI_SYSCALL_VALIDATE_5(name, t1, a1, t2, a2, t3, a3, t4, a4, t5= , a5) +#define __KAPI_SYSCALL_VALIDATE_6(name, t1, a1, t2, a2, t3, a3, t4, a4, t5= , a5, t6, a6) + +#endif /* CONFIG_KAPI_RUNTIME_CHECKS */ + +/* Helper to inject validation for return values */ +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + +#define __KAPI_SYSCALL_VALIDATE_RETURN(name, retval) \ + do { \ + const struct kernel_api_spec *__spec =3D kapi_get_spec("sys_" #name); \ + if (__spec) { \ + kapi_validate_syscall_return(__spec, (s64)(retval)); \ + } \ + } while (0) + +/* + * Wrapper to validate both params and return value. + * + * Emits the spec, then a SYSCALL_DEFINEn entry point that validates the + * arguments, calls __kapi_sys_<name>() and validates its result. The + * body written after the macro becomes __kapi_sys_<name>(). + * + * The argument count is joined to SYSCALL_DEFINE, __MAP and + * __KAPI_SYSCALL_VALIDATE_ through __PASTE() so that + * __SYSCALL_ARG_COUNT() is expanded before the ## operator is applied; + * pasting it directly would yield SYSCALL_DEFINE__SYSCALL_ARG_COUNT. + * + * .name is not set here: "name" is a macro parameter and would be + * substituted into the designator. DEFINE_KERNEL_API_SPEC() already + * stores the stringified sys_<name>. + */ +#define __SYSCALL_DEFINE_SPEC(name, spec_args, ...) \ + DEFINE_KERNEL_API_SPEC(sys_##name) \ + spec_args \ + KAPI_END_SPEC; \ + static long __kapi_sys_##name( \ + __PASTE(__MAP, __SYSCALL_ARG_COUNT(__VA_ARGS__)) \ + (__SC_DECL, __VA_ARGS__)); \ + __PASTE(SYSCALL_DEFINE, __SYSCALL_ARG_COUNT(__VA_ARGS__)) \ + (name, __VA_ARGS__) \ + { \ + long __ret; \ + __PASTE(__KAPI_SYSCALL_VALIDATE_, \ + __SYSCALL_ARG_COUNT(__VA_ARGS__)) \ + (name, __VA_ARGS__); \ + __ret =3D __kapi_sys_##name( \ + __PASTE(__MAP, __SYSCALL_ARG_COUNT(__VA_ARGS__)) \ + (__SC_CAST, __VA_ARGS__)); \ + __KAPI_SYSCALL_VALIDATE_RETURN(name, __ret); \ + return __ret; \ + } \ + static long __kapi_sys_##name( \ + __PASTE(__MAP, __SYSCALL_ARG_COUNT(__VA_ARGS__)) \ + (__SC_DECL, __VA_ARGS__)) + +#else /* !CONFIG_KAPI_RUNTIME_CHECKS */ + +#define __SYSCALL_DEFINE_SPEC(name, spec_args, ...) 
\ + DEFINE_KERNEL_API_SPEC(sys_##name) \ + spec_args \ + KAPI_END_SPEC; \ + __PASTE(SYSCALL_DEFINE, __SYSCALL_ARG_COUNT(__VA_ARGS__)) \ + (name, __VA_ARGS__) + +#endif /* CONFIG_KAPI_RUNTIME_CHECKS */ + + +/* + * Convenience macros for different parameter counts. + * + * SYSCALL_DEFINE_SPEC0 emits the spec itself with .param_count 0; the + * other variants forward to __SYSCALL_DEFINE_SPEC(), which picks + * SYSCALL_DEFINEn from the number of (type, name) pairs. The argument + * count is joined through __PASTE() so that it is expanded before ## + * is applied. + * + * .name is never set in these macros: "name" is a macro parameter and + * would be substituted into the designator. DEFINE_KERNEL_API_SPEC() + * already stores the stringified sys_<name>. + */ +#define SYSCALL_DEFINE_SPEC0(name, spec_args) \ + DEFINE_KERNEL_API_SPEC(sys_##name) \ + .param_count =3D 0, \ + spec_args \ + KAPI_END_SPEC; \ + SYSCALL_DEFINE0(name) + +#define SYSCALL_DEFINE_SPEC1(name, spec_args, t1, a1) \ + __SYSCALL_DEFINE_SPEC(name, spec_args, t1, a1) + +#define SYSCALL_DEFINE_SPEC2(name, spec_args, t1, a1, t2, a2) \ + __SYSCALL_DEFINE_SPEC(name, spec_args, t1, a1, t2, a2) + +#define SYSCALL_DEFINE_SPEC3(name, spec_args, t1, a1, t2, a2, t3, a3) \ + __SYSCALL_DEFINE_SPEC(name, spec_args, t1, a1, t2, a2, t3, a3) + +#define SYSCALL_DEFINE_SPEC4(name, spec_args, t1, a1, t2, a2, t3, a3, \ + t4, a4) \ + __SYSCALL_DEFINE_SPEC(name, spec_args, t1, a1, t2, a2, t3, a3, t4, a4) + +#define SYSCALL_DEFINE_SPEC5(name, spec_args, t1, a1, t2, a2, t3, a3, \ + t4, a4, t5, a5) \ + __SYSCALL_DEFINE_SPEC(name, spec_args, t1, a1, t2, a2, t3, a3, \ + t4, a4, t5, a5) + +#define SYSCALL_DEFINE_SPEC6(name, spec_args, t1, a1, t2, a2, t3, a3, \ + t4, a4, t5, a5, t6, a6) \ + __SYSCALL_DEFINE_SPEC(name, spec_args, t1, a1, t2, a2, t3, a3, \ + t4, a4, t5, a5, t6, a6) + +/* + * Helper macros for common syscall patterns + */ + +/* For syscalls that can sleep */ +#define KAPI_SYSCALL_SLEEPABLE \ + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + +/* For syscalls that must be atomic */ +#define KAPI_SYSCALL_ATOMIC \ + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_ATOMIC) + +/* Common parameter specifications */ +#define KAPI_PARAM_FD(idx, desc) \ + KAPI_PARAM(idx, "fd", "int", desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) \ + .type =3D KAPI_TYPE_FD, \ + .constraint_type =3D KAPI_CONSTRAINT_NONE, \ + KAPI_PARAM_END + +#define KAPI_PARAM_USER_BUF(idx, name, desc) \ + KAPI_PARAM(idx, 
name, "void __user *", desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_USER_PTR | KAPI_PARAM_IN) \ + KAPI_PARAM_END + +#define KAPI_PARAM_USER_STRUCT(idx, name, struct_type, desc) \ + KAPI_PARAM(idx, name, #struct_type " __user *", desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_USER | KAPI_PARAM_IN) \ + .type =3D KAPI_TYPE_USER_PTR, \ + .size =3D sizeof(struct_type), \ + .constraint_type =3D KAPI_CONSTRAINT_NONE, \ + KAPI_PARAM_END + +#define KAPI_PARAM_SIZE_T(idx, name, desc) \ + KAPI_PARAM(idx, name, "size_t", desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) \ + KAPI_PARAM_RANGE(0, SIZE_MAX) \ + KAPI_PARAM_END + +/* Common error specifications */ +#define KAPI_ERROR_EBADF(idx) \ + KAPI_ERROR(idx, -EBADF, "EBADF", "Invalid file descriptor", \ + "The file descriptor is not valid or has been closed") + +#define KAPI_ERROR_EINVAL(idx, condition) \ + KAPI_ERROR(idx, -EINVAL, "EINVAL", condition, \ + "Invalid argument provided") + +#define KAPI_ERROR_ENOMEM(idx) \ + KAPI_ERROR(idx, -ENOMEM, "ENOMEM", "Insufficient memory", \ + "Cannot allocate memory for the operation") + +#define KAPI_ERROR_EPERM(idx) \ + KAPI_ERROR(idx, -EPERM, "EPERM", "Operation not permitted", \ + "The calling process does not have the required permissions") + +#define KAPI_ERROR_EFAULT(idx) \ + KAPI_ERROR(idx, -EFAULT, "EFAULT", "Bad address", \ + "Invalid user space address provided") + +/* Standard return value specifications */ +#define KAPI_RETURN_SUCCESS_ZERO \ + KAPI_RETURN("long", "0 on success, negative error code on failure") \ + KAPI_RETURN_SUCCESS(0, "=3D=3D 0") \ + KAPI_RETURN_END + +#define KAPI_RETURN_FD_SPEC \ + KAPI_RETURN("long", "File descriptor on success, negative error code on f= ailure") \ + .check_type =3D KAPI_RETURN_FD, \ + KAPI_RETURN_END + +#define KAPI_RETURN_COUNT \ + KAPI_RETURN("long", "Number of bytes processed on success, negative error= code on failure") \ + KAPI_RETURN_SUCCESS(0, ">=3D 0") \ + KAPI_RETURN_END + + +/* + * Compat syscall support + */ +#ifdef CONFIG_COMPAT + +#define 
COMPAT_SYSCALL_DEFINE_SPEC0(name, spec_args) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 0, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE0(name) + +/* + * None of the COMPAT_SYSCALL_DEFINE_SPECn macros set .name explicitly: + * their "name" parameter would be substituted into the designator, and + * DEFINE_KERNEL_API_SPEC() already stores the stringified + * compat_sys_<name>. + */ +#define COMPAT_SYSCALL_DEFINE_SPEC1(name, spec_args, t1, a1) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 1, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE1(name, t1, a1) + +#define COMPAT_SYSCALL_DEFINE_SPEC2(name, spec_args, t1, a1, t2, a2) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 2, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE2(name, t1, a1, t2, a2) + +#define COMPAT_SYSCALL_DEFINE_SPEC3(name, spec_args, t1, a1, t2, a2, t3, a= 3) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 3, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE3(name, t1, a1, t2, a2, t3, a3) + +#define COMPAT_SYSCALL_DEFINE_SPEC4(name, spec_args, t1, a1, t2, a2, t3, a= 3, \ + t4, a4) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 4, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE4(name, t1, a1, t2, a2, t3, a3, t4, a4) + +#define COMPAT_SYSCALL_DEFINE_SPEC5(name, spec_args, t1, a1, t2, a2, t3, a= 3, \ + t4, a4, t5, a5) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 5, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE5(name, t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) + +#define COMPAT_SYSCALL_DEFINE_SPEC6(name, spec_args, t1, a1, t2, a2, t3, a= 3, \ + t4, a4, t5, a5, t6, a6) \ + DEFINE_KERNEL_API_SPEC(compat_sys_##name) \ + .param_count =3D 6, \ + spec_args \ + KAPI_END_SPEC; \ + COMPAT_SYSCALL_DEFINE6(name, t1, a1, t2, a2, t3, a3, t4, a4, t5, a5, t6, = a6) + +#endif /* CONFIG_COMPAT */ + +#endif /* 
_LINUX_SYSCALL_API_SPEC_H */ \ No newline at end of file diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e5603cc91963d..f2951ece2068b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -87,6 +87,7 @@ struct xattr_args; #include #include #include +#include #include #include #include diff --git a/init/Kconfig b/init/Kconfig index af4c2f0854554..7a15248933895 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2079,6 +2079,8 @@ config TRACEPOINTS =20 source "kernel/Kconfig.kexec" =20 +source "kernel/api/Kconfig" + endmenu # General setup =20 source "arch/Kconfig" diff --git a/kernel/Makefile b/kernel/Makefile index 32e80dd626af0..ba94ee4bb2292 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -56,6 +56,7 @@ obj-y +=3D livepatch/ obj-y +=3D dma/ obj-y +=3D entry/ obj-$(CONFIG_MODULES) +=3D module/ +obj-$(CONFIG_KAPI_SPEC) +=3D api/ =20 obj-$(CONFIG_KCMP) +=3D kcmp.o obj-$(CONFIG_FREEZER) +=3D freezer.o diff --git a/kernel/api/Kconfig b/kernel/api/Kconfig new file mode 100644 index 0000000000000..fde25ec70e134 --- /dev/null +++ b/kernel/api/Kconfig @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Kernel API Specification Framework Configuration +# + +config KAPI_SPEC + bool "Kernel API Specification Framework" + help + This option enables the kernel API specification framework, + which provides formal documentation of kernel APIs in both + human and machine-readable formats. + + The framework allows developers to document APIs inline with + their implementation, including parameter specifications, + return values, error conditions, locking requirements, and + execution context constraints. + + When enabled, API specifications can be queried at runtime + and exported in various formats (JSON, XML) through debugfs. + + If unsure, say N. 
+ +config KAPI_RUNTIME_CHECKS + bool "Runtime API specification checks" + depends on KAPI_SPEC + depends on DEBUG_KERNEL + help + Enable runtime validation of API usage against specifications. + This includes checking execution context requirements, parameter + validation, and lock state verification. + + This adds overhead and should only be used for debugging and + development. The checks use WARN_ONCE to report violations. + + If unsure, say N. diff --git a/kernel/api/Makefile b/kernel/api/Makefile new file mode 100644 index 0000000000000..4120ded7e5cf1 --- /dev/null +++ b/kernel/api/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Kernel API Specification Framework +# + +# Core API specification framework +obj-$(CONFIG_KAPI_SPEC) +=3D kernel_api_spec.o \ No newline at end of file diff --git a/kernel/api/kernel_api_spec.c b/kernel/api/kernel_api_spec.c new file mode 100644 index 0000000000000..29c0c84d87f7c --- /dev/null +++ b/kernel/api/kernel_api_spec.c @@ -0,0 +1,1169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kernel_api_spec.c - Kernel API Specification Framework Implementation + * + * Provides runtime support for kernel API specifications including valida= tion, + * export to various formats, and querying capabilities. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Section where API specifications are stored */ +extern struct kernel_api_spec __start_kapi_specs[]; +extern struct kernel_api_spec __stop_kapi_specs[]; + +/* Dynamic API registration */ +static LIST_HEAD(dynamic_api_specs); +static DEFINE_MUTEX(api_spec_mutex); + +struct dynamic_api_spec { + struct list_head list; + struct kernel_api_spec *spec; +}; + +/** + * kapi_get_spec - Get API specification by name + * @name: Function name to look up + * + * Return: Pointer to API specification or NULL if not found + */ +const struct kernel_api_spec *kapi_get_spec(const char *name) +{ + struct kernel_api_spec *spec; + struct dynamic_api_spec *dyn_spec; + + /* Search static specifications */ + for (spec =3D __start_kapi_specs; spec < __stop_kapi_specs; spec++) { + if (strcmp(spec->name, name) =3D=3D 0) + return spec; + } + + /* Search dynamic specifications */ + mutex_lock(&api_spec_mutex); + list_for_each_entry(dyn_spec, &dynamic_api_specs, list) { + if (strcmp(dyn_spec->spec->name, name) =3D=3D 0) { + mutex_unlock(&api_spec_mutex); + return dyn_spec->spec; + } + } + mutex_unlock(&api_spec_mutex); + + return NULL; +} +EXPORT_SYMBOL_GPL(kapi_get_spec); + +/** + * kapi_register_spec - Register a dynamic API specification + * @spec: API specification to register + * + * The duplicate-name check against the dynamic list and the insertion + * happen under a single hold of api_spec_mutex, so two concurrent + * callers cannot both register the same name. + * + * Return: 0 on success, -EINVAL if @spec is NULL or has an empty name, + * -EEXIST if a specification with that name already exists, -ENOMEM if + * the list node cannot be allocated (checked before the dynamic list). 
+ */ +int kapi_register_spec(struct kernel_api_spec *spec) +{ + struct dynamic_api_spec *dyn_spec, *cur; + struct kernel_api_spec *static_spec; + + if (!spec || !spec->name[0]) + return -EINVAL; + + /* Static table is searched unlocked, as kapi_get_spec() does */ + for (static_spec =3D __start_kapi_specs; + static_spec < __stop_kapi_specs; static_spec++) { + if (strcmp(static_spec->name, spec->name) =3D=3D 0) + return -EEXIST; + } + + /* Allocate up front to keep the critical section short */ + dyn_spec =3D kzalloc(sizeof(*dyn_spec), GFP_KERNEL); + if (!dyn_spec) + return -ENOMEM; + + dyn_spec->spec =3D spec; + + /* Check for a duplicate and insert under the same lock hold */ + mutex_lock(&api_spec_mutex); + list_for_each_entry(cur, &dynamic_api_specs, list) { + if (strcmp(cur->spec->name, spec->name) =3D=3D 0) { + mutex_unlock(&api_spec_mutex); + kfree(dyn_spec); + return -EEXIST; + } + } + list_add_tail(&dyn_spec->list, &dynamic_api_specs); + mutex_unlock(&api_spec_mutex); + + return 0; +} 
+EXPORT_SYMBOL_GPL(kapi_register_spec); + +/** + * kapi_unregister_spec - Unregister a dynamic API specification + * @name: Name of API to unregister + */ +void kapi_unregister_spec(const char *name) +{ + struct dynamic_api_spec *dyn_spec, *tmp; + + mutex_lock(&api_spec_mutex); + list_for_each_entry_safe(dyn_spec, tmp, &dynamic_api_specs, list) { + if (strcmp(dyn_spec->spec->name, name) =3D=3D 0) { + list_del(&dyn_spec->list); + kfree(dyn_spec); + break; + } + } + mutex_unlock(&api_spec_mutex); +} +EXPORT_SYMBOL_GPL(kapi_unregister_spec); + +/** + * param_type_to_string - Convert parameter type to string + * @type: Parameter type + * + * Return: String representation of type + */ +static const char *param_type_to_string(enum kapi_param_type type) +{ + static const char * const type_names[] =3D { + [KAPI_TYPE_VOID] =3D "void", + [KAPI_TYPE_INT] =3D "int", + [KAPI_TYPE_UINT] =3D "uint", + [KAPI_TYPE_PTR] =3D "pointer", + [KAPI_TYPE_STRUCT] =3D "struct", + [KAPI_TYPE_UNION] =3D "union", + [KAPI_TYPE_ENUM] =3D "enum", + [KAPI_TYPE_FUNC_PTR] =3D "function_pointer", + [KAPI_TYPE_ARRAY] =3D "array", + [KAPI_TYPE_FD] =3D "file_descriptor", + [KAPI_TYPE_USER_PTR] =3D "user_pointer", + [KAPI_TYPE_PATH] =3D "pathname", + [KAPI_TYPE_CUSTOM] =3D "custom", + }; + + if (type >=3D ARRAY_SIZE(type_names)) + return "unknown"; + + return type_names[type]; +} + +/** + * lock_type_to_string - Convert lock type to string + * @type: Lock type + * + * Return: String representation of lock type + */ +static const char *lock_type_to_string(enum kapi_lock_type type) +{ + static const char * const lock_names[] =3D { + [KAPI_LOCK_NONE] =3D "none", + [KAPI_LOCK_MUTEX] =3D "mutex", + [KAPI_LOCK_SPINLOCK] =3D "spinlock", + [KAPI_LOCK_RWLOCK] =3D "rwlock", + [KAPI_LOCK_SEQLOCK] =3D "seqlock", + [KAPI_LOCK_RCU] =3D "rcu", + [KAPI_LOCK_SEMAPHORE] =3D "semaphore", + [KAPI_LOCK_CUSTOM] =3D "custom", + }; + + if (type >=3D ARRAY_SIZE(lock_names)) + return "unknown"; + + return lock_names[type]; +} + 
+/** + * return_check_type_to_string - Convert return check type to string + * @type: Return check type + * + * Return: String representation of return check type + */ +static const char *return_check_type_to_string(enum kapi_return_check_type= type) +{ + static const char * const check_names[] =3D { + [KAPI_RETURN_EXACT] =3D "exact", + [KAPI_RETURN_RANGE] =3D "range", + [KAPI_RETURN_ERROR_CHECK] =3D "error_check", + [KAPI_RETURN_FD] =3D "file_descriptor", + [KAPI_RETURN_CUSTOM] =3D "custom", + }; + + if (type >=3D ARRAY_SIZE(check_names)) + return "unknown"; + + return check_names[type]; +} + +/** + * kapi_export_json - Export API specification to JSON format + * @spec: API specification to export + * @buf: Buffer to write JSON to + * @size: Size of buffer + * + * Return: Number of bytes written or negative error + */ +int kapi_export_json(const struct kernel_api_spec *spec, char *buf, size_t= size) +{ + int ret =3D 0; + int i; + + if (!spec || !buf || size =3D=3D 0) + return -EINVAL; + + ret =3D scnprintf(buf, size, + "{\n" + " \"name\": \"%s\",\n" + " \"version\": %u,\n" + " \"description\": \"%s\",\n" + " \"long_description\": \"%s\",\n" + " \"context_flags\": \"0x%x\",\n", + spec->name, + spec->version, + spec->description, + spec->long_description, + spec->context_flags); + + /* Parameters */ + ret +=3D scnprintf(buf + ret, size - ret, + " \"parameters\": [\n"); + + for (i =3D 0; i < spec->param_count && i < KAPI_MAX_PARAMS; i++) { + const struct kapi_param_spec *param =3D &spec->params[i]; + + ret +=3D scnprintf(buf + ret, size - ret, + " {\n" + " \"name\": \"%s\",\n" + " \"type\": \"%s\",\n" + " \"type_class\": \"%s\",\n" + " \"flags\": \"0x%x\",\n" + " \"description\": \"%s\"\n" + " }%s\n", + param->name, + param->type_name, + param_type_to_string(param->type), + param->flags, + param->description, + (i < spec->param_count - 1) ? 
"," : ""); + } + + ret +=3D scnprintf(buf + ret, size - ret, " ],\n"); + + /* Return value */ + ret +=3D scnprintf(buf + ret, size - ret, + " \"return\": {\n" + " \"type\": \"%s\",\n" + " \"type_class\": \"%s\",\n" + " \"check_type\": \"%s\",\n", + spec->return_spec.type_name, + param_type_to_string(spec->return_spec.type), + return_check_type_to_string(spec->return_spec.check_type)); + + switch (spec->return_spec.check_type) { + case KAPI_RETURN_EXACT: + ret +=3D scnprintf(buf + ret, size - ret, + " \"success_value\": %lld,\n", + spec->return_spec.success_value); + break; + case KAPI_RETURN_RANGE: + ret +=3D scnprintf(buf + ret, size - ret, + " \"success_min\": %lld,\n" + " \"success_max\": %lld,\n", + spec->return_spec.success_min, + spec->return_spec.success_max); + break; + case KAPI_RETURN_ERROR_CHECK: + ret +=3D scnprintf(buf + ret, size - ret, + " \"error_count\": %u,\n", + spec->return_spec.error_count); + break; + default: + break; + } + + ret +=3D scnprintf(buf + ret, size - ret, + " \"description\": \"%s\"\n" + " },\n", + spec->return_spec.description); + + /* Errors */ + ret +=3D scnprintf(buf + ret, size - ret, + " \"errors\": [\n"); + + for (i =3D 0; i < spec->error_count && i < KAPI_MAX_ERRORS; i++) { + const struct kapi_error_spec *error =3D &spec->errors[i]; + + ret +=3D scnprintf(buf + ret, size - ret, + " {\n" + " \"code\": %d,\n" + " \"name\": \"%s\",\n" + " \"condition\": \"%s\",\n" + " \"description\": \"%s\"\n" + " }%s\n", + error->error_code, + error->name, + error->condition, + error->description, + (i < spec->error_count - 1) ? 
"," : ""); + } + + ret +=3D scnprintf(buf + ret, size - ret, " ],\n"); + + /* Locks */ + ret +=3D scnprintf(buf + ret, size - ret, + " \"locks\": [\n"); + + for (i =3D 0; i < spec->lock_count && i < KAPI_MAX_CONSTRAINTS; i++) { + const struct kapi_lock_spec *lock =3D &spec->locks[i]; + + ret +=3D scnprintf(buf + ret, size - ret, + " {\n" + " \"name\": \"%s\",\n" + " \"type\": \"%s\",\n" + " \"acquired\": %s,\n" + " \"released\": %s,\n" + " \"held_on_entry\": %s,\n" + " \"held_on_exit\": %s,\n" + " \"description\": \"%s\"\n" + " }%s\n", + lock->lock_name, + lock_type_to_string(lock->lock_type), + lock->acquired ? "true" : "false", + lock->released ? "true" : "false", + lock->held_on_entry ? "true" : "false", + lock->held_on_exit ? "true" : "false", + lock->description, + (i < spec->lock_count - 1) ? "," : ""); + } + + ret +=3D scnprintf(buf + ret, size - ret, " ],\n"); + + /* Additional info */ + ret +=3D scnprintf(buf + ret, size - ret, + " \"since_version\": \"%s\",\n" + " \"deprecated\": %s,\n" + " \"replacement\": \"%s\",\n" + " \"examples\": \"%s\",\n" + " \"notes\": \"%s\"\n" + "}\n", + spec->since_version, + spec->deprecated ? 
"true" : "false", + spec->replacement, + spec->examples, + spec->notes); + + return ret; +} +EXPORT_SYMBOL_GPL(kapi_export_json); + +/** + * kapi_export_xml - Export API specification to XML format + * @spec: API specification to export + * @buf: Buffer to write XML to + * @size: Size of buffer + * + * Return: Number of bytes written or negative error + */ +int kapi_export_xml(const struct kernel_api_spec *spec, char *buf, size_t = size) +{ + int ret =3D 0; + int i; + + if (!spec || !buf || size =3D=3D 0) + return -EINVAL; + + ret =3D scnprintf(buf, size, + "\n" + "\n" + " %s\n" + " %u\n" + " %s\n" + " \n" + " 0x%x\n", + spec->name, + spec->version, + spec->description, + spec->long_description, + spec->context_flags); + + /* Parameters */ + ret +=3D scnprintf(buf + ret, size - ret, " \n"); + + for (i =3D 0; i < spec->param_count && i < KAPI_MAX_PARAMS; i++) { + const struct kapi_param_spec *param =3D &spec->params[i]; + + ret +=3D scnprintf(buf + ret, size - ret, + " \n" + " %s\n" + " %s\n" + " %s\n" + " 0x%x\n" + " \n" + " \n", + param->name, + param->type_name, + param_type_to_string(param->type), + param->flags, + param->description); + } + + ret +=3D scnprintf(buf + ret, size - ret, " \n"); + + /* Return value */ + ret +=3D scnprintf(buf + ret, size - ret, + " \n" + " %s\n" + " %s\n" + " %s\n", + spec->return_spec.type_name, + param_type_to_string(spec->return_spec.type), + return_check_type_to_string(spec->return_spec.check_type)); + + switch (spec->return_spec.check_type) { + case KAPI_RETURN_EXACT: + ret +=3D scnprintf(buf + ret, size - ret, + " %lld\n", + spec->return_spec.success_value); + break; + case KAPI_RETURN_RANGE: + ret +=3D scnprintf(buf + ret, size - ret, + " %lld\n" + " %lld\n", + spec->return_spec.success_min, + spec->return_spec.success_max); + break; + case KAPI_RETURN_ERROR_CHECK: + ret +=3D scnprintf(buf + ret, size - ret, + " %u\n", + spec->return_spec.error_count); + break; + default: + break; + } + + ret +=3D scnprintf(buf + ret, size - 
ret, + " \n" + " \n", + spec->return_spec.description); + + /* Errors */ + ret +=3D scnprintf(buf + ret, size - ret, " \n"); + + for (i =3D 0; i < spec->error_count && i < KAPI_MAX_ERRORS; i++) { + const struct kapi_error_spec *error =3D &spec->errors[i]; + + ret +=3D scnprintf(buf + ret, size - ret, + " \n" + " %d\n" + " %s\n" + " \n" + " \n" + " \n", + error->error_code, + error->name, + error->condition, + error->description); + } + + ret +=3D scnprintf(buf + ret, size - ret, " \n"); + + /* Additional info */ + ret +=3D scnprintf(buf + ret, size - ret, + " %s\n" + " %s\n" + " %s\n" + " \n" + " \n" + "\n", + spec->since_version, + spec->deprecated ? "true" : "false", + spec->replacement, + spec->examples, + spec->notes); + + return ret; +} +EXPORT_SYMBOL_GPL(kapi_export_xml); + +/** + * kapi_print_spec - Print API specification to kernel log + * @spec: API specification to print + */ +void kapi_print_spec(const struct kernel_api_spec *spec) +{ + int i; + + if (!spec) + return; + + pr_info("=3D=3D=3D Kernel API Specification =3D=3D=3D\n"); + pr_info("Name: %s\n", spec->name); + pr_info("Version: %u\n", spec->version); + pr_info("Description: %s\n", spec->description); + + if (spec->long_description[0]) + pr_info("Long Description: %s\n", spec->long_description); + + pr_info("Context Flags: 0x%x\n", spec->context_flags); + + /* Parameters */ + if (spec->param_count > 0) { + pr_info("Parameters:\n"); + for (i =3D 0; i < spec->param_count && i < KAPI_MAX_PARAMS; i++) { + const struct kapi_param_spec *param =3D &spec->params[i]; + pr_info(" [%d] %s: %s (flags: 0x%x)\n", + i, param->name, param->type_name, param->flags); + if (param->description[0]) + pr_info(" Description: %s\n", param->description); + } + } + + /* Return value */ + pr_info("Return: %s\n", spec->return_spec.type_name); + if (spec->return_spec.description[0]) + pr_info(" Description: %s\n", spec->return_spec.description); + + /* Errors */ + if (spec->error_count > 0) { + pr_info("Possible Errors:\n"); 
+ for (i =3D 0; i < spec->error_count && i < KAPI_MAX_ERRORS; i++) { + const struct kapi_error_spec *error =3D &spec->errors[i]; + pr_info(" %s (%d): %s\n", + error->name, error->error_code, error->condition); + } + } + + pr_info("=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D\n"); +} +EXPORT_SYMBOL_GPL(kapi_print_spec); + +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + +/** + * kapi_validate_fd - Validate that a file descriptor is valid in current = context + * @fd: File descriptor to validate + * + * Return: true if fd is valid in current process context, false otherwise + */ +static bool kapi_validate_fd(int fd) +{ + struct fd f; + + /* Special case: AT_FDCWD is always valid */ + if (fd =3D=3D AT_FDCWD) + return true; + + /* Check basic range */ + if (fd < 0) + return false; + + /* Check if fd is valid in current process context */ + f =3D fdget(fd); + if (fd_empty(f)) { + return false; + } + + /* fd is valid, release reference */ + fdput(f); + return true; +} + +/** + * kapi_validate_user_ptr - Validate that a user pointer is accessible + * @ptr: User pointer to validate + * @size: Size in bytes to validate + * @write: Whether write access is required + * + * Return: true if user memory is accessible, false otherwise + */ +static bool kapi_validate_user_ptr(const void __user *ptr, size_t size, bo= ol write) +{ + /* NULL is valid if parameter is marked optional */ + if (!ptr) + return false; + + /* Check if the user memory region is accessible */ + if (write) { + return access_ok(ptr, size); + } else { + return access_ok(ptr, size); + } +} + +/** + * kapi_validate_user_ptr_with_params - Validate user pointer with dynamic= size + * @param_spec: Parameter specification + * @ptr: User pointer to validate + * @all_params: Array of all parameter values + * @param_count: Number of parameters + * + * Return: true if user memory is accessible, false otherwise + */ +static bool kapi_validate_user_ptr_with_params(const struct 
kapi_param_spe= c *param_spec, + const void __user *ptr, + const s64 *all_params, + int param_count) +{ + size_t actual_size; + bool write; + + /* NULL is allowed for optional parameters */ + if (!ptr && (param_spec->flags & KAPI_PARAM_OPTIONAL)) + return true; + + /* Calculate actual size based on related parameter */ + if (param_spec->size_param_idx >=3D 0 && + param_spec->size_param_idx < param_count) { + s64 count =3D all_params[param_spec->size_param_idx]; + + /* Validate count is positive */ + if (count <=3D 0) { + pr_warn("Parameter %s: size determinant is non-positive (%lld)\n", + param_spec->name, count); + return false; + } + + /* Check for multiplication overflow */ + if (param_spec->size_multiplier > 0 && + count > SIZE_MAX / param_spec->size_multiplier) { + pr_warn("Parameter %s: size calculation overflow\n", + param_spec->name); + return false; + } + + actual_size =3D count * param_spec->size_multiplier; + } else { + /* Use fixed size */ + actual_size =3D param_spec->size; + } + + write =3D (param_spec->flags & KAPI_PARAM_OUT) || + (param_spec->flags & KAPI_PARAM_INOUT); + + return kapi_validate_user_ptr(ptr, actual_size, write); +} + +/** + * kapi_validate_path - Validate that a pathname is accessible and within = limits + * @path: User pointer to pathname + * @param_spec: Parameter specification + * + * Return: true if path is valid, false otherwise + */ +static bool kapi_validate_path(const char __user *path, + const struct kapi_param_spec *param_spec) +{ + size_t len; + + /* NULL is allowed for optional parameters */ + if (!path && (param_spec->flags & KAPI_PARAM_OPTIONAL)) + return true; + + if (!path) { + pr_warn("Parameter %s: NULL path not allowed\n", param_spec->name); + return false; + } + + /* Check if the path is accessible */ + if (!access_ok(path, 1)) { + pr_warn("Parameter %s: path pointer %p not accessible\n", + param_spec->name, path); + return false; + } + + /* Use strnlen_user to get the length and validate accessibility */ + len 
=3D strnlen_user(path, PATH_MAX + 1); + if (len =3D=3D 0) { + pr_warn("Parameter %s: invalid path pointer %p\n", + param_spec->name, path); + return false; + } + + /* Check path length limit */ + if (len > PATH_MAX) { + pr_warn("Parameter %s: path too long (exceeds PATH_MAX)\n", + param_spec->name); + return false; + } + + return true; +} + +/** + * kapi_validate_param - Validate a parameter against its specification + * @param_spec: Parameter specification + * @value: Parameter value to validate + * + * Return: true if valid, false otherwise + */ +bool kapi_validate_param(const struct kapi_param_spec *param_spec, s64 val= ue) +{ + int i; + + /* Special handling for file descriptor type */ + if (param_spec->type =3D=3D KAPI_TYPE_FD) { + if (!kapi_validate_fd((int)value)) { + pr_warn("Parameter %s: invalid file descriptor %lld\n", + param_spec->name, value); + return false; + } + /* Continue with additional constraint checks if needed */ + } + + /* Special handling for user pointer type */ + if (param_spec->type =3D=3D KAPI_TYPE_USER_PTR) { + const void __user *ptr =3D (const void __user *)value; + bool write =3D (param_spec->flags & KAPI_PARAM_OUT) || + (param_spec->flags & KAPI_PARAM_INOUT); + + /* NULL is allowed for optional parameters */ + if (!ptr && (param_spec->flags & KAPI_PARAM_OPTIONAL)) + return true; + + if (!kapi_validate_user_ptr(ptr, param_spec->size, write)) { + pr_warn("Parameter %s: invalid user pointer %p (size: %zu, %s)\n", + param_spec->name, ptr, param_spec->size, + write ? 
"write" : "read"); + return false; + } + /* Continue with additional constraint checks if needed */ + } + + /* Special handling for path type */ + if (param_spec->type =3D=3D KAPI_TYPE_PATH) { + const char __user *path =3D (const char __user *)value; + + if (!kapi_validate_path(path, param_spec)) { + return false; + } + /* Continue with additional constraint checks if needed */ + } + + switch (param_spec->constraint_type) { + case KAPI_CONSTRAINT_NONE: + return true; + + case KAPI_CONSTRAINT_RANGE: + if (value < param_spec->min_value || value > param_spec->max_value) { + pr_warn("Parameter %s value %lld out of range [%lld, %lld]\n", + param_spec->name, value, + param_spec->min_value, param_spec->max_value); + return false; + } + return true; + + case KAPI_CONSTRAINT_MASK: + if (value & ~param_spec->valid_mask) { + pr_warn("Parameter %s value 0x%llx contains invalid bits (valid mask: 0= x%llx)\n", + param_spec->name, value, param_spec->valid_mask); + return false; + } + return true; + + case KAPI_CONSTRAINT_ENUM: + if (!param_spec->enum_values || param_spec->enum_count =3D=3D 0) + return true; + + for (i =3D 0; i < param_spec->enum_count; i++) { + if (value =3D=3D param_spec->enum_values[i]) + return true; + } + pr_warn("Parameter %s value %lld not in valid enumeration\n", + param_spec->name, value); + return false; + + case KAPI_CONSTRAINT_CUSTOM: + if (param_spec->validate) + return param_spec->validate(value); + return true; + + default: + return true; + } +} +EXPORT_SYMBOL_GPL(kapi_validate_param); + +/** + * kapi_validate_param_with_context - Validate parameter with access to al= l params + * @param_spec: Parameter specification + * @value: Parameter value to validate + * @all_params: Array of all parameter values + * @param_count: Number of parameters + * + * Return: true if valid, false otherwise + */ +bool kapi_validate_param_with_context(const struct kapi_param_spec *param_= spec, + s64 value, const s64 *all_params, int param_count) +{ + /* Special handling 
for user pointer type with dynamic sizing */ + if (param_spec->type =3D=3D KAPI_TYPE_USER_PTR) { + const void __user *ptr =3D (const void __user *)value; + + /* NULL is allowed for optional parameters */ + if (!ptr && (param_spec->flags & KAPI_PARAM_OPTIONAL)) + return true; + + if (!kapi_validate_user_ptr_with_params(param_spec, ptr, all_params, par= am_count)) { + pr_warn("Parameter %s: invalid user pointer %p\n", + param_spec->name, ptr); + return false; + } + /* Continue with additional constraint checks if needed */ + } + + /* For other types, fall back to regular validation */ + return kapi_validate_param(param_spec, value); +} +EXPORT_SYMBOL_GPL(kapi_validate_param_with_context); + +/** + * kapi_validate_syscall_param - Validate syscall parameter with enforceme= nt + * @spec: API specification + * @param_idx: Parameter index + * @value: Parameter value + * + * A NULL @spec or a @param_idx outside the parameter table is treated as + * "nothing to validate" and yields 0. + * + * Return: -EINVAL if the value fails validation and the spec name starts + * with "sys_"; 0 in every other case, including failed validation of a + * spec whose name does not start with "sys_" + */ +int kapi_validate_syscall_param(const struct kernel_api_spec *spec, + int param_idx, s64 value) +{ + const struct kapi_param_spec *param_spec; + + if (!spec) + return 0; + + /* Same KAPI_MAX_PARAMS bound the other spec->params[] users apply */ + if (param_idx < 0 || param_idx >=3D KAPI_MAX_PARAMS || + param_idx >=3D spec->param_count) + return 0; + + param_spec =3D &spec->params[param_idx]; + + if (!kapi_validate_param(param_spec, value)) { + if (strncmp(spec->name, "sys_", 4) =3D=3D 0) { + /* For syscalls, we can return EINVAL to userspace */ + return -EINVAL; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(kapi_validate_syscall_param); + +/** + * kapi_validate_syscall_params - Validate all syscall parameters together + * @spec: API specification + * @params: Array of parameter values + * @param_count: Number of 
parameters + * + * Return: -EINVAL if any parameter is invalid, 0 if all valid + */ +int kapi_validate_syscall_params(const struct kernel_api_spec *spec, + const s64 *params, int param_count) +{ + int i; + + if (!spec || !params) + return 0; + + /* Validate that we have the expected number of parameters */ + if (param_count !=3D spec->param_count) { + pr_warn("API %s: parameter count mismatch 
(expected %u, got %d)\n", + spec->name, spec->param_count, param_count); + return -EINVAL; + } + + /* Validate each parameter with context */ + for (i =3D 0; i < spec->param_count && i < KAPI_MAX_PARAMS; i++) { + const struct kapi_param_spec *param_spec =3D &spec->params[i]; + + if (!kapi_validate_param_with_context(param_spec, params[i], params, par= am_count)) { + if (strncmp(spec->name, "sys_", 4) =3D=3D 0) { + /* For syscalls, we can return EINVAL to userspace */ + return -EINVAL; + } + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(kapi_validate_syscall_params); + +/** + * kapi_check_return_success - Check if return value indicates success + * @return_spec: Return specification + * @retval: Return value to check + * + * Returns true if the return value indicates success according to the spe= c. + */ +bool kapi_check_return_success(const struct kapi_return_spec *return_spec,= s64 retval) +{ + u32 i; + + if (!return_spec) + return true; /* No spec means we can't validate */ + + switch (return_spec->check_type) { + case KAPI_RETURN_EXACT: + return retval =3D=3D return_spec->success_value; + + case KAPI_RETURN_RANGE: + return retval >=3D return_spec->success_min && + retval <=3D return_spec->success_max; + + case KAPI_RETURN_ERROR_CHECK: + /* Success if NOT in error list */ + if (return_spec->error_values) { + for (i =3D 0; i < return_spec->error_count; i++) { + if (retval =3D=3D return_spec->error_values[i]) + return false; /* Found in error list */ + } + } + return true; /* Not in error list =3D success */ + + case KAPI_RETURN_FD: + /* File descriptors: >=3D 0 is success, < 0 is error */ + return retval >=3D 0; + + case KAPI_RETURN_CUSTOM: + if (return_spec->is_success) + return return_spec->is_success(retval); + fallthrough; + + default: + return true; /* Unknown check type, assume success */ + } +} +EXPORT_SYMBOL_GPL(kapi_check_return_success); + +/** + * kapi_validate_return_value - Validate that return value matches spec + * @spec: API specification + * @retval: 
Return value to validate + * + * Return: true if return value is valid according to spec, false otherwis= e. + * + * This function checks: + * 1. If the value indicates success, it must match the success criteria + * 2. If the value indicates error, it must be one of the specified error = codes + */ +bool kapi_validate_return_value(const struct kernel_api_spec *spec, s64 re= tval) +{ + int i; + bool is_success; + + if (!spec) + return true; /* No spec means we can't validate */ + + /* First check if this is a success return */ + is_success =3D kapi_check_return_success(&spec->return_spec, retval); + + if (is_success) { + /* Success case - already validated by kapi_check_return_success */ + return true; + } + + /* Special validation for file descriptor returns */ + if (spec->return_spec.check_type =3D=3D KAPI_RETURN_FD && is_success) { + /* For successful FD returns, validate it's a valid FD */ + if (!kapi_validate_fd((int)retval)) { + pr_warn("API %s returned invalid file descriptor %lld\n", + spec->name, retval); + return false; + } + return true; + } + + /* Error case - check if it's one of the specified errors */ + if (spec->error_count =3D=3D 0) { + /* No errors specified, so any error is potentially valid */ + pr_debug("API %s returned unspecified error %lld\n", + spec->name, retval); + return true; + } + + /* Check if the error is in our list of specified errors */ + for (i =3D 0; i < spec->error_count && i < KAPI_MAX_ERRORS; i++) { + if (retval =3D=3D spec->errors[i].error_code) + return true; + } + + /* Error not in spec */ + pr_warn("API %s returned unspecified error code %lld. 
Valid errors are:\n= ", + spec->name, retval); + for (i =3D 0; i < spec->error_count && i < KAPI_MAX_ERRORS; i++) { + pr_warn(" %s (%d): %s\n", + spec->errors[i].name, + spec->errors[i].error_code, + spec->errors[i].condition); + } + + return false; +} +EXPORT_SYMBOL_GPL(kapi_validate_return_value); + +/** + * kapi_validate_syscall_return - Validate syscall return value with enfor= cement + * @spec: API specification + * @retval: Return value + * + * Return: 0 if valid, -EINVAL if the return value doesn't match spec + * + * For syscalls, this can help detect kernel bugs where unspecified error + * codes are returned to userspace. + */ +int kapi_validate_syscall_return(const struct kernel_api_spec *spec, s64 r= etval) +{ + if (!spec) + return 0; + + if (!kapi_validate_return_value(spec, retval)) { + /* Log the violation but don't change the return value */ + WARN_ONCE(1, "Syscall %s returned unspecified value %lld\n", + spec->name, retval); + /* Could return -EINVAL here to enforce, but that might break userspace = */ + } + + return 0; +} +EXPORT_SYMBOL_GPL(kapi_validate_syscall_return); + +/** + * kapi_check_context - Check if current context matches API requirements + * @spec: API specification to check against + */ +void kapi_check_context(const struct kernel_api_spec *spec) +{ + u32 ctx =3D spec->context_flags; + bool valid =3D false; + + if (!ctx) + return; + + /* Check if we're in an allowed context */ + if ((ctx & KAPI_CTX_PROCESS) && !in_interrupt()) + valid =3D true; + + if ((ctx & KAPI_CTX_SOFTIRQ) && in_softirq()) + valid =3D true; + + if ((ctx & KAPI_CTX_HARDIRQ) && in_hardirq()) + valid =3D true; + + if ((ctx & KAPI_CTX_NMI) && in_nmi()) + valid =3D true; + + if (!valid) { + WARN_ONCE(1, "API %s called from invalid context\n", spec->name); + } + + /* Check specific requirements */ + if ((ctx & KAPI_CTX_ATOMIC) && preemptible()) { + WARN_ONCE(1, "API %s requires atomic context\n", spec->name); + } + + if ((ctx & KAPI_CTX_SLEEPABLE) && !preemptible()) { + 
WARN_ONCE(1, "API %s requires sleepable context\n", spec->name); + } +} +EXPORT_SYMBOL_GPL(kapi_check_context); + +#endif /* CONFIG_KAPI_RUNTIME_CHECKS */ + +/* DebugFS interface */ +#ifdef CONFIG_DEBUG_FS + +static struct dentry *kapi_debugfs_root; + +static int kapi_spec_show(struct seq_file *s, void *v) +{ + struct kernel_api_spec *spec =3D s->private; + char *buf; + int ret; + + buf =3D kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret =3D kapi_export_json(spec, buf, PAGE_SIZE * 4); + if (ret > 0) + seq_printf(s, "%s", buf); + + kfree(buf); + return 0; +} + +static int kapi_spec_open(struct inode *inode, struct file *file) +{ + return single_open(file, kapi_spec_show, inode->i_private); +} + +static const struct file_operations kapi_spec_fops =3D { + .open =3D kapi_spec_open, + .read =3D seq_read, + .llseek =3D seq_lseek, + .release =3D single_release, +}; + +static int kapi_list_show(struct seq_file *s, void *v) +{ + struct kernel_api_spec *spec; + struct dynamic_api_spec *dyn_spec; + + seq_printf(s, "Kernel API Specifications:\n\n"); + + /* List static specifications */ + seq_printf(s, "Static APIs:\n"); + for (spec =3D __start_kapi_specs; spec < __stop_kapi_specs; spec++) { + seq_printf(s, " %s (v%u): %s\n", + spec->name, spec->version, spec->description); + } + + /* List dynamic specifications */ + seq_printf(s, "\nDynamic APIs:\n"); + mutex_lock(&api_spec_mutex); + list_for_each_entry(dyn_spec, &dynamic_api_specs, list) { + spec =3D dyn_spec->spec; + seq_printf(s, " %s (v%u): %s\n", + spec->name, spec->version, spec->description); + } + mutex_unlock(&api_spec_mutex); + + return 0; +} + +static int kapi_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, kapi_list_show, NULL); +} + +static const struct file_operations kapi_list_fops =3D { + .open =3D kapi_list_open, + .read =3D seq_read, + .llseek =3D seq_lseek, + .release =3D single_release, +}; + +static int __init kapi_debugfs_init(void) +{ + struct 
kernel_api_spec *spec; + struct dentry *spec_dir; + + kapi_debugfs_root =3D debugfs_create_dir("kapi", NULL); + if (!kapi_debugfs_root) + return -ENOMEM; + + /* Create list file */ + debugfs_create_file("list", 0444, kapi_debugfs_root, NULL, + &kapi_list_fops); + + /* Create directory for specifications */ + spec_dir =3D debugfs_create_dir("specs", kapi_debugfs_root); + + /* Create files for each static specification */ + for (spec =3D __start_kapi_specs; spec < __stop_kapi_specs; spec++) { + debugfs_create_file(spec->name, 0444, spec_dir, spec, + &kapi_spec_fops); + } + + return 0; +} + +late_initcall(kapi_debugfs_init); + +#endif /* CONFIG_DEBUG_FS */ \ No newline at end of file --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0E4A22D9EE5; Sat, 14 Jun 2025 13:49:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908943; cv=none; b=m3XCahc44k1rPJb5xuqf1+HgW2EYABKI3Ksrx/D+HgoszFoQlX7aTOGYuJOv6w3RWT3qC/utnrD7phfEhjZanWNA/NU8hk+7ezN8PtbFj5nmfmkVsAfsifv6Pq/LGeOLYA5iBpiDswvFukwOTUm5X9quy9KsZyCk205r/dqttBI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908943; c=relaxed/simple; bh=BUrldnQPJ42xUrag3Jg1xnpVhRl3Q67bMpKOc+Wgtbs=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=fkB6StdTLki959HlSXVChaFoPTgf8nFtfEgengSUoVLMon0plMQ2oA7t+rsGR5II4ZPxj0CXy1qTPu1zXiHk7sz5ZZFrKb2TwyhiCDUthRR56d4NHTKhl+TIYFHmRxdnCx865IA23/58PxtHmXQuOgmC7h0kVQoQMy5FDQmgPd4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=CmZVLvre; arc=none smtp.client-ip=10.30.226.201 
Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="CmZVLvre" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 40041C4CEF0; Sat, 14 Jun 2025 13:49:02 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908942; bh=BUrldnQPJ42xUrag3Jg1xnpVhRl3Q67bMpKOc+Wgtbs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=CmZVLvreLZ7ZPfep9/dudxMmrWK4WXhVHYZNeVNro1rbdVt+FztARkn49hP8NU1Ri 7nKa+pBUDdJnOvnv40X48Y6ekIvf3YKIsMHz33MH5WwaU6x1yDLpB2DZLE1YQwuMsK 6RBAqXs72GojYZIAi3zn0/dPHQwihhKV6YNLVguSWEtsTxkb6HeJJkTvWaGQB8/Fdw ZuAedZweI+wuKXlVoDjB31p0TsbhIpaSZfFvDjVFy/TdsOfrfzi4YEkJyRmrc/Yxe+ dY/8QDClcThvtvs2UPB56W5Yijh7Tpy4j+I11lvAmjqDFgiEtqU+ceZdslMcpLj+3Q 1pF+Xc5M4XpSQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 02/19] eventpoll: add API specification for epoll_create1 Date: Sat, 14 Jun 2025 09:48:41 -0400 Message-Id: <20250614134858.790460-3-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the epoll_create1() system call. 
Signed-off-by: Sasha Levin --- fs/eventpoll.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d4dbffdedd08e..8f8a64ebbaef6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2265,6 +2266,91 @@ static int do_epoll_create(int flags) return error; } =20 + +/* Valid values for epoll_create1 flags parameter */ +static const s64 epoll_create1_valid_values[] =3D { 0, EPOLL_CLOEXEC }; + +DEFINE_KERNEL_API_SPEC(sys_epoll_create1) + KAPI_DESCRIPTION("Create an epoll instance") + KAPI_LONG_DESC("Creates a new epoll instance and returns a file descripto= r " + "referring to that instance. The file descriptor is used for all " + "subsequent calls to the epoll interface.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "flags", "int", "Creation flags for the epoll instance") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .constraint_type =3D KAPI_CONSTRAINT_ENUM, + .enum_values =3D epoll_create1_valid_values, + .enum_count =3D ARRAY_SIZE(epoll_create1_valid_values), + .constraints =3D "Must be 0 or EPOLL_CLOEXEC", + KAPI_PARAM_END + + KAPI_RETURN("long", "File descriptor on success, negative error code on f= ailure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_FD, + KAPI_RETURN_END + + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid flags specified", + "The flags parameter contains invalid values. 
Only EPOLL_CLOEXEC is a= llowed.") + KAPI_ERROR(1, -EMFILE, "EMFILE", "Per-process file descriptor limit reach= ed", + "The per-process limit on the number of open file descriptors has bee= n reached.") + KAPI_ERROR(2, -ENFILE, "ENFILE", "System file table overflow", + "The system-wide limit on the total number of open files has been rea= ched.") + KAPI_ERROR(3, -ENOMEM, "ENOMEM", "Insufficient kernel memory", + "There was insufficient kernel memory to create the epoll instance.") + KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal", + "The system call was interrupted by a signal before the epoll instanc= e could be created.") + + .error_count =3D 5, + .param_count =3D 1, + .since_version =3D "2.6.27", + .examples =3D "int epfd =3D epoll_create1(EPOLL_CLOEXEC);", + .notes =3D "EPOLL_CLOEXEC sets the close-on-exec (FD_CLOEXEC) flag on the= new file descriptor. " + "When all file descriptors referring to an epoll instance are closed, t= he kernel " + "destroys the instance and releases associated resources.", + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_RESOURCE_CREATE | KAPI_EFFECT_ALLOC_MEMOR= Y, + "epoll instance", + "Creates a new epoll instance and allocates kernel memory for it") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_RESOURCE_CREATE, + "file descriptor", + "Allocates a new file descriptor in the process's file descriptor tabl= e") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(2) + + /* State transitions */ + KAPI_STATE_TRANS(0, "epoll instance", "non-existent", "created and empty", + "A new epoll instance is created with no monitored file descriptors") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(1) + + /* Signal specifications */ + KAPI_SIGNAL(0, SIGINT, "SIGINT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_= RETURN) + KAPI_SIGNAL_CONDITION("During creation if process receives SIGINT") + KAPI_SIGNAL_DESC("If interrupted during kernel memory allocation, return= s 
-EINTR") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGTERM, "SIGTERM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("During creation if process receives SIGTERM") + KAPI_SIGNAL_DESC("If interrupted during kernel memory allocation, return= s -EINTR") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("At any point during the syscall") + KAPI_SIGNAL_DESC("Process is terminated immediately, epoll instance crea= tion may be incomplete") + KAPI_SIGNAL_END + + .signal_count =3D 3, +KAPI_END_SPEC; SYSCALL_DEFINE1(epoll_create1, int, flags) { return do_epoll_create(flags); --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 35D6D2DA74C; Sat, 14 Jun 2025 13:49:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908944; cv=none; b=oUIt4tIfcdFUleaL7eim1MEmT2GKn0fuIwVxmhil4gZG0c37Zpg6GVjk4HYn+xzEaq0/i2I/LNtpY4uvW/QKMnCmvNM7M/H3vy3djjx3KOw/5+qGOkvbK0gJEQXtS712JPgQLDsVLZLMD2bWUG+5ikMOHZ1xK7FkB46IzSnkrmA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908944; c=relaxed/simple; bh=iQT/MtoEuRsG20rli2jzMl+0g6mMlRhX4GeT2c666kA=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=WS+9PhT1SQF2HVnV83Jdx7NhXhnYmf9dvtUwS5Hu2EeRx9igRfvKkeoPJioZc857dKh9irlmmYl9kOQROD7VEu0I0lE+EjK5WWefIzfHXhspB708jishGhkY998NqEGJk8uFeF6vJB7Kj+XzJOI7qzVnDGuhhZmujKGMpS3Z9WE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=qzO2PTLh; 
arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="qzO2PTLh" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2A9B9C4CEF3; Sat, 14 Jun 2025 13:49:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908943; bh=iQT/MtoEuRsG20rli2jzMl+0g6mMlRhX4GeT2c666kA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=qzO2PTLhMew6aDwiLgpRNDF/86BmzgQ7bBAGLq/+UaBs5mIdamXZb1oDs2v96CeK9 AK4eD7PoHwqa06wof91oVx97OtCpsuHpLNptWiwVYimYH0lvN/L8Dq6TGibA+VHnth VIqbqCC2qkzpNXcOotfV7ghrnH6rdvegckExmzmmI1rrY34+UHebYxA6vibWYpICjr DussrWk4cuWyuKvhhvUg64Qk99jA8Z674D+Sl130WYzJTS5E12cE0JFNirevyQAx3e eiLbnF91V22Ghb9qGneDU3VFvft+YJQTUOvZ/xF11FCS3OoxjoVoJpW773/33PbbAZ pywPzMWXyjk0A== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 03/19] eventpoll: add API specification for epoll_create Date: Sat, 14 Jun 2025 09:48:42 -0400 Message-Id: <20250614134858.790460-4-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the epoll_create() system call. 
Signed-off-by: Sasha Levin --- fs/eventpoll.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8f8a64ebbaef6..50adea7ba43d1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2356,6 +2356,117 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) return do_epoll_create(flags); } =20 + +DEFINE_KERNEL_API_SPEC(sys_epoll_create) + KAPI_DESCRIPTION("Create an epoll instance (obsolete)") + KAPI_LONG_DESC("Creates a new epoll instance and returns a file descripto= r " + "referring to that instance. This is the obsolete interface; " + "new applications should use epoll_create1() instead.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "size", "int", "Ignored hint about expected number of file = descriptors") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + KAPI_PARAM_RANGE(1, INT_MAX) + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .constraints =3D "Must be greater than zero (ignored since Linux 2.6.8)", + KAPI_PARAM_END + + KAPI_RETURN("long", "File descriptor on success, negative error code on f= ailure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_FD, + KAPI_RETURN_END + + KAPI_ERROR(0, -EINVAL, "EINVAL", "size <=3D 0", + "The size parameter must be greater than zero.") + KAPI_ERROR(1, -EMFILE, "EMFILE", "Per-process file descriptor limit reach= ed", + "The per-process limit on the number of open file descriptors has bee= n reached.") + KAPI_ERROR(2, -ENFILE, "ENFILE", "System file table overflow", + "The system-wide limit on the total number of open files has been rea= ched.") + KAPI_ERROR(3, -ENOMEM, "ENOMEM", "Insufficient kernel memory", + "There was insufficient kernel memory to create the epoll instance.") + KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal", + "The system call was interrupted by a signal before the epoll instanc= e could be created.") + + .error_count =3D 5, + .param_count =3D 1, + .since_version =3D "2.6", + .deprecated =3D true, + 
.replacement =3D "epoll_create1", + .examples =3D "int epfd =3D epoll_create(1024); // size is ignored since = Linux 2.6.8", + .notes =3D "Since Linux 2.6.8, the size argument is ignored but must be g= reater than zero. " + "The kernel dynamically sizes the data structures as needed. " + "For new applications, epoll_create1() should be preferred as it allows= " + "setting close-on-exec flag atomically.", + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_RESOURCE_CREATE | KAPI_EFFECT_ALLOC_MEMOR= Y, + "epoll instance", + "Creates a new epoll instance and allocates kernel memory for it") + KAPI_EFFECT_CONDITION("Always when successful") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_RESOURCE_CREATE, + "file descriptor", + "Allocates a new file descriptor in the process's file descriptor tabl= e") + KAPI_EFFECT_CONDITION("Always when successful") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "kernel file table", + "Adds new file structure to system-wide file table") + KAPI_EFFECT_CONDITION("Always when successful") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(3) + + /* State transitions */ + KAPI_STATE_TRANS(0, "epoll instance", "non-existent", "created and empty", + "A new epoll instance is created with no monitored file descriptors") + KAPI_STATE_TRANS_COND("On successful creation") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "file descriptor", "unallocated", "allocated and open= ", + "A new file descriptor is allocated in the process's fd table") + KAPI_STATE_TRANS_COND("On successful creation") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(2) + + /* Signal specifications */ + KAPI_SIGNAL(0, SIGINT, "SIGINT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_= RETURN) + KAPI_SIGNAL_CONDITION("During kernel memory allocation") + KAPI_SIGNAL_DESC("If interrupted during memory allocation or fd allocati= on, returns -EINTR") + 
KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGTERM, "SIGTERM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("During kernel memory allocation") + KAPI_SIGNAL_DESC("If interrupted during memory allocation or fd allocati= on, returns -EINTR") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("At any point during the syscall") + KAPI_SIGNAL_DESC("Process is terminated immediately, epoll instance crea= tion may be incomplete") + KAPI_SIGNAL_END + + KAPI_SIGNAL(3, SIGHUP, "SIGHUP", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_= RETURN) + KAPI_SIGNAL_CONDITION("During kernel operations") + KAPI_SIGNAL_DESC("If process is being terminated due to terminal hangup,= may return -EINTR") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(4, SIGPIPE, "SIGPIPE", KAPI_SIGNAL_IGNORE, KAPI_SIGNAL_ACTION= _DEFAULT) + KAPI_SIGNAL_CONDITION("Never generated by epoll_create") + KAPI_SIGNAL_DESC("This signal is not relevant to epoll_create as it does= n't involve pipes") + KAPI_SIGNAL_END + + .signal_count =3D 5, +KAPI_END_SPEC; + SYSCALL_DEFINE1(epoll_create, int, size) { if (size <=3D 0) --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 308912C327C; Sat, 14 Jun 2025 13:49:04 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908945; cv=none; b=a+X/5G1HVvKErQbQLlkbvGhNQHwLpXOjVFEK7lRqJhVm2gY8rwCQqw4hgqA/cj1w7lzPX++OVUOZhSBXbUii9Ft6yK2XZzT/JtbKOpDFEr33GMFLFT2WvsIMeomGVhqLB7ZTpbrcd0ks9AzvzWq5aOphwztMiA1pRtH6/hSub/I= ARC-Message-Signature: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1749908945; c=relaxed/simple; bh=YYNIrR9iBWnDzlT+z7s/BKJnGkQz73iGzIzH0z/SgP8=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=LymzuS6OXCJsOact4JuHSq5mpWH644w3dQXGoaPw7DetuVuSRRHdWr3c1HAy0IrkKBrjTbPY8iucghe8Uzsb/EPTXesXdZcxez9K5GVp/c2cRPyFj08ynVkXK0BIkDBo5v81mEZWFs0t05y/2RcEXkGttbJfg4Ot2CGazMfuLw0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=dyhGK3LX; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="dyhGK3LX" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 179F9C4CEF1; Sat, 14 Jun 2025 13:49:04 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908944; bh=YYNIrR9iBWnDzlT+z7s/BKJnGkQz73iGzIzH0z/SgP8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=dyhGK3LXEKlmW+/QCjXj/Dp7aWAMGMhLWU8BBtPgtHXB8CapL1h6cliFu/Je/0kJ5 aQ2JpB3Gx4x0n3cI2H+flSwRIWwgNXfhbA8/jmZhAVEhrobeeaxNaFfW9lFnwrHsiB w1muA4oTCLFpQhUjiHlSjVz9cFavjGNXWlCX8nRaPsByfAz08y0TBf+CsX0h9Choip 8zfALBWpGh4oqVzdYXRO9CS0+FShTQPEBOWpkIaZmtB5UTIfaSQPqZH3SCLj3aExX3 LHjc2G2J3sA96lm7cuSTelBpwzGw5v50kFzBlG/fxcmR0Hq3ipK4FrMfP4/wG76+OP 4ppKprzMm0uHw== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 04/19] eventpoll: add API specification for epoll_ctl Date: Sat, 14 Jun 2025 09:48:43 -0400 Message-Id: <20250614134858.790460-5-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" 
Add kernel API specification for the epoll_ctl() system call. Signed-off-by: Sasha Levin --- fs/eventpoll.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 50adea7ba43d1..409a0c440f112 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2647,6 +2647,209 @@ int do_epoll_ctl(int epfd, int op, int fd, struct e= poll_event *epds, * the eventpoll file that enables the insertion/removal/change of * file descriptors inside the interest set. */ + +/* Valid values for epoll_ctl op parameter */ +static const s64 epoll_ctl_valid_ops[] =3D { + EPOLL_CTL_ADD, + EPOLL_CTL_DEL, + EPOLL_CTL_MOD, +}; + +DEFINE_KERNEL_API_SPEC(sys_epoll_ctl) + KAPI_DESCRIPTION("Control interface for an epoll file descriptor") + KAPI_LONG_DESC("Performs control operations on the epoll instance referre= d to by epfd. " + "It requests that the operation op be performed for the target fi= le " + "descriptor fd. Valid operations are adding, modifying, or deleti= ng " + "file descriptors from the interest set.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "epfd", "int", "File descriptor referring to the epoll inst= ance") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_FD, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + KAPI_PARAM_END + + KAPI_PARAM(1, "op", "int", "Operation to be performed") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_ENUM, + .enum_values =3D epoll_ctl_valid_ops, + .enum_count =3D ARRAY_SIZE(epoll_ctl_valid_ops), + .constraints =3D "Must be EPOLL_CTL_ADD, EPOLL_CTL_DEL, or EPOLL_CTL_MOD= ", + KAPI_PARAM_END + + KAPI_PARAM(2, "fd", "int", "File descriptor to be monitored") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_FD, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Must refer to a file that supports poll operations", + KAPI_PARAM_END + + KAPI_PARAM(3, "event", "struct 
epoll_event __user *", "Settings for the f= ile descriptor") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(struct epoll_event)) + .constraints =3D "Required for ADD and MOD operations, ignored for DEL", + KAPI_PARAM_END + + KAPI_RETURN("long", "0 on success, negative error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_EXACT, + KAPI_RETURN_SUCCESS(0) + KAPI_RETURN_END + + KAPI_ERROR(0, -EBADF, "EBADF", "epfd or fd is not a valid file descriptor= ", + "One of the file descriptors is invalid or has been closed.") + KAPI_ERROR(1, -EEXIST, "EEXIST", "op is EPOLL_CTL_ADD and fd is already r= egistered", + "The file descriptor is already present in the epoll instance.") + KAPI_ERROR(2, -EINVAL, "EINVAL", "Invalid operation or parameters", + "epfd is not an epoll file descriptor, epfd =3D=3D fd, op is not vali= d, " + "or EPOLLEXCLUSIVE was specified with invalid events.") + KAPI_ERROR(3, -ENOENT, "ENOENT", "op is EPOLL_CTL_MOD or EPOLL_CTL_DEL an= d fd is not registered", + "The file descriptor is not registered with this epoll instance.") + KAPI_ERROR(4, -ENOMEM, "ENOMEM", "Insufficient kernel memory", + "There was insufficient memory to handle the requested operation.") + KAPI_ERROR(5, -EPERM, "EPERM", "Target file does not support epoll", + "The target file fd does not support poll operations.") + KAPI_ERROR(6, -ELOOP, "ELOOP", "Circular monitoring detected", + "fd refers to an epoll instance and this operation would result " + "in a circular loop of epoll instances monitoring one another.") + KAPI_ERROR(7, -EFAULT, "EFAULT", "event points outside accessible address= space", + "The memory area pointed to by event is not accessible with write per= missions.") + KAPI_ERROR(8, -EAGAIN, "EAGAIN", "Nonblocking mode and lock not available= ", + "The operation was called in nonblocking mode and could not acquire n= ecessary locks.") + KAPI_ERROR(9, -ENOSPC, 
"ENOSPC", "User epoll watch limit exceeded", + "The limit on the total number of epoll watches was exceeded. " + "See /proc/sys/fs/epoll/max_user_watches.") + KAPI_ERROR(10, -EINTR, "EINTR", "Interrupted by signal", + "The system call was interrupted by a signal before completion.") + + .error_count =3D 11, + .param_count =3D 4, + .since_version =3D "2.6", + + /* Locking specifications */ + KAPI_LOCK(0, "ep->mtx", KAPI_LOCK_MUTEX) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects the epoll instance during control operations") + KAPI_LOCK_END + + KAPI_LOCK(1, "epnested_mutex", KAPI_LOCK_MUTEX) + KAPI_LOCK_DESC("Global mutex to prevent circular epoll structures (acqui= red for nested epoll)") + KAPI_LOCK_END + + .lock_count =3D 2, + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_ALLOC_MEMORY, + "epoll interest list", + "Adds new epitem structure to the epoll interest list") + KAPI_EFFECT_CONDITION("When op is EPOLL_CTL_ADD") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_FREE_MEMORY, + "epoll interest list", + "Removes epitem structure from the epoll interest list") + KAPI_EFFECT_CONDITION("When op is EPOLL_CTL_DEL") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "epoll event mask", + "Modifies the event mask for an existing epitem") + KAPI_EFFECT_CONDITION("When op is EPOLL_CTL_MOD") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE, + "file reference count", + "Increases reference count on the monitored file") + KAPI_EFFECT_CONDITION("When op is EPOLL_CTL_ADD") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE, + "file reference count", + "Decreases reference count on the monitored file") + KAPI_EFFECT_CONDITION("When op is EPOLL_CTL_DEL") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + 
KAPI_SIDE_EFFECT(5, KAPI_EFFECT_SCHEDULE, + "process state", + "May wake up processes waiting on the epoll instance if events become = available") + KAPI_EFFECT_CONDITION("When adding or modifying entries that match curre= nt events") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(6) + + /* State transitions */ + KAPI_STATE_TRANS(0, "epoll entry", "non-existent", "monitored", + "File descriptor is added to epoll interest list") + KAPI_STATE_TRANS_COND("When op is EPOLL_CTL_ADD") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "epoll entry", "monitored", "non-existent", + "File descriptor is removed from epoll interest list") + KAPI_STATE_TRANS_COND("When op is EPOLL_CTL_DEL") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "epoll entry", "monitored with events A", "monitored = with events B", + "Event mask for file descriptor is modified") + KAPI_STATE_TRANS_COND("When op is EPOLL_CTL_MOD") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "epoll ready list", "empty or partial", "contains new= events", + "Ready list may be updated if new/modified entry has pending events") + KAPI_STATE_TRANS_COND("When monitored fd has events matching the mask") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(4) + + /* Signal specifications */ + KAPI_SIGNAL(0, SIGINT, "SIGINT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_= RETURN) + KAPI_SIGNAL_CONDITION("During mutex acquisition or memory allocation") + KAPI_SIGNAL_DESC("Returns -EINTR if interrupted before completing the op= eration") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGTERM, "SIGTERM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("During mutex acquisition or memory allocation") + KAPI_SIGNAL_DESC("Returns -EINTR if interrupted before completing the op= eration") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("At any point during the syscall") + 
KAPI_SIGNAL_DESC("Process is terminated immediately, operation may be pa= rtially completed") + KAPI_SIGNAL_END + + KAPI_SIGNAL(3, SIGHUP, "SIGHUP", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_= RETURN) + KAPI_SIGNAL_CONDITION("During blocking operations") + KAPI_SIGNAL_DESC("Returns -EINTR if terminal hangup occurs") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(4, SIGURG, "SIGURG", KAPI_SIGNAL_IGNORE, KAPI_SIGNAL_ACTION_D= EFAULT) + KAPI_SIGNAL_CONDITION("May be generated by monitored sockets") + KAPI_SIGNAL_DESC("Urgent data signals from monitored sockets do not affe= ct epoll_ctl") + KAPI_SIGNAL_END + + .signal_count =3D 5, + + .examples =3D "struct epoll_event ev;\n" + "ev.events =3D EPOLLIN | EPOLLOUT;\n" + "ev.data.fd =3D fd;\n" + "if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) =3D=3D -1)\n" + " handle_error();\n", + .notes =3D "EPOLL_CTL_DEL ignores the event parameter (can be NULL). " + "EPOLLEXCLUSIVE flag has restrictions and cannot be used with EPOLL_CTL= _MOD. " + "The epoll instance maintains a reference to registered files until the= y are " + "explicitly removed with EPOLL_CTL_DEL or the epoll instance is closed.= ", +KAPI_END_SPEC; + SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event) { --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B88BE2E1746; Sat, 14 Jun 2025 13:49:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908945; cv=none; b=H5x3gdvKXauukYofvvgOh0JPKo8APHpoK3d43ozVHrvxe1iMpoHlP4w2FcpJY4EjVGioGnUzQ/1bE1qz6uR1hMFAsrGy/6idAFkxuH3TzlIQ06ePv5JlDCAwXJZwQ+v7lsLocLK4tbC/uY10Y3HyYcehGybRuN7sE1VELZ1m5Wo= ARC-Message-Signature: 
i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908945; c=relaxed/simple; bh=tsynXkXHj5iuDjiZyCfJZGrDQrbT0Giwhd4VvYtKm/I=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Z9sZb4HmvMP1D5Jel1b3NzQn0z6lBIIjjHQnbuO3upLPBOvJ00y+NCJGW/tWC4s4ECroOU+nmptnyqyF5dT6tsHW/sW7BdIgUnsJgAXg4Q4NUGEjjC+tqj0b4Vfj4as7VGtdYydmrfMY03PFBwI51c+u6zyEcJLtJsYbeYMOOkc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Pyu9KU7O; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Pyu9KU7O" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 02477C4CEF0; Sat, 14 Jun 2025 13:49:04 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908945; bh=tsynXkXHj5iuDjiZyCfJZGrDQrbT0Giwhd4VvYtKm/I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Pyu9KU7Oto+SK0cKYRCB/J8DjUNW3hD8TdmpKb+lUDjhZXziQ+NodABgb0GLB/1G3 +X+2K8p3DXezqTCPyAPRCpk7mvHdB3L4rv9p3J0NFkWdtOdwpdgPHSEgdntBtyrqto BiOcMdqUQrc84Y0fPLTvPpwzmEoKN0K+eKCmHcyDe9M76IPN0VRqMjdlTAg8SVtZBJ sCRwOKUzsz0qAR0oP/ccJgsdl0fM88HPiw7q/O0ziHLKUxkqCpN9PYD7XcIBgQCkZJ RI+uRVAxzHiBpJ/S4aQyKbbWMy8D9uRE03vURnb2GCBcK9i+GbszP6tM72RdklOLs0 04HS/sEcXyoUQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 05/19] eventpoll: add API specification for epoll_wait Date: Sat, 14 Jun 2025 09:48:44 -0400 Message-Id: <20250614134858.790460-6-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: 
text/plain; charset="utf-8" Add kernel API specification for the epoll_wait() system call. Signed-off-by: Sasha Levin --- fs/eventpoll.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 409a0c440f112..254b50d687d37 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2932,6 +2932,188 @@ static int do_epoll_wait(int epfd, struct epoll_eve= nt __user *events, return ep_poll(ep, events, maxevents, to); } =20 + +DEFINE_KERNEL_API_SPEC(sys_epoll_wait) + KAPI_DESCRIPTION("Wait for events on an epoll instance") + KAPI_LONG_DESC("Waits for events on the epoll instance referred to by epf= d. " + "The function blocks the calling thread until either at least one= of the " + "file descriptors referred to by epfd becomes ready for some I/O = operation, " + "the call is interrupted by a signal handler, or the timeout expi= res.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "epfd", "int", "File descriptor referring to the epoll inst= ance") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_FD, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + KAPI_PARAM_END + + KAPI_PARAM(1, "events", "struct epoll_event __user *", "Buffer where read= y events will be stored") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(struct epoll_event)) /* Base size of single elem= ent */ + .size_param_idx =3D 2, /* Size determined by maxevents parameter */ + .size_multiplier =3D sizeof(struct epoll_event), + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Must point to an array of at least maxevents epoll_eve= nt structures", + KAPI_PARAM_END + + KAPI_PARAM(2, "maxevents", "int", "Maximum number of events to return") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + KAPI_PARAM_RANGE(1, INT_MAX / sizeof(struct epoll_event)) /* EP_MAX_EVE= NTS */ + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + 
.constraints =3D "Must be greater than zero and not exceed system limits= ", + KAPI_PARAM_END + + KAPI_PARAM(3, "timeout", "int", "Timeout in milliseconds") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "-1 blocks indefinitely, 0 returns immediately, >0 spec= ifies milliseconds to wait", + KAPI_PARAM_END + + KAPI_RETURN("long", "Number of ready file descriptors on success, negativ= e error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_RANGE, + .success_min =3D 0, + .success_max =3D INT_MAX, + KAPI_RETURN_END + + KAPI_ERROR(0, -EBADF, "EBADF", "epfd is not a valid file descriptor", + "The epoll file descriptor is invalid or has been closed.") + KAPI_ERROR(1, -EFAULT, "EFAULT", "events points outside accessible addres= s space", + "The memory area pointed to by events is not accessible with write pe= rmissions.") + KAPI_ERROR(2, -EINTR, "EINTR", "Call interrupted by signal handler", + "The call was interrupted by a signal handler before any events " + "became ready or the timeout expired.") + KAPI_ERROR(3, -EINVAL, "EINVAL", "Invalid parameters", + "epfd is not an epoll file descriptor, or maxevents is less than or e= qual to zero.") + + .error_count =3D 4, + .param_count =3D 4, + .since_version =3D "2.6", + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE, + "ready list", + "Removes events from the epoll ready list as they are reported") + KAPI_EFFECT_CONDITION("When events are available and level-triggered") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_SCHEDULE, + "process state", + "Blocks the calling thread until events are available or timeout") + KAPI_EFFECT_CONDITION("When timeout !=3D 0 and no events are immediately= available") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "user memory", + "Writes event data to user-provided buffer") + KAPI_EFFECT_CONDITION("When 
events are available") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_PROCESS_STATE, + "signal state", + "Clears TIF_SIGPENDING if a signal was pending") + KAPI_EFFECT_CONDITION("When returning due to signal interruption") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(4) + + /* State transitions */ + KAPI_STATE_TRANS(0, "process", "running", "blocked", + "Process blocks waiting for events") + KAPI_STATE_TRANS_COND("When no events available and timeout !=3D 0") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "process", "blocked", "running", + "Process wakes up due to events, timeout, or signal") + KAPI_STATE_TRANS_COND("When wait condition is satisfied") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "epoll ready list", "has events", "events consumed", + "Ready events are consumed from the epoll instance") + KAPI_STATE_TRANS_COND("When returning events to userspace") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "events buffer", "uninitialized", "contains event dat= a", + "User buffer is populated with ready events") + KAPI_STATE_TRANS_COND("When events are available") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(4) + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "ANY", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN) + KAPI_SIGNAL_CONDITION("Any pending signal") + KAPI_SIGNAL_DESC("Any signal delivered to the thread will interrupt epol= l_wait() " + "and cause it to return -EINTR. This is checked via signal_pending() " + "after checking for available events.") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("Always delivered, cannot be blocked") + KAPI_SIGNAL_DESC("SIGKILL will terminate the process. 
The epoll_wait cal= l will " + "not return as the process is terminated immediately.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, SIGSTOP, "SIGSTOP", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_STOP) + KAPI_SIGNAL_CONDITION("Always delivered, cannot be blocked") + KAPI_SIGNAL_DESC("SIGSTOP will stop the process. When continued with SIG= CONT, " + "epoll_wait may return -EINTR if the timeout has not expired.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(3, SIGCONT, "SIGCONT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_CONTINUE) + KAPI_SIGNAL_CONDITION("When process is stopped") + KAPI_SIGNAL_DESC("SIGCONT resumes a stopped process. If epoll_wait was i= nterrupted " + "by SIGSTOP, it may return -EINTR when continued.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(4, SIGALRM, "SIGALRM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Timer expiration") + KAPI_SIGNAL_DESC("SIGALRM from timer expiration will interrupt epoll_wai= t with -EINTR") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + .signal_count =3D 5, + .signal_mask_count =3D 0, /* No signal mask manipulation in epoll_wait */ + + /* Locking specifications */ + KAPI_LOCK(0, "ep->lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects the ready list while checking for and consuming= events") + KAPI_LOCK_END + + KAPI_LOCK(1, "ep->mtx", KAPI_LOCK_MUTEX) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects against concurrent epoll_ctl operations during = wait") + KAPI_LOCK_END + + .lock_count =3D 2, + + .examples =3D "struct epoll_event events[10];\n" + "int nfds =3D epoll_wait(epfd, events, 10, 1000);\n" + "if (nfds =3D=3D -1) {\n" + " perror(\"epoll_wait\");\n" + " exit(EXIT_FAILURE);\n" + "}\n" + "for (int n =3D 0; n < nfds; ++n) {\n" + " if (events[n].data.fd =3D=3D listen_sock) {\n" + " accept_new_connection();\n" + " } else {\n" + " handle_io(events[n].data.fd);\n" + " }\n" + "}", + .notes =3D "The timeout uses CLOCK_MONOTONIC and may be rounded up to sys= 
tem clock granularity. " + "A timeout of -1 causes epoll_wait to block indefinitely, while a timeo= ut of 0 " + "causes it to return immediately even if no events are available. " + "The struct epoll_event is defined as containing events (uint32_t) and = data (epoll_data_t union).", +KAPI_END_SPEC; + SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A2C212E2F0E; Sat, 14 Jun 2025 13:49:06 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908946; cv=none; b=YOl81LAb7zPKpVi4jr5PCV+hL0Q/j2RhYFs9lBXP/gsYsRCg+ILHS156nt7vNYZX7QSlU6jSZN7sXnm2ljJbxBkYARKbkjE6r334ByE9HdL8SX5YEPWERhB9lok3USOnN3lV7pA6IY4zv3TUOklmC46/dQpuo38qT3r5cS6sq1o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908946; c=relaxed/simple; bh=2/7buoiE7ii59cXEfLsm+K3hxqXnF9MpkV59KWDmUSw=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=ib2RZ2l8/uTN47yRtj8naiXtSXzPw4zqiTbog3ibXHdo9Eb43jffhWjDQ1Sr1YEP+Nu7xgYDq9ulEPX7OhQB2yPX3lLOsZbnjZSV+TnTwafze19IfOF0/AAX3cheMK2mGvjWs2mFwDIdF+YxBcwebdQ3NaeM7MvdZ20HvWiTHm4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=NsnFNkM1; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="NsnFNkM1" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DE603C4CEEB; Sat, 14 Jun 2025 13:49:05 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908946; bh=2/7buoiE7ii59cXEfLsm+K3hxqXnF9MpkV59KWDmUSw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=NsnFNkM1DDcwiR4YRbbQlgS9bfKqcEttPgXJ1HtW7Ko3oXEAxV3tGFie5Jk8v/wEN cCU0RF8r3iLKNrv1n2vDwAwjJgXRvgUIIp9VzflHI03YjDtpqDPbcuOTytm93piNDx W3Z/EHBsQ5tba9KnSRvETmJif+IoM46SBHHM7fVvPl5/5GfPPM7qnrpkL6CJEpgC2+ 0DcP8lRCNS4NSHHOmKwLDoQZHvTfRnaukNh0fH+wWTWXRZ7j2uvPoHxjjZFseKaYEN 9z+/OwJCOkhH3wwqGcSTWTIlcgkMaWL0YMwI3Sfqp/Fdbr5P8IeDdCxuFx1wW19Kd+ RWBd0e3wcggAQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 06/19] eventpoll: add API specification for epoll_pwait Date: Sat, 14 Jun 2025 09:48:45 -0400 Message-Id: <20250614134858.790460-7-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the epoll_pwait() system call. 
Signed-off-by: Sasha Levin --- fs/eventpoll.c | 230 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 254b50d687d37..8bd25f9230fc8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -3148,6 +3148,236 @@ static int do_epoll_pwait(int epfd, struct epoll_ev= ent __user *events, return error; } =20 + +DEFINE_KERNEL_API_SPEC(sys_epoll_pwait) + KAPI_DESCRIPTION("Wait for events on an epoll instance with signal handli= ng") + KAPI_LONG_DESC("Similar to epoll_wait(), but allows the caller to safely = wait for " + "either events on the epoll instance or the delivery of a signal.= " + "The sigmask argument specifies a signal mask which is atomically= " + "set during the wait, allowing signals to be blocked while not wa= iting " + "and ensuring no signal is lost between checking for events and b= locking.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "epfd", "int", "File descriptor referring to the epoll inst= ance") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_FD, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + KAPI_PARAM_END + + KAPI_PARAM(1, "events", "struct epoll_event __user *", "Buffer where read= y events will be stored") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(struct epoll_event)) + .size_param_idx =3D 2, /* Size determined by maxevents parameter */ + .size_multiplier =3D sizeof(struct epoll_event), + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Must point to an array of at least maxevents epoll_eve= nt structures", + KAPI_PARAM_END + + KAPI_PARAM(2, "maxevents", "int", "Maximum number of events to return") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + KAPI_PARAM_RANGE(1, INT_MAX / sizeof(struct epoll_event)) /* EP_MAX_EVE= NTS */ + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .constraints =3D "Must be greater than zero and not exceed system 
limits= ", + KAPI_PARAM_END + + KAPI_PARAM(3, "timeout", "int", "Timeout in milliseconds") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "-1 blocks indefinitely, 0 returns immediately, >0 spec= ifies milliseconds to wait", + KAPI_PARAM_END + + KAPI_PARAM(4, "sigmask", "const sigset_t __user *", "Signal mask to atomi= cally set during wait") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(sigset_t)) + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Can be NULL if no signal mask change is desired", + KAPI_PARAM_END + + KAPI_PARAM(5, "sigsetsize", "size_t", "Size of the signal set in bytes") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_RANGE(sizeof(sigset_t), sizeof(sigset_t)) + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .constraints =3D "Must be sizeof(sigset_t)", + KAPI_PARAM_END + + KAPI_RETURN("long", "Number of ready file descriptors on success, negativ= e error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_RANGE, + .success_min =3D 0, + .success_max =3D INT_MAX, + KAPI_RETURN_END + + KAPI_ERROR(0, -EBADF, "EBADF", "epfd is not a valid file descriptor", + "The epoll file descriptor is invalid or has been closed.") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Memory area not accessible", + "The memory area pointed to by events or sigmask is not accessible.") + KAPI_ERROR(2, -EINTR, "EINTR", "Call interrupted by signal handler", + "The call was interrupted by a signal handler before any events " + "became ready or the timeout expired; see signal(7).") + KAPI_ERROR(3, -EINVAL, "EINVAL", "Invalid parameters", + "epfd is not an epoll file descriptor, maxevents is less than or equa= l to zero, " + "or sigsetsize is not equal to sizeof(sigset_t).") + + .error_count =3D 4, + .param_count =3D 6, + .since_version =3D "2.6.19", + + /* Side effects 
*/ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE, + "signal mask", + "Atomically sets the signal mask for the calling thread") + KAPI_EFFECT_CONDITION("When sigmask is not NULL") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "ready list", + "Removes events from the epoll ready list as they are reported") + KAPI_EFFECT_CONDITION("When events are available and level-triggered") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_SCHEDULE, + "process state", + "Blocks the calling thread until events are available, timeout, or sig= nal") + KAPI_EFFECT_CONDITION("When timeout !=3D 0 and no events are immediately= available") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE, + "user memory", + "Writes event data to user-provided buffer") + KAPI_EFFECT_CONDITION("When events are available") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_PROCESS_STATE, + "saved signal mask", + "Saves and restores the original signal mask") + KAPI_EFFECT_CONDITION("When sigmask is not NULL") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(5) + + /* State transitions */ + KAPI_STATE_TRANS(0, "signal mask", "original mask", "user-specified mask", + "Thread's signal mask is atomically changed to the provided mask") + KAPI_STATE_TRANS_COND("When sigmask is not NULL") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "process", "running", "blocked", + "Process blocks waiting for events with specified signal mask") + KAPI_STATE_TRANS_COND("When no events available and timeout !=3D 0") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "process", "blocked", "running", + "Process wakes up due to events, timeout, or unblocked signal") + KAPI_STATE_TRANS_COND("When wait condition is satisfied") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "signal mask", "user-specified mask", "original mask", + "Thread's signal mask is restored to its original value") + 
KAPI_STATE_TRANS_COND("When returning from epoll_pwait") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "pending signals", "blocked", "deliverable", + "Signals that were blocked by the temporary mask become deliverable") + KAPI_STATE_TRANS_COND("When signal mask is restored and signals were pen= ding") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(5) + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "ANY_UNBLOCKED", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Signal not blocked by provided sigmask") + KAPI_SIGNAL_DESC("Any signal not blocked by the sigmask parameter will i= nterrupt " + "epoll_pwait() and cause it to return -EINTR. The signal mask is " + "atomically set via set_user_sigmask() and restored via " + "restore_saved_sigmask_unless() before returning.") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("Cannot be blocked by sigmask") + KAPI_SIGNAL_DESC("SIGKILL cannot be blocked and will terminate the proce= ss immediately. " + "The epoll_pwait call will not return.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, SIGSTOP, "SIGSTOP", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_STOP) + KAPI_SIGNAL_CONDITION("Cannot be blocked by sigmask") + KAPI_SIGNAL_DESC("SIGSTOP cannot be blocked and will stop the process. W= hen continued " + "with SIGCONT, epoll_pwait may return -EINTR.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(3, 0, "BLOCKED_SIGNALS", KAPI_SIGNAL_BLOCK, KAPI_SIGNAL_ACTIO= N_DEFAULT) + KAPI_SIGNAL_CONDITION("Signals in provided sigmask") + KAPI_SIGNAL_DESC("Signals specified in the sigmask parameter are blocked= for the " + "duration of the epoll_pwait call. 
They remain pending and will be " + "delivered after the signal mask is restored.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(4, SIGCONT, "SIGCONT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_CONTINUE) + KAPI_SIGNAL_CONDITION("When process is stopped") + KAPI_SIGNAL_DESC("SIGCONT resumes a stopped process. If epoll_pwait was = interrupted " + "by SIGSTOP, it may return -EINTR when continued.") + KAPI_SIGNAL_END + + .signal_count =3D 5, + + /* Signal mask specifications */ + KAPI_SIGNAL_MASK(0, "user_sigmask", "User-provided signal mask atomically= applied") + .description =3D "The signal mask provided in the sigmask parameter is a= tomically " + "set for the duration of the wait operation. This prevents race " + "conditions between checking for events and blocking. The origin= al " + "signal mask is restored before epoll_pwait returns, unless the " + "return value is -EINTR (in which case the mask is restored by " + "the signal delivery mechanism)." + KAPI_SIGNAL_MASK_END + + .signal_mask_count =3D 1, + + /* Locking specifications */ + KAPI_LOCK(0, "ep->lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects the ready list while checking for and consuming= events") + KAPI_LOCK_END + + KAPI_LOCK(1, "ep->mtx", KAPI_LOCK_MUTEX) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects against concurrent epoll_ctl operations during = wait") + KAPI_LOCK_END + + .lock_count =3D 2, + + .examples =3D "sigset_t sigmask;\n" + "struct epoll_event events[10];\n\n" + "/* Block SIGINT during epoll_pwait */\n" + "sigemptyset(&sigmask);\n" + "sigaddset(&sigmask, SIGINT);\n\n" + "int nfds =3D epoll_pwait(epfd, events, 10, 1000, &sigmask, sizeof(s= igmask));\n" + "if (nfds =3D=3D -1) {\n" + " if (errno =3D=3D EINTR) {\n" + " /* Handle signal */\n" + " }\n" + " perror(\"epoll_pwait\");\n" + " exit(EXIT_FAILURE);\n" + "}", + .notes =3D "epoll_pwait() is equivalent to atomically executing:\n" + " sigset_t origmask;\n" + " 
pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);\n" + " ready =3D epoll_wait(epfd, events, maxevents, timeout);\n" + " pthread_sigmask(SIG_SETMASK, &origmask, NULL);\n" + "This atomicity prevents race conditions where a signal could be delive= red " + "after checking for events but before blocking in epoll_wait(). " + "The signal mask is always restored before epoll_pwait() returns.", +KAPI_END_SPEC; + SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, event= s, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8E21A2E1733; Sat, 14 Jun 2025 13:49:07 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908947; cv=none; b=TTruaolGLmVxZeVeqQ41btYX46CfMSSosVJjSDSLlTUmVthsvzAcd0I5Hm9ckAHGt6Q8++qaSAN6sDJRKMxub1hqScT4WmIW54hxjek2hUfa3bpZ4SNNunSZypMHvxELlEilgVgKxDZZxvHHHumdq3PBSyzUPHOIkFSMne5mpTo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908947; c=relaxed/simple; bh=L9tgqs/QBzmOmiI24xd5dFd0SfY+ckEh0qbjk7cnIwY=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=SrsNpx3IkRnToz9H0Yi5wRornEMrEB6ODdobmV+PZEn+pdGWgBhEFOOHvrc73xdZHtcEW3klS3uk8j5L5TZSmSlOdfMvzzM4KA8ZdEqZwc31X6dg3NEeUcyHEcYN552SEOsGmwQDDc+dtxxkTBPIKYBQe9wPl8GYUIHplsV0uG0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SXUQ1eBV; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org 
header.b="SXUQ1eBV" Received: by smtp.kernel.org (Postfix) with ESMTPSA id CABB9C4CEEB; Sat, 14 Jun 2025 13:49:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908947; bh=L9tgqs/QBzmOmiI24xd5dFd0SfY+ckEh0qbjk7cnIwY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SXUQ1eBVWz5hPnjhg4D6PDzc61CX3vk9IoYr9X/PkHXySYUU303GOEcO7GdgM6JmX JTusTbzNbSGgj7fZFrBJfYNbjWtEt0pgAhkG1J/6dNvuxIc0EP2iAY04d6pB7c/oM1 OD7uW9T9t/7z2u+e31htbmSMQ2R3RwywG1701272OcbfzG0qdlpcYmmKzslyMNzLzU AZkCnGPo8yG+gJ64GUUZW5vH3Baiz3fYH1RPPRxNEiWAg26ch8yzZ6QSpd1iNjBULA tdgL1vpdZdEazPY2pOohcUSEIOcZg6x5ZdqFvk9CNQNqGNookN/h/Yg8abaGE3eUl3 vx+iNFtIQv6Jw== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 07/19] eventpoll: add API specification for epoll_pwait2 Date: Sat, 14 Jun 2025 09:48:46 -0400 Message-Id: <20250614134858.790460-8-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the epoll_pwait2() system call. 
Signed-off-by: Sasha Levin --- fs/eventpoll.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8bd25f9230fc8..0e90d66467010 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -3389,6 +3389,250 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epol= l_event __user *, events, sigmask, sigsetsize); } =20 + +DEFINE_KERNEL_API_SPEC(sys_epoll_pwait2) + KAPI_DESCRIPTION("Wait for events on an epoll instance with nanosecond pr= ecision timeout") + KAPI_LONG_DESC("Similar to epoll_pwait(), but takes a timespec structure = that allows " + "nanosecond precision for the timeout value. This provides more a= ccurate " + "timeout control compared to the millisecond precision of epoll_p= wait(). " + "Like epoll_pwait(), it atomically sets a signal mask during the = wait.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "epfd", "int", "File descriptor referring to the epoll inst= ance") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_FD, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + KAPI_PARAM_END + + KAPI_PARAM(1, "events", "struct epoll_event __user *", "Buffer where read= y events will be stored") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(struct epoll_event)) + .size_param_idx =3D 2, /* Size determined by maxevents parameter */ + .size_multiplier =3D sizeof(struct epoll_event), + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Must point to an array of at least maxevents epoll_eve= nt structures", + KAPI_PARAM_END + + KAPI_PARAM(2, "maxevents", "int", "Maximum number of events to return") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + KAPI_PARAM_RANGE(1, INT_MAX / sizeof(struct epoll_event)) /* EP_MAX_EVE= NTS */ + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .constraints =3D "Must be greater than zero and not exceed system limits= ", + KAPI_PARAM_END + + 
KAPI_PARAM(3, "timeout", "const struct __kernel_timespec __user *", "Time= out with nanosecond precision") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(struct __kernel_timespec)) + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "NULL means block indefinitely, {0, 0} returns immediat= ely, " + "negative values are invalid", + KAPI_PARAM_END + + KAPI_PARAM(4, "sigmask", "const sigset_t __user *", "Signal mask to atomi= cally set during wait") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL) + .type =3D KAPI_TYPE_USER_PTR, + KAPI_PARAM_SIZE(sizeof(sigset_t)) + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Can be NULL if no signal mask change is desired", + KAPI_PARAM_END + + KAPI_PARAM(5, "sigsetsize", "size_t", "Size of the signal set in bytes") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_RANGE(sizeof(sigset_t), sizeof(sigset_t)) + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .constraints =3D "Must be sizeof(sigset_t)", + KAPI_PARAM_END + + KAPI_RETURN("long", "Number of ready file descriptors on success, negativ= e error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_RANGE, + .success_min =3D 0, + .success_max =3D INT_MAX, + KAPI_RETURN_END + + KAPI_ERROR(0, -EBADF, "EBADF", "epfd is not a valid file descriptor", + "The epoll file descriptor is invalid or has been closed.") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Memory area not accessible", + "The memory area pointed to by events, timeout, or sigmask is not acc= essible.") + KAPI_ERROR(2, -EINTR, "EINTR", "Call interrupted by signal handler", + "The call was interrupted by a signal handler before any events " + "became ready or the timeout expired.") + KAPI_ERROR(3, -EINVAL, "EINVAL", "Invalid parameters", + "epfd is not an epoll file descriptor, maxevents is less than or equa= l to zero, " + "sigsetsize is not equal to 
sizeof(sigset_t), or timeout values are i= nvalid.") + + .error_count =3D 4, + .param_count =3D 6, + .since_version =3D "5.11", + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE, + "signal mask", + "Atomically sets the signal mask for the calling thread") + KAPI_EFFECT_CONDITION("When sigmask is not NULL") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "ready list", + "Removes events from the epoll ready list as they are reported") + KAPI_EFFECT_CONDITION("When events are available and level-triggered") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_SCHEDULE, + "process state", + "Blocks the calling thread until events, timeout, or signal") + KAPI_EFFECT_CONDITION("When timeout !=3D NULL or timeout->tv_sec/tv_nsec= !=3D 0") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE, + "user memory", + "Writes event data to user-provided buffer") + KAPI_EFFECT_CONDITION("When events are available") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_PROCESS_STATE, + "saved signal mask", + "Saves and restores the original signal mask") + KAPI_EFFECT_CONDITION("When sigmask is not NULL") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(5, KAPI_EFFECT_MODIFY_STATE, + "timer precision", + "Timeout may be rounded up to system timer granularity") + KAPI_EFFECT_CONDITION("When timeout is specified") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(6) + + /* State transitions */ + KAPI_STATE_TRANS(0, "signal mask", "original mask", "user-specified mask", + "Thread's signal mask is atomically changed to the provided mask") + KAPI_STATE_TRANS_COND("When sigmask is not NULL") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "process", "running", "blocked", + "Process blocks waiting for events with specified signal mask") + KAPI_STATE_TRANS_COND("When no events available and not immediate return= ") + KAPI_STATE_TRANS_END + + 
KAPI_STATE_TRANS(2, "process", "blocked", "running", + "Process wakes up due to events, timeout expiry, or unblocked signal") + KAPI_STATE_TRANS_COND("When wait condition is satisfied") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "signal mask", "user-specified mask", "original mask", + "Thread's signal mask is restored to its original value") + KAPI_STATE_TRANS_COND("When returning from epoll_pwait2") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "pending signals", "blocked", "deliverable", + "Signals that were blocked by the temporary mask become deliverable") + KAPI_STATE_TRANS_COND("When signal mask is restored and signals were pen= ding") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(5, "timeout timer", "not started", "armed with nanosecon= d precision", + "High resolution timer is armed with the specified timeout") + KAPI_STATE_TRANS_COND("When timeout is specified and > 0") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(6) + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "ANY_UNBLOCKED", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Signal not blocked by provided sigmask") + KAPI_SIGNAL_DESC("Any signal not blocked by the sigmask parameter will i= nterrupt " + "epoll_pwait2() and cause it to return -EINTR. 
Signal handling is " + "identical to epoll_pwait().") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGKILL, "SIGKILL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("Cannot be blocked by sigmask") + KAPI_SIGNAL_DESC("SIGKILL cannot be blocked and will terminate the proce= ss immediately.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, SIGSTOP, "SIGSTOP", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_STOP) + KAPI_SIGNAL_CONDITION("Cannot be blocked by sigmask") + KAPI_SIGNAL_DESC("SIGSTOP cannot be blocked and will stop the process.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(3, 0, "BLOCKED_SIGNALS", KAPI_SIGNAL_BLOCK, KAPI_SIGNAL_ACTIO= N_DEFAULT) + KAPI_SIGNAL_CONDITION("Signals in provided sigmask") + KAPI_SIGNAL_DESC("Signals specified in the sigmask parameter are blocked= during " + "the epoll_pwait2 call.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(4, SIGCONT, "SIGCONT", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_CONTINUE) + KAPI_SIGNAL_CONDITION("When process is stopped") + KAPI_SIGNAL_DESC("SIGCONT resumes a stopped process. If epoll_pwait2 was= interrupted " + "by SIGSTOP, it may return -EINTR when continued.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(5, SIGALRM, "SIGALRM", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Timer expiration") + KAPI_SIGNAL_DESC("SIGALRM or other timer signals will interrupt epoll_pw= ait2 with -EINTR " + "if not blocked by sigmask") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + .signal_count =3D 6, + + /* Signal mask specifications */ + KAPI_SIGNAL_MASK(0, "user_sigmask", "User-provided signal mask atomically= applied") + .description =3D "The signal mask is atomically set and restored exactly= as in " + "epoll_pwait(), providing the same race-condition prevention." 
+ KAPI_SIGNAL_MASK_END + + .signal_mask_count =3D 1, + + /* Locking specifications */ + KAPI_LOCK(0, "ep->lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects the ready list while checking for and consuming= events") + KAPI_LOCK_END + + KAPI_LOCK(1, "ep->mtx", KAPI_LOCK_MUTEX) + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects against concurrent epoll_ctl operations during = wait") + KAPI_LOCK_END + + .lock_count =3D 2, + + .examples =3D "sigset_t sigmask;\n" + "struct epoll_event events[10];\n" + "struct __kernel_timespec ts;\n\n" + "/* Block SIGINT during epoll_pwait2 */\n" + "sigemptyset(&sigmask);\n" + "sigaddset(&sigmask, SIGINT);\n\n" + "/* Wait for 1.5 seconds */\n" + "ts.tv_sec =3D 1;\n" + "ts.tv_nsec =3D 500000000; /* 500 milliseconds */\n\n" + "int nfds =3D epoll_pwait2(epfd, events, 10, &ts, &sigmask, sizeof(s= igmask));\n" + "if (nfds =3D=3D -1) {\n" + " if (errno =3D=3D EINTR) {\n" + " /* Handle signal */\n" + " }\n" + " perror(\"epoll_pwait2\");\n" + " exit(EXIT_FAILURE);\n" + "}\n\n" + "/* Example with infinite timeout */\n" + "nfds =3D epoll_pwait2(epfd, events, 10, NULL, &sigmask, sizeof(sigm= ask));", + .notes =3D "epoll_pwait2() provides nanosecond precision timeouts, addres= sing the limitation " + "of epoll_pwait() which only supports millisecond precision. The timeou= t parameter " + "uses struct __kernel_timespec which is compatible with 64-bit time val= ues, making " + "it Y2038-safe. Like epoll_pwait(), the signal mask operation is atomic= . 
" + "The timeout is still subject to system timer granularity and may be ro= unded up.", +KAPI_END_SPEC; + SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, even= ts, int, maxevents, const struct __kernel_timespec __user *, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7EB8B2E62C5; Sat, 14 Jun 2025 13:49:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908948; cv=none; b=i34vD3AmSoAvRxrN7AIq8F5h6197yGO/ELZET9iLFRFE291XdEjCb8F6OZpLpfpDjXFwri8DccU1LN8IivSV8dGxfSsmuHkVPRUUpanfL5BVQRDEC/WSqUn1i3rvu2K8wpto64/aCX5yo+A3YwPgV0VQDUfsTU9QvrcZxbA8D8g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908948; c=relaxed/simple; bh=Su5C0ZLdpHBzNaB3tC3uk3Gzq0bkfxoEFcXkenceNw8=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=ZrWaZCGYBqa7caFymR21Clfh3YqeAJZxlSVOnHW2NYJJu+uzs+2QtRW7d3W+gzMjeQyw6aWO1dRh7N9S5Ab/NhzoCjuzsjrCPJ7/ndcWxHjp7hn8/C0RjNmf2KBG8u47vl6zJvbZjIHxInK3kEYo9zuuXSOFzc0PnBypc2gnRuM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=IiEIIBis; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="IiEIIBis" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B67FBC4CEF4; Sat, 14 Jun 2025 13:49:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908948; bh=Su5C0ZLdpHBzNaB3tC3uk3Gzq0bkfxoEFcXkenceNw8=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=IiEIIBisIpSWSPSkeX6Z5FrxTiWx86+35Tzr//92ZyjWERwcAoSA6DNtRbELGWLWq PPVVL6FUVM1MxwiKEMNInS49k2eyI6O1QNWe5Q11+237RpBpuqTeJ8FkJNfrkCc0tD lV2/g6MCk7x92nz7sMVdP+1N0vn7qxnrtt1U5oK2ckcPxuFlPbq3Mzg8yrhJk7yx8I lqDU37FC3mI1+1/lNo6PimtiPq0hURMLba+EfdNDzQxLhQgTMBZc6gC7uwq9zXLVIy irYDeCYFKCI/TynQU8NQLmzlGyU2KJ5dwZAcGYGVM3GSEYEo+aZgqY9g775Hhfa/dc RR615vuJW6mtA== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 08/19] exec: add API specification for execve Date: Sat, 14 Jun 2025 09:48:47 -0400 Message-Id: <20250614134858.790460-9-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add comprehensive kernel API specification for the execve() system call. Signed-off-by: Sasha Levin --- fs/exec.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index 1f5fdd2e096e3..3d006105ab23d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -1997,7 +1998,224 @@ void set_dumpable(struct mm_struct *mm, int value) set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value); } =20 + +DEFINE_KERNEL_API_SPEC(sys_execve) + KAPI_DESCRIPTION("Execute a new program") + KAPI_LONG_DESC("Executes the program referred to by filename. This causes= the program " + "that is currently being run by the calling process to be replace= d with " + "a new program, with newly initialized stack, heap, and (initiali= zed and " + "uninitialized) data segments. 
The process ID remains the same.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "filename", "const char __user *", "Pathname of the program= to execute") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_PATH, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Must be a valid pathname to an executable file or scri= pt", + KAPI_PARAM_END + + KAPI_PARAM(1, "argv", "const char __user *const __user *", "Array of argu= ment strings passed to the new program") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "NULL-terminated array of pointers to null-terminated s= trings", + KAPI_PARAM_END + + KAPI_PARAM(2, "envp", "const char __user *const __user *", "Array of envi= ronment strings for the new program") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "NULL-terminated array of pointers to null-terminated s= trings in form key=3Dvalue", + KAPI_PARAM_END + + KAPI_RETURN("long", "Does not return on success; returns -1 on error") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + KAPI_RETURN_END + + KAPI_ERROR(0, -E2BIG, "E2BIG", "Argument list too long", + "The total size of argv and envp exceeds the system limit.") + KAPI_ERROR(1, -EACCES, "EACCES", "Permission denied", + "Search permission denied on a component of the path, file is not reg= ular, " + "or execute permission denied for file or interpreter.") + KAPI_ERROR(2, -EFAULT, "EFAULT", "Bad address", + "filename, argv, or envp points outside accessible address space.") + KAPI_ERROR(3, -EINVAL, "EINVAL", "Invalid executable format", + "An ELF executable has more than one PT_INTERP segment.") + KAPI_ERROR(4, -EIO, "EIO", "I/O error", + "An I/O error occurred while reading from the file system.") + KAPI_ERROR(5, -EISDIR, "EISDIR", "Is a 
directory", + "An ELF interpreter was a directory.") + KAPI_ERROR(6, -ELIBBAD, "ELIBBAD", "Invalid ELF interpreter", + "An ELF interpreter was not in a recognized format.") + KAPI_ERROR(7, -ELOOP, "ELOOP", "Too many symbolic links", + "Too many symbolic links encountered while resolving filename or inte= rpreter.") + KAPI_ERROR(8, -EMFILE, "EMFILE", "Too many open files", + "The per-process limit on open file descriptors has been reached.") + KAPI_ERROR(9, -ENAMETOOLONG, "ENAMETOOLONG", "Filename too long", + "filename or one of the strings in argv or envp is too long.") + KAPI_ERROR(10, -ENFILE, "ENFILE", "System file table overflow", + "The system-wide limit on open files has been reached.") + KAPI_ERROR(11, -ENOENT, "ENOENT", "File not found", + "The file filename or an interpreter does not exist.") + KAPI_ERROR(12, -ENOEXEC, "ENOEXEC", "Exec format error", + "An executable is not in a recognized format, is for wrong architectu= re, " + "or has other format errors preventing execution.") + KAPI_ERROR(13, -ENOMEM, "ENOMEM", "Out of memory", + "Insufficient kernel memory available.") + KAPI_ERROR(14, -ENOTDIR, "ENOTDIR", "Not a directory", + "A component of the path prefix is not a directory.") + KAPI_ERROR(15, -EPERM, "EPERM", "Operation not permitted", + "The filesystem is mounted nosuid, the user is not root, and the file= has " + "set-user-ID or set-group-ID bit set.") + KAPI_ERROR(16, -ETXTBSY, "ETXTBSY", "Text file busy", + "The executable was open for writing by one or more processes.") + KAPI_ERROR(17, -EAGAIN, "EAGAIN", "Resource temporarily unavailable", + "RLIMIT_NPROC limit exceeded - too many processes for this user.") + + .error_count =3D 18, + .param_count =3D 3, + .since_version =3D "1.0", + .examples =3D "char *argv[] =3D { \"echo\", \"hello\", \"world\", NULL };= \n" + "char *envp[] =3D { \"PATH=3D/bin\", NULL };\n" + "execve(\"/bin/echo\", argv, envp);\n" + "/* This point is only reached on error */\n" + "perror(\"execve failed\");\n" + 
"exit(EXIT_FAILURE);", + .notes =3D "On success, execve() does not return; the new program is exec= uted. " + "File descriptors remain open unless marked close-on-exec. " + "Signal dispositions are reset to default except for ignored signals. " + "Any alternate signal stack is not preserved. " + "The process's set of pending signals is cleared. " + "All threads except the calling thread are destroyed.", + + /* Fatal signals can interrupt exec */ + KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("Fatal signal pending during exec setup") + KAPI_SIGNAL_DESC("Fatal signals (checked via fatal_signal_pending()) can= interrupt " + "exec during setup phases like de_thread(). This causes exec to fail " + "and the process to exit.") + KAPI_SIGNAL_END + + /* SIGKILL sent to other threads */ + KAPI_SIGNAL(1, SIGKILL, "SIGKILL", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_T= ERMINATE) + KAPI_SIGNAL_TARGET("All other threads in the thread group") + KAPI_SIGNAL_CONDITION("Multi-threaded process doing exec") + KAPI_SIGNAL_DESC("During de_thread(), zap_other_threads() sends SIGKILL = to all " + "other threads in the thread group to ensure only the execing " + "thread survives.") + KAPI_SIGNAL_END + + /* Signal handlers reset */ + KAPI_SIGNAL(2, 0, "ALL_HANDLERS", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTION_= CUSTOM) + KAPI_SIGNAL_CONDITION("Signal has a handler installed") + KAPI_SIGNAL_DESC("flush_signal_handlers() resets all signal handlers to = SIG_DFL " + "except for signals that are ignored (SIG_IGN). This happens " + "after de_thread() completes.") + KAPI_SIGNAL_END + + /* Ignored signals preserved */ + KAPI_SIGNAL(3, 0, "IGNORED_SIGNALS", KAPI_SIGNAL_IGNORE, KAPI_SIGNAL_ACTI= ON_CUSTOM) + KAPI_SIGNAL_CONDITION("Signal disposition is SIG_IGN") + KAPI_SIGNAL_DESC("Signals set to SIG_IGN are preserved across exec. 
This= is " + "POSIX-compliant behavior allowing parent processes to ignore " + "signals in children.") + KAPI_SIGNAL_END + + /* Pending signals cleared */ + KAPI_SIGNAL(4, 0, "PENDING_SIGNALS", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTI= ON_CUSTOM) + KAPI_SIGNAL_CONDITION("Any pending signals") + KAPI_SIGNAL_DESC("All pending signals are cleared during exec. This incl= udes " + "both thread-specific and process-wide pending signals.") + KAPI_SIGNAL_END + + /* Timer signals cleared */ + KAPI_SIGNAL(5, 0, "TIMER_SIGNALS", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTION= _CUSTOM) + KAPI_SIGNAL_CONDITION("Timer-generated signals pending") + KAPI_SIGNAL_DESC("flush_itimer_signals() clears any pending timer signal= s " + "(SIGALRM, SIGVTALRM, SIGPROF) to prevent confusion in the " + "new program.") + KAPI_SIGNAL_END + + /* Exit signal set to SIGCHLD */ + KAPI_SIGNAL(6, SIGCHLD, "SIGCHLD", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_D= EFAULT) + KAPI_SIGNAL_TARGET("Parent process when this process exits") + KAPI_SIGNAL_CONDITION("Process exit after exec") + KAPI_SIGNAL_DESC("The exit_signal is set to SIGCHLD during exec, ensurin= g the " + "parent will receive SIGCHLD when this process terminates.") + KAPI_SIGNAL_END + + /* Alternate signal stack cleared */ + KAPI_SIGNAL(7, 0, "SIGALTSTACK", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTION_C= USTOM) + KAPI_SIGNAL_CONDITION("Process had alternate signal stack") + KAPI_SIGNAL_DESC("Any alternate signal stack (sigaltstack) is not preser= ved " + "across exec. 
The new program starts with no alternate stack.") + KAPI_SIGNAL_END + + .signal_count =3D 8, + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_PROCESS_STATE | KAPI_EFFECT_FREE_MEMORY |= KAPI_EFFECT_ALLOC_MEMORY, + "process image", + "Replaces entire process image including code, data, heap, and stack") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_RESOURCE_DESTR= OY, + "file descriptors", + "Closes all file descriptors with close-on-exec flag set") + KAPI_EFFECT_CONDITION("FD_CLOEXEC flag set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "signal handlers", + "Resets all signal handlers to default, preserves ignored signals") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_PROCESS_STATE | KAPI_EFFECT_SIGNAL_SEND, + "thread group", + "Kills all other threads in the thread group with SIGKILL") + KAPI_EFFECT_CONDITION("Multi-threaded process") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE, + "process attributes", + "Clears pending signals, timers, alternate signal stack, and various p= rocess attributes") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(5, KAPI_EFFECT_FILESYSTEM, + "executable file", + "Opens and reads the executable file, may trigger filesystem operation= s") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(6) + + /* State transitions */ + KAPI_STATE_TRANS(0, "process memory", + "old program image", "new program image", + "Complete replacement of process address space with new program") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "process credentials", + "current credentials", "potentially modified credentials", + "May change effective UID/GID based on file permissions") + KAPI_STATE_TRANS_COND("setuid/setgid binary") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "thread state", + "multi-threaded", "single-threaded", + "Process becomes single-threaded after killing other threads") + KAPI_STATE_TRANS_COND("Multi-threaded process") + 
KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "signal state", + "custom handlers and pending signals", "default handlers, no pending s= ignals", + "Signal handling reset to clean state for new program") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(4) +KAPI_END_SPEC; SYSCALL_DEFINE3(execve, + const char __user *, filename, const char __user *const __user *, argv, const char __user *const __user *, envp) --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C28952E6D2F; Sat, 14 Jun 2025 13:49:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908949; cv=none; b=inWSN7tqEuWIT07oBrkku9Y4EEtEI4EcZTZAq4m+6bql+DaVsQiEs2mtYiLn0GgM5AK6cQJWxvA7UtIwWBvABkJdT7UCPpSC9wi+6ORIwkottBfwrKcBj32kWvzA9Vz4auM1hCCHcflv4jcUAXuflG1EKwNprGyH8NL8rGiawdg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908949; c=relaxed/simple; bh=HR3jSxpUUdGXQ0QKpM97JMgGl4QSqTNmyU2iIOv1VtQ=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=q5GH5umDD3iVXnsfAtbl2vawWla7lUWCO7iWuERGc4fgUABFIsn8Y69E3RpnqgGuP8RfApyj+AREWx8W3SyuA3sE/M5mmzc7Bivxg/feYrGC11jn1nOwI8Ws6bnme58klNKEihcg0j4c3/3PY7yemY6dvHGzWaJb4RF+ENnx0U0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FukOVZmc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FukOVZmc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A5050C4CEEE; Sat, 14 Jun 2025 13:49:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908949; bh=HR3jSxpUUdGXQ0QKpM97JMgGl4QSqTNmyU2iIOv1VtQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FukOVZmcFAf0VQiVgCCp3/r9fELg0NqZSl5ITpxOcef9+t5SgAw2D4aMc62zpknis /MbKLDiH6qyc+ATgMSPoA6H4RYz1pUgxTHAZ2Mcv1wxwGYM6z12NCS3PYUfhLQ9sZ5 JjCV1aZNDO0W+YIsdGkmjM8+uwM8Vuk0ZZsLmbL+C3PMTtyuECInDoAL8SVbyMJxUC hbTwDrWJQxeAfp3isnrC6FVngWRM2uOMwIBP++f5kVYVIYKhZfproNjPBInVwbgsfi w6HEzXaZob4/jknyC+/2DmRcoipwwsZZ6ASQS5xu4GyQNhdSIoHF3XOjvRGrOs2g4L wmZy/kFgA31cg== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 09/19] exec: add API specification for execveat Date: Sat, 14 Jun 2025 09:48:48 -0400 Message-Id: <20250614134858.790460-10-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add comprehensive kernel API specification for the execveat() system call. 
Signed-off-by: Sasha Levin --- fs/exec.c | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index 3d006105ab23d..49d8647c053ef 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2223,6 +2223,251 @@ SYSCALL_DEFINE3(execve, return do_execve(getname(filename), argv, envp); } =20 + +/* Valid flag combinations for execveat */ +static const s64 execveat_valid_flags[] =3D { + 0, + AT_EMPTY_PATH, + AT_SYMLINK_NOFOLLOW, + AT_EXECVE_CHECK, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + AT_EMPTY_PATH | AT_EXECVE_CHECK, + AT_SYMLINK_NOFOLLOW | AT_EXECVE_CHECK, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW | AT_EXECVE_CHECK, +}; + +DEFINE_KERNEL_API_SPEC(sys_execveat) + KAPI_DESCRIPTION("Execute a new program relative to a directory file desc= riptor") + KAPI_LONG_DESC("Executes the program referred to by the combination of fd= and filename. " + "This system call is useful when implementing a secure execution = environment " + "or when the calling process has an open file descriptor but no a= ccess to " + "the corresponding pathname. 
Like execve(), it replaces the curre= nt process " + "image with a new process image.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "fd", "int", "Directory file descriptor") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_FD, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "AT_FDCWD for current directory, or valid directory fil= e descriptor", + KAPI_PARAM_END + + KAPI_PARAM(1, "filename", "const char __user *", "Pathname of the program= to execute") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER | KAPI_PARAM_OPTIONAL) + .type =3D KAPI_TYPE_PATH, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Relative or absolute path; empty string with AT_EMPTY_= PATH to use fd directly", + KAPI_PARAM_END + + KAPI_PARAM(2, "argv", "const char __user *const __user *", "Array of argu= ment strings passed to the new program") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "NULL-terminated array of pointers to null-terminated s= trings", + KAPI_PARAM_END + + KAPI_PARAM(3, "envp", "const char __user *const __user *", "Array of envi= ronment strings for the new program") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "NULL-terminated array of pointers to null-terminated s= trings in form key=3Dvalue", + KAPI_PARAM_END + + KAPI_PARAM(4, "flags", "int", "Execution flags") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_MASK, + .valid_mask =3D AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW | AT_EXECVE_CHECK, + .constraints =3D "0 or combination of AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW= , and AT_EXECVE_CHECK", + KAPI_PARAM_END + + /* Return specification */ + KAPI_RETURN("long", "Does not return on success (except with AT_EXECVE_CH= ECK which returns 0); returns -1 on error") + 
.type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + KAPI_RETURN_END + + /* Error codes */ + KAPI_ERROR(0, -E2BIG, "E2BIG", "Argument list too long", "The total size = of argv and envp exceeds the system limit.") + KAPI_ERROR(1, -EACCES, "EACCES", "Permission denied", "Search permission = denied on a component of the path, file is not regular, or execute permissi= on denied for file or interpreter.") + KAPI_ERROR(2, -EBADF, "EBADF", "Bad file descriptor", "fd is not a valid = file descriptor.") + KAPI_ERROR(3, -EFAULT, "EFAULT", "Bad address", "filename, argv, or envp = points outside accessible address space.") + KAPI_ERROR(4, -EINVAL, "EINVAL", "Invalid flags or executable format", "I= nvalid flags specified, or ELF executable has more than one PT_INTERP segme= nt.") + KAPI_ERROR(5, -EIO, "EIO", "I/O error", "An I/O error occurred while read= ing from the file system.") + KAPI_ERROR(6, -EISDIR, "EISDIR", "Is a directory", "An ELF interpreter wa= s a directory.") + KAPI_ERROR(7, -ELIBBAD, "ELIBBAD", "Invalid ELF interpreter", "An ELF int= erpreter was not in a recognized format.") + KAPI_ERROR(8, -ELOOP, "ELOOP", "Too many symbolic links", "Too many symbo= lic links encountered, or AT_SYMLINK_NOFOLLOW was specified but filename re= fers to a symbolic link.") + KAPI_ERROR(9, -EMFILE, "EMFILE", "Too many open files", "The per-process = limit on open file descriptors has been reached.") + KAPI_ERROR(10, -ENAMETOOLONG, "ENAMETOOLONG", "Filename too long", "filen= ame or one of the strings in argv or envp is too long.") + KAPI_ERROR(11, -ENFILE, "ENFILE", "System file table overflow", "The syst= em-wide limit on open files has been reached.") + KAPI_ERROR(12, -ENOENT, "ENOENT", "File not found", "The file filename or= an interpreter does not exist, or filename is empty and AT_EMPTY_PATH was = not specified in flags.") + KAPI_ERROR(13, -ENOEXEC, "ENOEXEC", "Exec format error", "An executable i= s not in a recognized format, is for wrong architecture, or 
has other forma= t errors preventing execution.") + KAPI_ERROR(14, -ENOMEM, "ENOMEM", "Out of memory", "Insufficient kernel m= emory available.") + KAPI_ERROR(15, -ENOTDIR, "ENOTDIR", "Not a directory", "A component of th= e path prefix is not a directory, or fd is not a directory when a relative = path is given.") + KAPI_ERROR(16, -EPERM, "EPERM", "Operation not permitted", "The filesyste= m is mounted nosuid, the user is not root, and the file has set-user-ID or = set-group-ID bit set.") + KAPI_ERROR(17, -ETXTBSY, "ETXTBSY", "Text file busy", "The executable was= open for writing by one or more processes.") + KAPI_ERROR(18, -EAGAIN, "EAGAIN", "Resource temporarily unavailable", "RL= IMIT_NPROC limit exceeded - too many processes for this user.") + KAPI_ERROR(19, -EINTR, "EINTR", "Interrupted by signal", "The exec was in= terrupted by a signal during setup phase.") + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_TERMINATE) + KAPI_SIGNAL_CONDITION("Fatal signal pending during exec setup") + KAPI_SIGNAL_DESC("Fatal signals (checked via fatal_signal_pending()) can= interrupt exec during setup phases like de_thread(). This causes exec to f= ail and the process to exit.") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGKILL, "SIGKILL", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_T= ERMINATE) + KAPI_SIGNAL_TARGET("All other threads in the thread group") + KAPI_SIGNAL_CONDITION("Multi-threaded process doing exec") + KAPI_SIGNAL_DESC("During de_thread(), zap_other_threads() sends SIGKILL = to all other threads in the thread group to ensure only the execing thread = survives.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(2, 0, "ALL_HANDLERS", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTION_= CUSTOM) + KAPI_SIGNAL_CONDITION("Signal has a handler installed") + KAPI_SIGNAL_DESC("flush_signal_handlers() resets all signal handlers to = SIG_DFL except for signals that are ignored (SIG_IGN). 
This happens after d= e_thread() completes.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(3, 0, "IGNORED_SIGNALS", KAPI_SIGNAL_IGNORE, KAPI_SIGNAL_ACTI= ON_CUSTOM) + KAPI_SIGNAL_CONDITION("Signal disposition is SIG_IGN") + KAPI_SIGNAL_DESC("Signals set to SIG_IGN are preserved across exec. This= is POSIX-compliant behavior allowing parent processes to ignore signals in= children.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(4, 0, "PENDING_SIGNALS", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTI= ON_CUSTOM) + KAPI_SIGNAL_CONDITION("Any pending signals") + KAPI_SIGNAL_DESC("All pending signals are cleared during exec. This incl= udes both thread-specific and process-wide pending signals.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(5, 0, "TIMER_SIGNALS", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTION= _CUSTOM) + KAPI_SIGNAL_CONDITION("Timer-generated signals pending") + KAPI_SIGNAL_DESC("flush_itimer_signals() clears any pending timer signal= s (SIGALRM, SIGVTALRM, SIGPROF) to prevent confusion in the new program.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(6, SIGCHLD, "SIGCHLD", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_D= EFAULT) + KAPI_SIGNAL_TARGET("Parent process when this process exits") + KAPI_SIGNAL_CONDITION("Process exit after exec") + KAPI_SIGNAL_DESC("The exit_signal is set to SIGCHLD during exec, ensurin= g the parent will receive SIGCHLD when this process terminates.") + KAPI_SIGNAL_END + + KAPI_SIGNAL(7, 0, "SIGALTSTACK", KAPI_SIGNAL_HANDLE, KAPI_SIGNAL_ACTION_C= USTOM) + KAPI_SIGNAL_CONDITION("Process had alternate signal stack") + KAPI_SIGNAL_DESC("Any alternate signal stack (sigaltstack) is not preser= ved across exec. 
The new program starts with no alternate stack.") + KAPI_SIGNAL_END + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_PROCESS_STATE | KAPI_EFFECT_FREE_MEMORY |= KAPI_EFFECT_ALLOC_MEMORY, + "process image", + "Replaces entire process image including code, data, heap, and stack") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_RESOURCE_DESTR= OY, + "file descriptors", + "Closes all file descriptors with close-on-exec flag set") + KAPI_EFFECT_CONDITION("FD_CLOEXEC flag set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "signal handlers", + "Resets all signal handlers to default, preserves ignored signals") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_PROCESS_STATE | KAPI_EFFECT_SIGNAL_SEND, + "thread group", + "Kills all other threads in the thread group with SIGKILL") + KAPI_EFFECT_CONDITION("Multi-threaded process") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE, + "process attributes", + "Clears pending signals, timers, alternate signal stack, and various p= rocess attributes") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(5, KAPI_EFFECT_FILESYSTEM, + "executable file", + "Opens and reads the executable file, may trigger filesystem operation= s") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(6, KAPI_EFFECT_MODIFY_STATE, + "security context", + "May change SELinux/AppArmor context based on file labels and transiti= ons") + KAPI_EFFECT_CONDITION("LSM enabled") + KAPI_SIDE_EFFECT_END + + /* State transitions */ + KAPI_STATE_TRANS(0, "process memory", + "old program image", "new program image", + "Complete replacement of process address space with new program") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "process credentials", + "current credentials", "potentially modified credentials", + "May change effective UID/GID based on file permissions") + KAPI_STATE_TRANS_COND("setuid/setgid binary") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "thread state", + 
"multi-threaded", "single-threaded", + "Process becomes single-threaded after killing other threads") + KAPI_STATE_TRANS_COND("Multi-threaded process") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "signal state", + "custom handlers and pending signals", "default handlers, no pending s= ignals", + "Signal handling reset to clean state for new program") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "file descriptor table", + "contains close-on-exec FDs", "close-on-exec FDs closed", + "All file descriptors marked FD_CLOEXEC are closed during exec") + KAPI_STATE_TRANS_COND("FDs with FD_CLOEXEC") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(5, "working directory", + "fd-relative operations", "resolved to absolute paths", + "Directory fd operations resolved before exec completes") + KAPI_STATE_TRANS_COND("Using dirfd !=3D AT_FDCWD") + KAPI_STATE_TRANS_END + + /* Locking information */ + KAPI_LOCK(0, "cred_guard_mutex", KAPI_LOCK_MUTEX) + KAPI_LOCK_DESC("Protects against concurrent credential changes during ex= ec") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_DESC("Ensures atomic credential transition during exec process= ") + KAPI_LOCK_END + + KAPI_LOCK(1, "sighand->siglock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_DESC("Protects signal handler modifications") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Taken during signal handler reset and pending signal cle= aring") + KAPI_LOCK_END + + KAPI_SIDE_EFFECT_COUNT(7) + KAPI_STATE_TRANS_COUNT(6) + + .error_count =3D 20, + .param_count =3D 5, + .since_version =3D "3.19", + .examples =3D "/* Execute /bin/echo using AT_FDCWD */\n" + "char *argv[] =3D { \"echo\", \"hello\", NULL };\n" + "char *envp[] =3D { \"PATH=3D/bin\", NULL };\n" + "execveat(AT_FDCWD, \"/bin/echo\", argv, envp, 0);\n\n" + "/* Execute via file descriptor */\n" + "int fd =3D open(\"/bin/echo\", O_PATH);\n" + "execveat(fd, \"\", argv, envp, AT_EMPTY_PATH);\n\n" + "/* Execute relative to directory fd */\n" + "int dirfd =3D open(\"/bin\", O_RDONLY | 
O_DIRECTORY);\n" + "execveat(dirfd, \"echo\", argv, envp, 0);", + .notes =3D "execveat() was added to allow fexecve() to be implemented on = systems that " + "do not have /proc mounted. When filename is an empty string and AT_EMP= TY_PATH " + "is specified, the file descriptor fd specifies the file to be executed= . " + "AT_SYMLINK_NOFOLLOW prevents following symbolic links. " + "AT_EXECVE_CHECK (since Linux 6.12) only checks if execution would be a= llowed " + "without actually executing. Like execve(), on success execveat() does = not return " + "(except with AT_EXECVE_CHECK which returns 0).", + .signal_count =3D 8, + .lock_count =3D 2, +KAPI_END_SPEC; + SYSCALL_DEFINE5(execveat, int, fd, const char __user *, filename, const char __user *const __user *, argv, --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 877162E7627; Sat, 14 Jun 2025 13:49:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908950; cv=none; b=Ox56176m/eK3nb4IUwJnL6ktrwfrM9GM2gf1ysXX6naUWc1q2nWMlfvMJfA7yihvWGke9/BlyvbIIe9kaBsZJce9o7fAC4u0psZAMVyEKq5T9p60b+zYL7psdsj4oQ7R7p6ZhyuwjDBL/cDlAnOQHhuiOQjiKrPD2IAKd5hNrsU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908950; c=relaxed/simple; bh=IX6FdEJxog+IYJ7fUmLO8O0E0nHfZo6I9ZqvL4mp2Ps=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=EU6Al7Eq9VwsZmecEXf0q3yj190D+vUP7slJZZzb/z8K2sjIf/Q00h1wLZn7Lg8Wjr/QlHsjmltMK1EFyDoc4fm9wBR9DzmQqnwcHKknnEZqpqTHHJjLEgQp7TYIItfJKde51yEjw7t7NjHyEoET+LMz2eIbejirNS1oYClHonE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org 
header.i=@kernel.org header.b=BX5Fmk0q; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="BX5Fmk0q" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 912E9C4CEEB; Sat, 14 Jun 2025 13:49:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908950; bh=IX6FdEJxog+IYJ7fUmLO8O0E0nHfZo6I9ZqvL4mp2Ps=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=BX5Fmk0qQ4LF++jbcrWQctzE3oCtNiAB4xol8dHhRUVO4BZtsaXsFVMBt54dy0H6g 16oQjJ/+uaacYKNNz8AoyZK5XWfwgGB0Y4advlCh5bYBEP1oU6O8tofNtojP2tP4kL f5Qaa3Je1GIt5XonmsuwIshmZX+s0IJoL8LdLBBUzdYp6EEq6I4Opv7h1fGe9h1b+G diCcQRNhenagkgfl7iCp+2o2CAHrbdBivq/3iOuoOXyMbgW23arHg8jATgbW8mlB6s rwS1cNbKFTTHd6joUddfoq+oecyuz8QxfoKgIL976a7HI05/7LuftVi05fxCcpCxDD DO92pPrktXfMg== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 10/19] mm/mlock: add API specification for mlock Date: Sat, 14 Jun 2025 09:48:49 -0400 Message-Id: <20250614134858.790460-11-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the mlock() system call. 
Signed-off-by: Sasha Levin --- mm/mlock.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/mm/mlock.c b/mm/mlock.c index 3cb72b579ffd3..a37102df54b01 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -25,6 +25,7 @@ #include #include #include +#include =20 #include "internal.h" =20 @@ -658,6 +659,110 @@ static __must_check int do_mlock(unsigned long start,= size_t len, vm_flags_t fla return 0; } =20 + +DEFINE_KERNEL_API_SPEC(sys_mlock) + KAPI_DESCRIPTION("Lock pages in memory") + KAPI_LONG_DESC("Locks pages in the specified address range into RAM, " + "preventing them from being paged to swap. Requires " + "CAP_IPC_LOCK capability or RLIMIT_MEMLOCK resource limit.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "start", "unsigned long", "Starting address of memory range= to lock") + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Rounded down to page boundary", + KAPI_PARAM_END + KAPI_PARAM(1, "len", "size_t", "Length of memory range to lock in bytes") + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + KAPI_PARAM_RANGE(0, LONG_MAX) + .constraints =3D "Rounded up to page boundary", + KAPI_PARAM_END + + .return_spec =3D { + .type_name =3D "long", + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .success_value =3D 0, + .description =3D "0 on success, negative error code on failure", + }, + + KAPI_ERROR(0, -ENOMEM, "ENOMEM", "Address range issue", + "Some of the specified range is not mapped, has unmapped gaps, " + "or the lock would cause the number of mapped regions to exceed the l= imit.") + KAPI_ERROR(1, -EPERM, "EPERM", "Insufficient privileges", + "The caller is not privileged (no CAP_IPC_LOCK) and RLIMIT_MEMLOCK is= 0.") + KAPI_ERROR(2, -EINVAL, "EINVAL", "Address overflow", + "The result of the addition start+len was less than 
start (arithmetic= overflow).") + KAPI_ERROR(3, -EAGAIN, "EAGAIN", "Some or all memory could not be locked", + "Some or all of the specified address range could not be locked.") + KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal", + "The operation was interrupted by a fatal signal before completion.") + + .error_count =3D 5, + .param_count =3D 2, + .since_version =3D "2.0", + + .locks[0] =3D { + .lock_name =3D "mmap_lock", + .lock_type =3D KAPI_LOCK_RWLOCK, + .acquired =3D true, + .released =3D true, + .description =3D "Process memory map write lock", + }, + .lock_count =3D 1, + + /* Signal specifications */ + .signal_count =3D 1, + + /* Fatal signals can interrupt mmap_write_lock_killable */ + KAPI_SIGNAL(0, 0, "FATAL", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RETURN) + KAPI_SIGNAL_CONDITION("Fatal signal pending") + KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, etc.) can interrupt the operat= ion " + "when acquiring mmap_write_lock_killable(), causing -EINTR return") + KAPI_SIGNAL_END + + .examples =3D "mlock(addr, 4096); // Lock one page\n" + "mlock(addr, len); // Lock range of pages", + .notes =3D "Memory locks do not stack - multiple calls on the same range = can be " + "undone by a single munlock. Locks are not inherited by child processes= . 
" + "Pages are locked on whole page boundaries.", + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_ALLOC_MEMORY, + "process memory", + "Locks pages into physical memory, preventing swapping") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "mm->locked_vm", + "Increases process locked memory counter") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_ALLOC_MEMORY, + "physical pages", + "May allocate and populate page table entries") + KAPI_EFFECT_CONDITION("Pages not already present") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(3) + + /* State transitions */ + KAPI_STATE_TRANS(0, "memory pages", "swappable", "locked in RAM", + "Pages become non-swappable and pinned in physical memory") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "VMA flags", "unlocked", "VM_LOCKED set", + "Virtual memory area marked as locked") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(2) +KAPI_END_SPEC; + SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { return do_mlock(start, len, VM_LOCKED); --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9E44B2E7F00; Sat, 14 Jun 2025 13:49:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908951; cv=none; b=KTMeDrVNo5Jti+Telzyi45RZZJ5f9rtnYWhcxMelP6Ts8OZ0c76Exmt8U0IBQFT7FAecC0u6Sj9gYlpoM4YQg5rwk32BxQeXme7FVokH/xTPw/jn6jequ2HF8il+nzhV83szyS3ND7KeHanCVSfs/9O2jxK2/rtTU4lkRX2KC5w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908951; c=relaxed/simple; bh=ibJSxGhn2sy6Pn+cpIBXMz0mb1TNNGHU2JUEIbkLD1g=; 
h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=gGa3rWwr2ngu9Ml3HGX/CKVzXjkMsQ7R/cqZN4WIKu61Mz63j+suo/1y0kqf82zNVTiutiOC+SusHL9w/bSxtwPTGmJMr1jIMzx6R6WwEWT+pcAd7Izhg/Tng8way31PfJ9lIgg54H/UFwgXRyTq8nMO9CIlWq0N4wLQXGysBUc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=OBRH/LRE; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="OBRH/LRE" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7C966C4AF0D; Sat, 14 Jun 2025 13:49:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908951; bh=ibJSxGhn2sy6Pn+cpIBXMz0mb1TNNGHU2JUEIbkLD1g=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=OBRH/LREvhPgizuyQuBDGiMMBTIqbwq9Lhv2Rul0dlMPCKmj+aLlehwm35hroE+ws Qgq9BZjAkIU95QmHziCfPFO2B0y9w0/KLJgzSizwHHzXFPRjK6cIA1/u+dsKg7/q2R /xLCEuBtJr/sr+pWsYo2KiooY/kq3sJy3ot1uXhqrc8MN53MtWgCgQPFV0OPX971w9 BJvhhqekig9Nb7EZ/SzfOk9xktVpDzrCNXjD2n7fCSPQkbR9Kp5uVd6uj3i5dsA3LY YNPevAmvOThRpejPizMm1bMrPE9bo6hfNFNie9LzQOXnjp69WG/KxsV9ZNn7XD0jqh Xv+m9J//tom6Q== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 11/19] mm/mlock: add API specification for mlock2 Date: Sat, 14 Jun 2025 09:48:50 -0400 Message-Id: <20250614134858.790460-12-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the mlock2() system call. 
Signed-off-by: Sasha Levin --- mm/mlock.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/mm/mlock.c b/mm/mlock.c index a37102df54b01..af2ab78acc226 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -768,6 +768,154 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, = len) return do_mlock(start, len, VM_LOCKED); } =20 + +DEFINE_KERNEL_API_SPEC(sys_mlock2) + KAPI_DESCRIPTION("Lock pages in memory with flags") + KAPI_LONG_DESC("Enhanced version of mlock() that supports flags. " + "MLOCK_ONFAULT flag allows locking pages on fault rather than imm= ediately.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "start", "unsigned long", "Starting address of memory range= to lock") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Rounded down to page boundary", + KAPI_PARAM_END + + KAPI_PARAM(1, "len", "size_t", "Length of memory range to lock in bytes") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + KAPI_PARAM_RANGE(0, LONG_MAX) + .constraints =3D "Rounded up to page boundary", + KAPI_PARAM_END + + KAPI_PARAM(2, "flags", "int", "Flags controlling lock behavior") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_MASK, + .valid_mask =3D MLOCK_ONFAULT, + .constraints =3D "Only MLOCK_ONFAULT flag is currently supported", + KAPI_PARAM_END + + /* Return specification */ + KAPI_RETURN("long", "0 on success, negative error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .success_value =3D 0, + KAPI_RETURN_END + + /* Error codes */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid flags", "Unknown flags were spe= cified (flags & ~MLOCK_ONFAULT).") + KAPI_ERROR(1, -ENOMEM, "ENOMEM", "Address range issue", "Some of the spec= ified range is not mapped, has unmapped 
gaps, or the lock would cause the n= umber of mapped regions to exceed the limit.") + KAPI_ERROR(2, -EPERM, "EPERM", "Insufficient privileges", "The caller is = not privileged (no CAP_IPC_LOCK) and RLIMIT_MEMLOCK is 0.") + KAPI_ERROR(3, -EAGAIN, "EAGAIN", "Some or all memory could not be locked"= , "Some or all of the specified address range could not be locked.") + KAPI_ERROR(4, -EINTR, "EINTR", "Interrupted by signal", "The operation wa= s interrupted by a fatal signal before completion.") + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Fatal signal pending during mmap_write_lock_killa= ble") + KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, SIGTERM, etc.) can interrupt t= he operation when acquiring mmap_write_lock_killable(), causing -EINTR retu= rn") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGBUS, "SIGBUS", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_DEF= AULT) + KAPI_SIGNAL_TARGET("Current process") + KAPI_SIGNAL_CONDITION("Memory access to locked page fails") + KAPI_SIGNAL_DESC("Can be generated if accessing a locked page that canno= t be brought into memory (e.g., truncated file mapping)") + KAPI_SIGNAL_END + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_ALLOC_MEMORY, + "process memory", + "Locks pages into physical memory, preventing swapping") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("Pages become resident in RAM") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "mm->locked_vm", + "Increases process locked memory counter") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("Counted against RLIMIT_MEMLOCK") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_ALLOC_MEMORY, + "page tables", + "May allocate and populate page table entries") + KAPI_EFFECT_CONDITION("Pages not already present") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE, + "VMA flags", + 
"Sets VM_LOCKED and optionally VM_LOCKONFAULT on affected VMAs") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_FILESYSTEM, + "page fault behavior", + "With MLOCK_ONFAULT, changes how future page faults are handled") + KAPI_EFFECT_CONDITION("MLOCK_ONFAULT flag specified") + KAPI_SIDE_EFFECT_END + + /* State transitions */ + KAPI_STATE_TRANS(0, "memory pages", + "swappable", "locked in RAM", + "Pages become non-swappable and pinned in physical memory") + KAPI_STATE_TRANS_COND("Without MLOCK_ONFAULT") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "VMA flags", + "unlocked", "VM_LOCKED set", + "Virtual memory area marked as locked") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "VMA flags", + "normal fault", "VM_LOCKONFAULT set", + "VMA marked to lock pages on future faults") + KAPI_STATE_TRANS_COND("MLOCK_ONFAULT flag specified") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "page residency", + "may be swapped", "resident in memory", + "Pages brought into RAM and kept there") + KAPI_STATE_TRANS_COND("Without MLOCK_ONFAULT") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "process statistics", + "normal memory accounting", "locked memory accounting", + "Memory counted against RLIMIT_MEMLOCK") + KAPI_STATE_TRANS_END + + /* Locking information */ + KAPI_LOCK(0, "mmap_lock", KAPI_LOCK_RWLOCK) + KAPI_LOCK_DESC("Process memory map write lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects VMA modifications during lock operation") + KAPI_LOCK_END + + KAPI_LOCK(1, "lru_lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_DESC("Per-memcg LRU list lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Taken when moving pages to unevictable list when locking= pages") + KAPI_LOCK_END + + .error_count =3D 5, + .param_count =3D 3, + .since_version =3D "4.4", + .signal_count =3D 2, + .side_effect_count =3D 5, + .state_trans_count =3D 5, + .lock_count =3D 2, + .examples =3D "mlock2(addr, len, 0); // Same as mlock()\n" + 
"mlock2(addr, len, MLOCK_ONFAULT); // Lock on fault", + .notes =3D "MLOCK_ONFAULT flag defers actual page locking until pages are= accessed. " + "Memory locks do not stack. Locks are not inherited by child processes.= ", +KAPI_END_SPEC; + SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) { vm_flags_t vm_flags =3D VM_LOCKED; --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 286332E7F1A; Sat, 14 Jun 2025 13:49:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908952; cv=none; b=ZCpjvbLvsJHlZblW7YI0g9hi5wDr6RB+wxTuZsIdsIlxD9WXJUvnau2ut125+ExLC9hPmmlOdO4Di2wE6OyUFhU9r0AyJVcTI5oPBGrxIHipQOTnB6gPK2FEphS1GwVErp7/CZbc2r/nrSlDYk096iy0x0o/3OvuWj/XXPyA0A8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908952; c=relaxed/simple; bh=zS8ZFG4HpWsJFM53jLyQzCNYlIU54deOx6WKkgpE/6M=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=fbE98vsPaSKzq4QagMECsaNzRapsiZ/V99Wno3LvCbbmmGgLWuLFLS8ptcMhhgJdj/GwogHJFo0iplaG4Dwx9EZdG4fTOk598jMxV+81AuQtDTTJsk9WnvrvIyt+KQlA0yaJKSOUdaQEql2vJ4HGi2UqxbmWlMuYRmruUC8PDeE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=k58zgvBL; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k58zgvBL" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 65B13C4CEEE; Sat, 14 Jun 2025 13:49:11 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908952; 
bh=zS8ZFG4HpWsJFM53jLyQzCNYlIU54deOx6WKkgpE/6M=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k58zgvBLLy0GEluPrFS97RLZTXqYzyMII1P2FBtNLubbBpcLo6eDz037z66PqhQWq c5WR6OMwXXrx21oJbG2Ygwf8wjTqheqhSt3uJ4MZIbYq9l/pUABWNUxWMyd6otiJt6 NfvW/8KdkxUyEw727dM9gz5VtQZ1czcJr4IDxEVgwbaGZW5hTAlf+yLa0EZh/7topF QpdjGgn5gvq2lut7ivNMLaqZp5KTwrN9021R1lN/wKMsXeMHUtcBhilRTz85fTwwfD aIBH8OC5ATf0f0iepRflqRNO+sVkG4U58QiuvRlUAGC61raQEvyAun/6/BBq8yUODQ fCgcoHJXiIwhw== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 12/19] mm/mlock: add API specification for mlockall Date: Sat, 14 Jun 2025 09:48:51 -0400 Message-Id: <20250614134858.790460-13-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the mlockall() system call. Signed-off-by: Sasha Levin --- mm/mlock.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/mm/mlock.c b/mm/mlock.c index af2ab78acc226..95ee707c5922f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -997,6 +997,150 @@ static int apply_mlockall_flags(int flags) return 0; } =20 + +DEFINE_KERNEL_API_SPEC(sys_mlockall) + KAPI_DESCRIPTION("Lock all process pages in memory") + KAPI_LONG_DESC("Locks all pages mapped into the process address space. 
" + "MCL_CURRENT locks current pages, MCL_FUTURE locks future mapping= s, " + "MCL_ONFAULT defers locking until page fault.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "flags", "int", "Flags controlling which pages to lock") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_MASK, + .valid_mask =3D MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT, + .constraints =3D "Must specify MCL_CURRENT and/or MCL_FUTURE; MCL_ONFAUL= T can be OR'd", + KAPI_PARAM_END + + /* Return specification */ + KAPI_RETURN("long", "0 on success, negative error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .success_value =3D 0, + KAPI_RETURN_END + + /* Error codes */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid flags", "Invalid combination of= flags specified, or no flags set, or only MCL_ONFAULT without MCL_CURRENT = or MCL_FUTURE.") + KAPI_ERROR(1, -EPERM, "EPERM", "Insufficient privileges", "The caller is = not privileged (no CAP_IPC_LOCK) and RLIMIT_MEMLOCK is 0.") + KAPI_ERROR(2, -ENOMEM, "ENOMEM", "Insufficient resources", "MCL_CURRENT i= s set and total VM size exceeds RLIMIT_MEMLOCK and caller lacks CAP_IPC_LOC= K.") + KAPI_ERROR(3, -EINTR, "EINTR", "Interrupted by signal", "The operation wa= s interrupted by a signal before completion.") + KAPI_ERROR(4, -EAGAIN, "EAGAIN", "Some memory could not be locked", "Some= pages could not be locked, possibly due to memory pressure.") + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Fatal signal pending during mmap_write_lock_killa= ble") + KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, SIGTERM, etc.) 
can interrupt t= he operation when acquiring mmap_write_lock_killable(), causing -EINTR retu= rn") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + KAPI_SIGNAL(1, SIGBUS, "SIGBUS", KAPI_SIGNAL_SEND, KAPI_SIGNAL_ACTION_DEF= AULT) + KAPI_SIGNAL_TARGET("Current process") + KAPI_SIGNAL_CONDITION("Memory access to locked page fails") + KAPI_SIGNAL_DESC("Can be generated later if accessing a locked page that= cannot be brought into memory (e.g., truncated file mapping)") + KAPI_SIGNAL_END + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_ALLOC_MEMORY, + "all process memory", + "Locks all current pages into physical memory, preventing swapping") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("MCL_CURRENT flag set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "mm->def_flags", + "Sets VM_LOCKED in default flags for future mappings") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("MCL_FUTURE flag set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "mm->locked_vm", + "Increases process locked memory counter for entire address space") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("MCL_CURRENT flag set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_ALLOC_MEMORY, + "page tables", + "May allocate and populate page table entries for all mappings") + KAPI_EFFECT_CONDITION("MCL_CURRENT without MCL_ONFAULT") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE, + "VMA flags", + "Sets VM_LOCKED on all existing VMAs") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("MCL_CURRENT flag set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(5, KAPI_EFFECT_SCHEDULE, + "mm_populate", + "Triggers population of entire address space") + KAPI_EFFECT_CONDITION("MCL_CURRENT without MCL_ONFAULT") + KAPI_SIDE_EFFECT_END + + /* State transitions */ + KAPI_STATE_TRANS(0, "all memory pages", + "swappable", "locked in RAM", + "All pages in process become non-swappable") + 
KAPI_STATE_TRANS_COND("MCL_CURRENT flag set") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "future mappings", + "normal", "auto-locked", + "New mappings will be automatically locked") + KAPI_STATE_TRANS_COND("MCL_FUTURE flag set") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "VMA flags", + "varied", "all VM_LOCKED", + "All virtual memory areas marked as locked") + KAPI_STATE_TRANS_COND("MCL_CURRENT flag set") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "page fault behavior", + "normal faulting", "lock on fault", + "Pages locked when faulted in rather than immediately") + KAPI_STATE_TRANS_COND("MCL_ONFAULT flag set") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "process statistics", + "partial locked memory", "all memory locked", + "Entire VM size counted against RLIMIT_MEMLOCK") + KAPI_STATE_TRANS_COND("MCL_CURRENT flag set") + KAPI_STATE_TRANS_END + + /* Locking information */ + KAPI_LOCK(0, "mmap_lock", KAPI_LOCK_RWLOCK) + KAPI_LOCK_DESC("Process memory map write lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects VMA modifications during mlockall operation") + KAPI_LOCK_END + + KAPI_LOCK(1, "lru_lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_DESC("Per-memcg LRU list lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Taken when moving pages to unevictable list for all lock= ed pages") + KAPI_LOCK_END + + .error_count =3D 5, + .param_count =3D 1, + .since_version =3D "2.0", + .signal_count =3D 2, + .side_effect_count =3D 6, + .state_trans_count =3D 5, + .lock_count =3D 2, + .examples =3D "mlockall(MCL_CURRENT); // Lock current = mappings\n" + "mlockall(MCL_CURRENT | MCL_FUTURE); // Lock current and futur= e\n" + "mlockall(MCL_CURRENT | MCL_ONFAULT); // Lock current on fault", + .notes =3D "Affects all current VMAs and optionally future mappings via m= m->def_flags", +KAPI_END_SPEC; + SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from 
smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0E5212E8880; Sat, 14 Jun 2025 13:49:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908953; cv=none; b=Bm6zeeD8GVFhOx0lNwZwsWG5VaTQczTU5Svw9XhxJGY9etjiQAmP1zPq4s+PAl7cEGhIluDs+CC+e8G4t5MgwPX/fpdYCdbPIf+vqq9aXE0gb2Wok0cfY3FRh6IvebHAFDVvvk7ZByi2fuPq0/1+8N577xd51KbcFv6njI736Xw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908953; c=relaxed/simple; bh=BBnacI0/ivtpQYx1QHbF8Tz/qIcLNSNB7FFd9e1Kogo=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=WuMZ1RC3xwns2izw6QPn4GFSvKSi1fXTTdy1Oy/oWn73umr9RWEH+88REOLwNBUN4QLZLZhJwVZDwnT2lCgCe2oEUHGmoKO5EXx20EPmgqPTPNNqnYlkdQFsP+9Mmz+Z9XnB0vAkfn9QJ6t8Bp2yoUWXvFZ2+Ckz239jPfqFcBo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=XtjS/DWg; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="XtjS/DWg" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4DEA7C4CEF3; Sat, 14 Jun 2025 13:49:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908952; bh=BBnacI0/ivtpQYx1QHbF8Tz/qIcLNSNB7FFd9e1Kogo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=XtjS/DWgFsmlX8+54GimlnqYnkKtkFWUgKzaVjsEBsnG8y1MR0tf4EHO8Fb/ork9v N5lqjSV+RJAnxoLe1kG3WyqmtZN9HbWTbOfXuBB0Ww+Co1BWa+okFXxDjC25fX5q3R uN3OsI1tpEH9S/nLpX1tJMmDgX3+D6oG1KbLhEVX+oGcmLfFhce+kVfL+Hnl51Q/M7 h5e/QJ5GIDbpQsiQZVE74CoEjxPmbMBnBtYfCB3KptVihKIypd7HNfHj64V/VzT6SG 
mJ6vGgVrxtI31h8FEc+H8QnAlSQ06Xp8AM1QMUiXKK5r1O2U8NysHeLgVS8ObOnbNP aHXUFRaUUnF2g== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 13/19] mm/mlock: add API specification for munlock Date: Sat, 14 Jun 2025 09:48:52 -0400 Message-Id: <20250614134858.790460-14-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the munlock() system call. Signed-off-by: Sasha Levin --- mm/mlock.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/mm/mlock.c b/mm/mlock.c index 95ee707c5922f..ef691adc78ad7 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -929,6 +929,135 @@ SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t,= len, int, flags) return do_mlock(start, len, vm_flags); } =20 + +DEFINE_KERNEL_API_SPEC(sys_munlock) + KAPI_DESCRIPTION("Unlock pages in memory") + KAPI_LONG_DESC("Unlocks pages in the specified address range, allowing th= em " + "to be paged out to swap if needed.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "start", "unsigned long", "Starting address of memory range= to unlock") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_NONE, + .constraints =3D "Rounded down to page boundary", + KAPI_PARAM_END + + KAPI_PARAM(1, "len", "size_t", "Length of memory range to unlock in bytes= ") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + KAPI_PARAM_RANGE(0, LONG_MAX) + .constraints =3D "Rounded up to page 
boundary", + KAPI_PARAM_END + + /* Return specification */ + KAPI_RETURN("long", "0 on success, negative error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .success_value =3D 0, + KAPI_RETURN_END + + /* Error codes */ + KAPI_ERROR(0, -ENOMEM, "ENOMEM", "Memory range not mapped", "(Linux 2.6.9= and later) Some of the specified address range does not correspond to mapp= ed pages in the process address space.") + KAPI_ERROR(1, -EINTR, "EINTR", "Interrupted by signal", "The operation wa= s interrupted by a signal before completion.") + KAPI_ERROR(2, -EINVAL, "EINVAL", "Address overflow", "The result of the a= ddition start+len was less than start (arithmetic overflow).") + + /* Signal specifications */ + KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Fatal signal pending during mmap_write_lock_killa= ble") + KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, SIGTERM, etc.) can interrupt t= he operation when acquiring mmap_write_lock_killable(), causing -EINTR retu= rn") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE, + "process memory", + "Unlocks pages, making them eligible for swapping") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("Pages were previously locked") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "mm->locked_vm", + "Decreases process locked memory counter") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("Pages were counted in locked_vm") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "VMA flags", + "Clears VM_LOCKED and VM_LOCKONFAULT from affected VMAs") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE, + "page flags", + "Clears PG_mlocked flag from unlocked pages") + KAPI_EFFECT_CONDITION("Pages had PG_mlocked set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, 
KAPI_EFFECT_MODIFY_STATE, + "LRU lists", + "Moves pages from unevictable to appropriate LRU list") + KAPI_EFFECT_CONDITION("Pages were on unevictable list") + KAPI_SIDE_EFFECT_END + + /* State transitions */ + KAPI_STATE_TRANS(0, "memory pages", + "locked in RAM", "swappable", + "Pages become eligible for swap out") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "VMA flags", + "VM_LOCKED set", "VM_LOCKED cleared", + "Virtual memory areas no longer marked as locked") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "page residency", + "guaranteed resident", "may be swapped", + "Pages can now be evicted under memory pressure") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "process statistics", + "locked memory accounted", "normal memory accounting", + "Memory no longer counted against RLIMIT_MEMLOCK") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "page LRU status", + "unevictable list", "active/inactive list", + "Pages moved to normal LRU lists for reclaim") + KAPI_STATE_TRANS_COND("Pages were mlocked") + KAPI_STATE_TRANS_END + + /* Locking information */ + KAPI_LOCK(0, "mmap_lock", KAPI_LOCK_RWLOCK) + KAPI_LOCK_DESC("Process memory map write lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects VMA modifications during unlock operation") + KAPI_LOCK_END + + KAPI_LOCK(1, "lru_lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_DESC("Per-memcg LRU list lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Taken when moving pages from unevictable to normal LRU l= ists") + KAPI_LOCK_END + + .error_count =3D 3, + .param_count =3D 2, + .since_version =3D "2.0", + .signal_count =3D 1, + .side_effect_count =3D 5, + .state_trans_count =3D 5, + .lock_count =3D 2, + .examples =3D "munlock(addr, 4096); // Unlock one page\n" + "munlock(addr, len); // Unlock range of pages", + .notes =3D "No special permissions required to unlock memory", +KAPI_END_SPEC; + SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; --=20 2.39.5 From nobody Fri Oct 
10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5817E2E88BD; Sat, 14 Jun 2025 13:49:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908954; cv=none; b=Xw2oMvAk9CjXcec8S/MI2sN4Scuaz8VN+4ojJqqa5RpqQ4cgljed1Xqh/4KxAn6NnVCOA7AjK3o1601z+7lgRPOCtttZ723ZhW1O+IIOGrHtRP/yB8iYN/juFgvwHYpOckDAQyBlzbgx2aka9rR6VCuIkuGBLl+WvtbntpjCm1Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908954; c=relaxed/simple; bh=X6Vv2g/1CFZ/HVdEzZ2DI37QD2R1mWa+3vYCt/c6zM0=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=ldfCRDPQEkvZqr3tNojMp4NkrF1UBw5TEyvlwub+8e+uH8qKHBoZ55IQctnYdgH6o5tF+rQ8clFJYPmAVm7EGq9fET0EbjxKjsyUPS0tzD2rPd/7X8/qWRAju4rIWDrtI3UAHsg7bauY8Tf7Vo9Zu2VAy8qlL4POE+h9VSa7DT4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FZdXN6dy; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FZdXN6dy" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 37024C4CEF0; Sat, 14 Jun 2025 13:49:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908953; bh=X6Vv2g/1CFZ/HVdEzZ2DI37QD2R1mWa+3vYCt/c6zM0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FZdXN6dyBaCOC/zOYipLnpKw1Y2J0Iz4hFXODov+4FN9AM+s19eF6kZVRCFMuRcls ZWMfV/xT9yGg11g9GxH2Ba4qjZJ2U7PsxttaFq5AOB6mwZNuulYxtlpEPH8OyS16uf SnY0SlbvZoZvqJyBK+Wc403Xt3Tb5J+oHtvhuIupt2803WMvlZksw3qgfhkzoZ/vc4 Fa6aLYyWQ6ptyentM0hn/RCtY+7VX2tv9Tdds6T8Uv/FSERkfc+nuNAxGdxrNzdLh6 
fxMT/kPNC03m6W0Pr1bLFugiG9+r3WVKj+dib2jmH4qXZtdvwQTRIx4kFTHMNO84a9 T+51Tt2i8AxYQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 14/19] mm/mlock: add API specification for munlockall Date: Sat, 14 Jun 2025 09:48:53 -0400 Message-Id: <20250614134858.790460-15-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specification for the munlockall() system call. Signed-off-by: Sasha Levin --- mm/mlock.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/mm/mlock.c b/mm/mlock.c index ef691adc78ad7..80f51e932aa95 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -1299,6 +1299,126 @@ SYSCALL_DEFINE1(mlockall, int, flags) return ret; } =20 + +DEFINE_KERNEL_API_SPEC(sys_munlockall) + KAPI_DESCRIPTION("Unlock all process pages") + KAPI_LONG_DESC("Unlocks all pages mapped into the process address space a= nd " + "clears the MCL_FUTURE flag if set.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* No parameters - this is a SYSCALL_DEFINE0 */ + .param_count =3D 0, + + /* Return specification */ + KAPI_RETURN("long", "0 on success, negative error code on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .success_value =3D 0, + KAPI_RETURN_END + + /* Error codes */ + KAPI_ERROR(0, -EINTR, "EINTR", "Interrupted by signal", "The operation wa= s interrupted by a signal before completion.") + KAPI_ERROR(1, -ENOMEM, "ENOMEM", "Memory operation failed", "Failed to mo= dify memory mappings (should not normally occur).") + + /* Signal specifications */ + 
KAPI_SIGNAL(0, 0, "FATAL_SIGNALS", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTIO= N_RETURN) + KAPI_SIGNAL_CONDITION("Fatal signal pending during mmap_write_lock_killa= ble") + KAPI_SIGNAL_DESC("Fatal signals (SIGKILL, SIGTERM, etc.) can interrupt t= he operation when acquiring mmap_write_lock_killable(), causing -EINTR retu= rn") + KAPI_SIGNAL_RESTARTABLE + KAPI_SIGNAL_END + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE, + "all process memory", + "Unlocks all pages, making entire address space swappable") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("Process had locked pages") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "mm->def_flags", + "Clears VM_LOCKED from default flags for future mappings") + KAPI_EFFECT_REVERSIBLE + KAPI_EFFECT_CONDITION("MCL_FUTURE was previously set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_MODIFY_STATE, + "mm->locked_vm", + "Resets process locked memory counter to zero") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE, + "all VMA flags", + "Clears VM_LOCKED and VM_LOCKONFAULT from all VMAs") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE, + "page flags", + "Clears PG_mlocked flag from all locked pages") + KAPI_EFFECT_CONDITION("Pages had PG_mlocked set") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(5, KAPI_EFFECT_MODIFY_STATE, + "LRU lists", + "Moves all pages from unevictable to normal LRU lists") + KAPI_EFFECT_CONDITION("Pages were on unevictable list") + KAPI_SIDE_EFFECT_END + + /* State transitions */ + KAPI_STATE_TRANS(0, "all memory pages", + "locked in RAM", "swappable", + "All pages in process become eligible for swap out") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "future mappings", + "auto-locked", "normal", + "New mappings will no longer be automatically locked") + KAPI_STATE_TRANS_COND("MCL_FUTURE was set") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(2, "all 
VMA flags", + "VM_LOCKED set", "VM_LOCKED cleared", + "All virtual memory areas no longer marked as locked") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(3, "process statistics", + "all memory locked", "no memory locked", + "Entire locked memory accounting reset to zero") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(4, "page LRU status", + "unevictable list", "active/inactive list", + "All pages moved to normal LRU lists for reclaim") + KAPI_STATE_TRANS_COND("Pages were mlocked") + KAPI_STATE_TRANS_END + + /* Locking information */ + KAPI_LOCK(0, "mmap_lock", KAPI_LOCK_RWLOCK) + KAPI_LOCK_DESC("Process memory map write lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Protects VMA modifications during munlockall operation") + KAPI_LOCK_END + + KAPI_LOCK(1, "lru_lock", KAPI_LOCK_SPINLOCK) + KAPI_LOCK_DESC("Per-memcg LRU list lock") + KAPI_LOCK_ACQUIRED + KAPI_LOCK_RELEASED + KAPI_LOCK_DESC("Taken when moving all pages from unevictable to normal L= RU lists") + KAPI_LOCK_END + + .error_count =3D 2, + .since_version =3D "2.0", + .signal_count =3D 1, + .side_effect_count =3D 6, + .state_trans_count =3D 5, + .lock_count =3D 2, + .examples =3D "munlockall(); // Unlock all pages", + .notes =3D "Clears VM_LOCKED and VM_LOCKONFAULT from all VMAs and mm->def= _flags", +KAPI_END_SPEC; + SYSCALL_DEFINE0(munlockall) { int ret; --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D4F1C2E92BA; Sat, 14 Jun 2025 13:49:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908954; cv=none; 
b=OQwigceYZjSGPu71WdidGDd/A/YDcKJ1atigrWHnlQ4ppN57qu1Nb48O1HnZSxyCQz18kTzcLKYOsxCnkClRpqK4Iw7eUOYRImefgNm7s7C+YOyqTLXsHUpDFns2a62OTg5pDXPJhE5M8Gz/V1KCbKEkNuCwcB8eq7HNlYFdeFA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908954; c=relaxed/simple; bh=P2s5mEiKe4Hy2Gm3hQTnSt3MLEij7AIBPkgx8m8Hkmk=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=P1MwlOArzRLh9u+IguNGgYWX642awVCw27KtoW73TbGwQJzfeiHepJ9oCUo4r3s7P8LDeZqSxKj1lztJtAGQVD3SqOsJDC3yLaBeUIjt5kzpVXyE9RX4MqAVtElHnPJvWnwmJh57Np+2yQR7y/eYsdidUuO0rJKrhthstLpxBpQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Y0d38YO0; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Y0d38YO0" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1FED6C4CEEE; Sat, 14 Jun 2025 13:49:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908954; bh=P2s5mEiKe4Hy2Gm3hQTnSt3MLEij7AIBPkgx8m8Hkmk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Y0d38YO0V7cSIQa9gNWPQEmosLQ9xaGbh3d2Rj3ooFmw1nZuO4YWVEwo3Ph6yi4Zi IIWXaUsH+hf+AX/oPgQ/Bugws3/yW8iUjLBKO2i51+bhYIPmPg0sxEa1uZb9P7pIm+ IFHKvVcn4NRaB7eq0d3xbkYkKz2uk7WiGjmMgezcRTYp8iRMS2Ce9OLVAxh2NXTdGq QNbQr3uXawQPP2r6AADIW8FR4MMhlEzuvR9cebLnFY6HbecvLie3l5+8Z/3vdKcwAW sxwPe8wAL8t7FBs7G6AYZNUQQMAtzFFxOT7O9wD2lIAGkHCFZM0obvV08lRqShOlRN M0KviSoiuuilw== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 15/19] kernel/api: add debugfs interface for kernel API specifications Date: Sat, 14 Jun 2025 09:48:54 -0400 Message-Id: <20250614134858.790460-16-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: 
<20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a debugfs interface to expose kernel API specifications at runtime. This allows tools and users to query the complete API specifications through the debugfs filesystem. The interface provides: - /sys/kernel/debug/kapi/list - lists all available API specifications - /sys/kernel/debug/kapi/specs/ - detailed info for each API Each specification file includes: - Function name, version, and descriptions - Execution context requirements and flags - Parameter details with types, flags, and constraints - Return value specifications and success conditions - Error codes with descriptions and conditions - Locking requirements and constraints - Signal handling specifications - Examples, notes, and deprecation status This enables runtime introspection of kernel APIs for documentation tools, static analyzers, and debugging purposes. Signed-off-by: Sasha Levin --- kernel/api/Kconfig | 20 +++ kernel/api/Makefile | 5 +- kernel/api/kapi_debugfs.c | 340 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 kernel/api/kapi_debugfs.c diff --git a/kernel/api/Kconfig b/kernel/api/Kconfig index fde25ec70e134..d2754b21acc43 100644 --- a/kernel/api/Kconfig +++ b/kernel/api/Kconfig @@ -33,3 +33,23 @@ config KAPI_RUNTIME_CHECKS development. The checks use WARN_ONCE to report violations. =20 If unsure, say N. + +config KAPI_SPEC_DEBUGFS + bool "Export kernel API specifications via debugfs" + depends on KAPI_SPEC + depends on DEBUG_FS + help + This option enables exporting kernel API specifications through + the debugfs filesystem. When enabled, specifications can be + accessed at /sys/kernel/debug/kapi/. 
+ + The debugfs interface provides: + - A list of all available API specifications + - Detailed information for each API including parameters, + return values, errors, locking requirements, and constraints + - Complete machine-readable representation of the specs + + This is useful for documentation tools, static analyzers, and + runtime introspection of kernel APIs. + + If unsure, say N. diff --git a/kernel/api/Makefile b/kernel/api/Makefile index 4120ded7e5cf1..07b8c007ec156 100644 --- a/kernel/api/Makefile +++ b/kernel/api/Makefile @@ -4,4 +4,7 @@ # =20 # Core API specification framework -obj-$(CONFIG_KAPI_SPEC) +=3D kernel_api_spec.o \ No newline at end of file +obj-$(CONFIG_KAPI_SPEC) +=3D kernel_api_spec.o + +# Debugfs interface for kernel API specs +obj-$(CONFIG_KAPI_SPEC_DEBUGFS) +=3D kapi_debugfs.o \ No newline at end of file diff --git a/kernel/api/kapi_debugfs.c b/kernel/api/kapi_debugfs.c new file mode 100644 index 0000000000000..bf65ea6a49205 --- /dev/null +++ b/kernel/api/kapi_debugfs.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kernel API specification debugfs interface + * + * This provides a debugfs interface to expose kernel API specifications + * at runtime, allowing tools and users to query the complete API specs. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +/* External symbols for kernel API spec section */ +extern struct kernel_api_spec __start_kapi_specs[]; +extern struct kernel_api_spec __stop_kapi_specs[]; + +static struct dentry *kapi_debugfs_root; + +/* Helper function to print parameter type as string */ +static const char *param_type_str(enum kapi_param_type type) +{ + switch (type) { + case KAPI_TYPE_INT: return "int"; + case KAPI_TYPE_UINT: return "uint"; + case KAPI_TYPE_PTR: return "ptr"; + case KAPI_TYPE_STRUCT: return "struct"; + case KAPI_TYPE_UNION: return "union"; + case KAPI_TYPE_ARRAY: return "array"; + case KAPI_TYPE_FD: return "fd"; + case KAPI_TYPE_ENUM: return "enum"; + case KAPI_TYPE_USER_PTR: return "user_ptr"; + case KAPI_TYPE_PATH: return "path"; + case KAPI_TYPE_FUNC_PTR: return "func_ptr"; + case KAPI_TYPE_CUSTOM: return "custom"; + default: return "unknown"; + } +} + +/* Helper to print parameter flags */ +static void print_param_flags(struct seq_file *m, u32 flags) +{ + seq_printf(m, " flags: "); + if (flags & KAPI_PARAM_IN) seq_printf(m, "IN "); + if (flags & KAPI_PARAM_OUT) seq_printf(m, "OUT "); + if (flags & KAPI_PARAM_INOUT) seq_printf(m, "INOUT "); + if (flags & KAPI_PARAM_OPTIONAL) seq_printf(m, "OPTIONAL "); + if (flags & KAPI_PARAM_CONST) seq_printf(m, "CONST "); + if (flags & KAPI_PARAM_USER) seq_printf(m, "USER "); + if (flags & KAPI_PARAM_VOLATILE) seq_printf(m, "VOLATILE "); + if (flags & KAPI_PARAM_DMA) seq_printf(m, "DMA "); + if (flags & KAPI_PARAM_ALIGNED) seq_printf(m, "ALIGNED "); + seq_printf(m, "\n"); +} + +/* Helper to print context flags */ +static void print_context_flags(struct seq_file *m, u32 flags) +{ + seq_printf(m, "Context flags: "); + if (flags & KAPI_CTX_PROCESS) seq_printf(m, "PROCESS "); + if (flags & KAPI_CTX_HARDIRQ) seq_printf(m, "HARDIRQ "); + if (flags & KAPI_CTX_SOFTIRQ) seq_printf(m, "SOFTIRQ "); + if (flags & KAPI_CTX_NMI) seq_printf(m, "NMI "); + if (flags & 
KAPI_CTX_SLEEPABLE) seq_printf(m, "SLEEPABLE "); + if (flags & KAPI_CTX_ATOMIC) seq_printf(m, "ATOMIC "); + if (flags & KAPI_CTX_PREEMPT_DISABLED) seq_printf(m, "PREEMPT_DISABLED "); + if (flags & KAPI_CTX_IRQ_DISABLED) seq_printf(m, "IRQ_DISABLED "); + seq_printf(m, "\n"); +} + +/* Show function for individual API spec */ +static int kapi_spec_show(struct seq_file *m, void *v) +{ + struct kernel_api_spec *spec =3D m->private; + int i; + + seq_printf(m, "Kernel API Specification\n"); + seq_printf(m, "=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D\n\n"); + + /* Basic info */ + seq_printf(m, "Name: %s\n", spec->name); + seq_printf(m, "Version: %u\n", spec->version); + seq_printf(m, "Description: %s\n", spec->description); + if (strlen(spec->long_description) > 0) + seq_printf(m, "Long description: %s\n", spec->long_description); + + /* Context */ + print_context_flags(m, spec->context_flags); + seq_printf(m, "\n"); + + /* Parameters */ + if (spec->param_count > 0) { + seq_printf(m, "Parameters (%u):\n", spec->param_count); + for (i =3D 0; i < spec->param_count; i++) { + struct kapi_param_spec *param =3D &spec->params[i]; + seq_printf(m, " [%d] %s:\n", i, param->name); + seq_printf(m, " type: %s (%s)\n", + param_type_str(param->type), param->type_name); + print_param_flags(m, param->flags); + if (strlen(param->description) > 0) + seq_printf(m, " description: %s\n", param->description); + if (param->size > 0) + seq_printf(m, " size: %zu\n", param->size); + if (param->alignment > 0) + seq_printf(m, " alignment: %zu\n", param->alignment); + + /* Print constraints if any */ + if (param->constraint_type !=3D KAPI_CONSTRAINT_NONE) { + seq_printf(m, " constraints:\n"); + switch (param->constraint_type) { + case KAPI_CONSTRAINT_RANGE: + seq_printf(m, " type: range\n"); + seq_printf(m, " min: %lld\n", param->min_value); + seq_printf(m, " max: %lld\n", param->max_value); + break; + case KAPI_CONSTRAINT_MASK: + seq_printf(m, " type: mask\n"); + 
seq_printf(m, " valid_bits: 0x%llx\n", param->valid_mask); + break; + case KAPI_CONSTRAINT_ENUM: + seq_printf(m, " type: enum\n"); + seq_printf(m, " count: %u\n", param->enum_count); + break; + case KAPI_CONSTRAINT_CUSTOM: + seq_printf(m, " type: custom\n"); + if (strlen(param->constraints) > 0) + seq_printf(m, " description: %s\n", + param->constraints); + break; + default: + break; + } + } + seq_printf(m, "\n"); + } + } + + /* Return value */ + seq_printf(m, "Return value:\n"); + seq_printf(m, " type: %s\n", spec->return_spec.type_name); + if (strlen(spec->return_spec.description) > 0) + seq_printf(m, " description: %s\n", spec->return_spec.description); + + switch (spec->return_spec.check_type) { + case KAPI_RETURN_EXACT: + seq_printf(m, " success: =3D=3D %lld\n", spec->return_spec.success_valu= e); + break; + case KAPI_RETURN_RANGE: + seq_printf(m, " success: [%lld, %lld]\n", + spec->return_spec.success_min, + spec->return_spec.success_max); + break; + case KAPI_RETURN_FD: + seq_printf(m, " success: valid file descriptor (>=3D 0)\n"); + break; + case KAPI_RETURN_ERROR_CHECK: + seq_printf(m, " success: error check\n"); + break; + case KAPI_RETURN_CUSTOM: + seq_printf(m, " success: custom check\n"); + break; + default: + break; + } + seq_printf(m, "\n"); + + /* Errors */ + if (spec->error_count > 0) { + seq_printf(m, "Errors (%u):\n", spec->error_count); + for (i =3D 0; i < spec->error_count; i++) { + struct kapi_error_spec *err =3D &spec->errors[i]; + seq_printf(m, " %s (%d): %s\n", + err->name, err->error_code, err->description); + if (strlen(err->condition) > 0) + seq_printf(m, " condition: %s\n", err->condition); + } + seq_printf(m, "\n"); + } + + /* Locks */ + if (spec->lock_count > 0) { + seq_printf(m, "Locks (%u):\n", spec->lock_count); + for (i =3D 0; i < spec->lock_count; i++) { + struct kapi_lock_spec *lock =3D &spec->locks[i]; + const char *type_str; + switch (lock->lock_type) { + case KAPI_LOCK_MUTEX: type_str =3D "mutex"; break; + case 
KAPI_LOCK_SPINLOCK: type_str =3D "spinlock"; break; + case KAPI_LOCK_RWLOCK: type_str =3D "rwlock"; break; + case KAPI_LOCK_SEMAPHORE: type_str =3D "semaphore"; break; + case KAPI_LOCK_RCU: type_str =3D "rcu"; break; + case KAPI_LOCK_SEQLOCK: type_str =3D "seqlock"; break; + default: type_str =3D "unknown"; break; + } + seq_printf(m, " %s (%s): %s\n", + lock->lock_name, type_str, lock->description); + if (lock->acquired) + seq_printf(m, " acquired by function\n"); + if (lock->released) + seq_printf(m, " released by function\n"); + } + seq_printf(m, "\n"); + } + + /* Constraints */ + if (spec->constraint_count > 0) { + seq_printf(m, "Additional constraints (%u):\n", spec->constraint_count); + for (i =3D 0; i < spec->constraint_count; i++) { + seq_printf(m, " - %s\n", spec->constraints[i].description); + } + seq_printf(m, "\n"); + } + + /* Signals */ + if (spec->signal_count > 0) { + seq_printf(m, "Signal handling (%u):\n", spec->signal_count); + for (i =3D 0; i < spec->signal_count; i++) { + struct kapi_signal_spec *sig =3D &spec->signals[i]; + seq_printf(m, " %s (%d):\n", sig->signal_name, sig->signal_num); + seq_printf(m, " direction: "); + if (sig->direction & KAPI_SIGNAL_SEND) seq_printf(m, "send "); + if (sig->direction & KAPI_SIGNAL_RECEIVE) seq_printf(m, "receive "); + if (sig->direction & KAPI_SIGNAL_HANDLE) seq_printf(m, "handle "); + if (sig->direction & KAPI_SIGNAL_BLOCK) seq_printf(m, "block "); + if (sig->direction & KAPI_SIGNAL_IGNORE) seq_printf(m, "ignore "); + seq_printf(m, "\n"); + seq_printf(m, " action: "); + switch (sig->action) { + case KAPI_SIGNAL_ACTION_DEFAULT: seq_printf(m, "default"); break; + case KAPI_SIGNAL_ACTION_TERMINATE: seq_printf(m, "terminate"); break; + case KAPI_SIGNAL_ACTION_COREDUMP: seq_printf(m, "coredump"); break; + case KAPI_SIGNAL_ACTION_STOP: seq_printf(m, "stop"); break; + case KAPI_SIGNAL_ACTION_CONTINUE: seq_printf(m, "continue"); break; + case KAPI_SIGNAL_ACTION_CUSTOM: seq_printf(m, "custom"); break; + case 
KAPI_SIGNAL_ACTION_RETURN: seq_printf(m, "return"); break; + case KAPI_SIGNAL_ACTION_RESTART: seq_printf(m, "restart"); break; + default: seq_printf(m, "unknown"); break; + } + seq_printf(m, "\n"); + if (strlen(sig->description) > 0) + seq_printf(m, " description: %s\n", sig->description); + } + seq_printf(m, "\n"); + } + + /* Additional info */ + if (strlen(spec->examples) > 0) { + seq_printf(m, "Examples:\n%s\n\n", spec->examples); + } + if (strlen(spec->notes) > 0) { + seq_printf(m, "Notes:\n%s\n\n", spec->notes); + } + if (strlen(spec->since_version) > 0) { + seq_printf(m, "Since: %s\n", spec->since_version); + } + if (spec->deprecated) { + seq_printf(m, "DEPRECATED"); + if (strlen(spec->replacement) > 0) + seq_printf(m, " - use %s instead", spec->replacement); + seq_printf(m, "\n"); + } + + return 0; +} + +static int kapi_spec_open(struct inode *inode, struct file *file) +{ + return single_open(file, kapi_spec_show, inode->i_private); +} + +static const struct file_operations kapi_spec_fops =3D { + .open =3D kapi_spec_open, + .read =3D seq_read, + .llseek =3D seq_lseek, + .release =3D single_release, +}; + +/* Show all available API specs */ +static int kapi_list_show(struct seq_file *m, void *v) +{ + struct kernel_api_spec *spec; + int count =3D 0; + + seq_printf(m, "Available Kernel API Specifications\n"); + seq_printf(m, "=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D\n\n"); + + for (spec =3D __start_kapi_specs; spec < __stop_kapi_specs; spec++) { + seq_printf(m, "%s - %s\n", spec->name, spec->description); + count++; + } + + seq_printf(m, "\nTotal: %d specifications\n", count); + return 0; +} + +static int kapi_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, kapi_list_show, NULL); +} + +static const struct file_operations kapi_list_fops =3D { + .open =3D kapi_list_open, + .read =3D seq_read, + .llseek =3D seq_lseek, + .release =3D single_release, +}; + +static int 
__init kapi_debugfs_init(void) +{ + struct kernel_api_spec *spec; + struct dentry *spec_dir; + + /* Create main directory */ + kapi_debugfs_root =3D debugfs_create_dir("kapi", NULL); + + /* Create list file */ + debugfs_create_file("list", 0444, kapi_debugfs_root, NULL, &kapi_list_fop= s); + + /* Create specs subdirectory */ + spec_dir =3D debugfs_create_dir("specs", kapi_debugfs_root); + + /* Create a file for each API spec */ + for (spec =3D __start_kapi_specs; spec < __stop_kapi_specs; spec++) { + debugfs_create_file(spec->name, 0444, spec_dir, spec, &kapi_spec_fops); + } + + pr_info("Kernel API debugfs interface initialized\n"); + return 0; +} + +static void __exit kapi_debugfs_exit(void) +{ + debugfs_remove_recursive(kapi_debugfs_root); +} + +/* Initialize as part of kernel, not as a module */ +fs_initcall(kapi_debugfs_init); \ No newline at end of file --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 105922EA486; Sat, 14 Jun 2025 13:49:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908957; cv=none; b=QfYrCuIKBC/T42x1ADhM2LASTkA9GnZQKV80beM2RdxPFzeUYkYbSvpzg/aymFfgEK610vUW12M/8KME9AZt+QN6as04tr7cBirbNio+5vBSkO6AzSHD/UmiFBtSOzTH/oNW8cuspngnnPp1r44MET8qdVrlKwb8UhapE8eXD4o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908957; c=relaxed/simple; bh=GI44pLwmhBFun8knb7fvgIc7M6bBnm5MRa+vS7fKwr4=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Of0V+WhkaMQfiNkztgkhN+0kZwt9b5+fnr3pnfEwGETaGjS7yMepeOeXKLEnyB8+xUMSU4eAdYoLeRMso4aSo1yhZWAQDeIhcq775m9BIsTiwXo3pF93L2OggI3V+9N3yO2i5ChhvmIT/QHMh7p7KoOIyKh1uB6sLvFgUhBoDx8= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=L+F/2xVY; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="L+F/2xVY" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 081E9C4CEF1; Sat, 14 Jun 2025 13:49:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908955; bh=GI44pLwmhBFun8knb7fvgIc7M6bBnm5MRa+vS7fKwr4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=L+F/2xVYT9Rfkf2heHdSWUjzyn1HxlZbdab0/2+LyZudEA1u/SE7d9XsQxFyPu/q4 r/6irRC862b0by83XBD4oKuObnWc5ZOzbh7Z5Cf+CGJRtmDNf3kx1ocqpjjkKUYP0I ogQ+n8Ypb4xv1ZmhBxjY2HTvpxJQXC5UcwE1Gv9Cng9dmnmr1U2LXLo+hgrvo6mmz+ teLJpiiEuSrID4G+Q1p+bl9ApzdI7nOoIvF+UXr3Dq8aeeK7boJiO/HckkFkgQ3SEq h2jIQ1W3AlMnLnB3SYdW7fOO64SLhY02I5kYi/IyYaYoN/IIiE0TQBhCz+iY9nLV4N TSWwBW1Qz43+A== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 16/19] kernel/api: add IOCTL specification infrastructure Date: Sat, 14 Jun 2025 09:48:55 -0400 Message-Id: <20250614134858.790460-17-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add IOCTL API specification support to the kernel API specification framework. This enables detailed documentation and runtime validation of IOCTL interfaces. 
Key features: - IOCTL specification structure with command info and parameter details - Registration/unregistration functions for IOCTL specs - Helper macros for defining IOCTL specifications - KAPI_IOCTL_SPEC_DRIVER macro for simplified driver integration - Runtime validation support with KAPI_DEFINE_FOPS wrapper - Validation of IOCTL parameters and return values - Integration with existing kernel API spec infrastructure The validation framework checks: - Parameter constraints (ranges, enums, masks) - User pointer validity - Buffer size constraints - Return value correctness against specification Signed-off-by: Sasha Levin --- include/linux/ioctl_api_spec.h | 540 ++++++++++++++++++++++++++++++++ include/linux/kernel_api_spec.h | 2 +- kernel/api/Makefile | 5 +- kernel/api/ioctl_validation.c | 360 +++++++++++++++++++++ kernel/api/kernel_api_spec.c | 90 +++++- 5 files changed, 994 insertions(+), 3 deletions(-) create mode 100644 include/linux/ioctl_api_spec.h create mode 100644 kernel/api/ioctl_validation.c diff --git a/include/linux/ioctl_api_spec.h b/include/linux/ioctl_api_spec.h new file mode 100644 index 0000000000000..ab3337449ad77 --- /dev/null +++ b/include/linux/ioctl_api_spec.h @@ -0,0 +1,540 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ioctl_api_spec.h - IOCTL API specification framework + * + * Extends the kernel API specification framework to support ioctl validat= ion + * and documentation. 
+ */ + +#ifndef _LINUX_IOCTL_API_SPEC_H +#define _LINUX_IOCTL_API_SPEC_H + +#include +#include +#include + +/* Forward declarations */ +struct file; + +/** + * struct kapi_ioctl_spec - IOCTL-specific API specification + * @api_spec: Base API specification + * @cmd: IOCTL command number + * @cmd_name: Human-readable command name + * @input_size: Size of input structure (0 if none) + * @output_size: Size of output structure (0 if none) + * @file_ops_name: Name of the file_operations structure + */ +struct kapi_ioctl_spec { + struct kernel_api_spec api_spec; + unsigned int cmd; + const char *cmd_name; + size_t input_size; + size_t output_size; + const char *file_ops_name; +}; + +/* Registry functions for IOCTL specifications */ +#ifdef CONFIG_KAPI_SPEC +int kapi_register_ioctl_spec(const struct kapi_ioctl_spec *spec); +void kapi_unregister_ioctl_spec(unsigned int cmd); +const struct kapi_ioctl_spec *kapi_get_ioctl_spec(unsigned int cmd); + +/* IOCTL validation functions */ +#ifdef CONFIG_KAPI_RUNTIME_CHECKS +int kapi_validate_ioctl(struct file *filp, unsigned int cmd, void __user *= arg); +int kapi_validate_ioctl_struct(const struct kapi_ioctl_spec *spec, + const void *data, size_t size); +#else +static inline int kapi_validate_ioctl(struct file *filp, unsigned int cmd, + void __user *arg) +{ + return 0; +} +#endif /* CONFIG_KAPI_RUNTIME_CHECKS */ + +#else /* !CONFIG_KAPI_SPEC */ +static inline int kapi_register_ioctl_spec(const struct kapi_ioctl_spec *s= pec) +{ + return 0; +} +static inline void kapi_unregister_ioctl_spec(unsigned int cmd) {} +static inline const struct kapi_ioctl_spec *kapi_get_ioctl_spec(unsigned i= nt cmd) +{ + return NULL; +} +#endif /* CONFIG_KAPI_SPEC */ + +/* Helper macros for IOCTL specification */ + +/** + * DEFINE_IOCTL_API_SPEC - Start an IOCTL API specification + * @name: Unique identifier for the specification + * @cmd: IOCTL command number + * @cmd_name_str: String name of the command + */ +#define DEFINE_IOCTL_API_SPEC(name, cmd, 
cmd_name_str) \ +static const struct kapi_ioctl_spec name##_spec =3D { \ + .cmd =3D cmd, \ + .cmd_name =3D cmd_name_str, \ + .api_spec =3D { \ + .name =3D #name, + +/** + * KAPI_IOCTL_SIZE - Specify input/output structure sizes + * @in_size: Size of input structure + * @out_size: Size of output structure + */ +#define KAPI_IOCTL_SIZE(in_size, out_size) \ + }, \ + .input_size =3D in_size, \ + .output_size =3D out_size, + +/** + * KAPI_IOCTL_FILE_OPS - Specify the file_operations structure name + * @ops_name: Name of the file_operations structure + */ +#define KAPI_IOCTL_FILE_OPS(ops_name) \ + .file_ops_name =3D #ops_name, + +/** + * Common IOCTL parameter specifications + */ +#define KAPI_IOCTL_PARAM_SIZE \ + KAPI_PARAM(0, "size", "__u32", "Size of the structure") \ + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) \ + .type =3D KAPI_TYPE_UINT, \ + .constraint_type =3D KAPI_CONSTRAINT_CUSTOM, \ + .constraints =3D "Must match sizeof(struct)", \ + KAPI_PARAM_END + +#define KAPI_IOCTL_PARAM_FLAGS \ + KAPI_PARAM(1, "flags", "__u32", "Feature flags") \ + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) \ + .type =3D KAPI_TYPE_UINT, \ + .constraint_type =3D KAPI_CONSTRAINT_MASK, \ + .valid_mask =3D 0, /* 0 means no flags currently */ \ + KAPI_PARAM_END + +/** + * KAPI_IOCTL_PARAM_USER_BUF - User buffer parameter + * @idx: Parameter index + * @name: Parameter name + * @desc: Parameter description + * @len_idx: Index of the length parameter + */ +#define KAPI_IOCTL_PARAM_USER_BUF(idx, name, desc, len_idx) \ + KAPI_PARAM(idx, name, "__aligned_u64", desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER_PTR) \ + .type =3D KAPI_TYPE_USER_PTR, \ + .size_param_idx =3D len_idx, \ + KAPI_PARAM_END + +/** + * KAPI_IOCTL_PARAM_USER_OUT_BUF - User output buffer parameter + * @idx: Parameter index + * @name: Parameter name + * @desc: Parameter description + * @len_idx: Index of the length parameter + */ +#define KAPI_IOCTL_PARAM_USER_OUT_BUF(idx, name, desc, len_idx) \ + KAPI_PARAM(idx, name, "__aligned_u64", 
desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT | KAPI_PARAM_USER_PTR) \ + .type =3D KAPI_TYPE_USER_PTR, \ + .size_param_idx =3D len_idx, \ + KAPI_PARAM_END + +/** + * KAPI_IOCTL_PARAM_LEN - Buffer length parameter + * @idx: Parameter index + * @name: Parameter name + * @desc: Parameter description + * @max_size: Maximum allowed size + */ +#define KAPI_IOCTL_PARAM_LEN(idx, name, desc, max_size) \ + KAPI_PARAM(idx, name, "__u32", desc) \ + KAPI_PARAM_FLAGS(KAPI_PARAM_INOUT) \ + .type =3D KAPI_TYPE_UINT, \ + .constraint_type =3D KAPI_CONSTRAINT_RANGE, \ + .min_value =3D 0, \ + .max_value =3D max_size, \ + KAPI_PARAM_END + +/* End the IOCTL specification */ +#define KAPI_IOCTL_END_SPEC \ +}; \ + \ +static int __init name##_spec_init(void) \ +{ \ + return kapi_register_ioctl_spec(&name##_spec); \ +} \ + \ +static void __exit name##_spec_exit(void) \ +{ \ + kapi_unregister_ioctl_spec(name##_spec.cmd); \ +} \ + \ +module_init(name##_spec_init); \ +module_exit(name##_spec_exit); + +/* Inline IOCTL specification support */ + +/* Forward declaration */ +struct fwctl_ucmd; + +/** + * struct kapi_ioctl_handler - IOCTL handler with inline specification + * @spec: IOCTL specification + * @handler: Original IOCTL handler function + */ +struct kapi_ioctl_handler { + struct kapi_ioctl_spec spec; + int (*handler)(struct fwctl_ucmd *ucmd); +}; + +/** + * DEFINE_IOCTL_HANDLER - Define an IOCTL handler with inline specification + * @name: Handler name + * @cmd: IOCTL command number + * @handler_func: Handler function + * @struct_type: Structure type for this IOCTL + * @last_field: Last field in the structure + */ +#define DEFINE_IOCTL_HANDLER(name, cmd, handler_func, struct_type, last_fi= eld) \ +static const struct kapi_ioctl_handler name =3D { \ + .spec =3D { \ + .cmd =3D cmd, \ + .cmd_name =3D #cmd, \ + .input_size =3D sizeof(struct_type), \ + .output_size =3D sizeof(struct_type), \ + .api_spec =3D { \ + .name =3D #name, + +#define KAPI_IOCTL_HANDLER_END \ + }, \ + }, \ + .handler =3D 
handler_func, \ +} + +/** + * kapi_ioctl_wrapper - Wrapper function for transparent IOCTL validation + * @filp: File pointer + * @cmd: IOCTL command + * @arg: User argument + * @real_ioctl: The real ioctl handler + * + * This wrapper performs validation before and after the actual IOCTL call + */ +static inline long kapi_ioctl_wrapper(struct file *filp, unsigned int cmd, + unsigned long arg, + long (*real_ioctl)(struct file *, unsigned int, unsigned long)) +{ + long ret; + +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + /* Pre-validation */ + ret =3D kapi_validate_ioctl(filp, cmd, (void __user *)arg); + if (ret) + return ret; +#endif + + /* Call the real IOCTL handler */ + ret =3D real_ioctl(filp, cmd, arg); + +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + /* Post-validation could be added here if needed */ + /* For example, validating output parameters */ +#endif + + return ret; +} + +/** + * KAPI_IOCTL_OPS - Define file_operations with transparent validation + * @name: Name of the file_operations structure + * @real_ioctl: The real ioctl handler function + * @... : Other file operation handlers + */ +#define KAPI_IOCTL_OPS(name, real_ioctl, ...) 
\ +static long name##_validated_ioctl(struct file *filp, unsigned int cmd, \ + unsigned long arg) \ +{ \ + return kapi_ioctl_wrapper(filp, cmd, arg, real_ioctl); \ +} \ + \ +static const struct file_operations name =3D { \ + .unlocked_ioctl =3D name##_validated_ioctl, \ + __VA_ARGS__ \ +} + +/** + * KAPI_IOCTL_OP_ENTRY - Define an IOCTL operation table entry with spec + * @_ioctl: IOCTL command macro + * @_handler: Handler structure (defined with DEFINE_IOCTL_HANDLER) + * @_struct: Structure type + * @_last: Last field name + */ +#define KAPI_IOCTL_OP_ENTRY(_ioctl, _handler, _struct, _last) \ + [_IOC_NR(_ioctl) - FWCTL_CMD_BASE] =3D { \ + .size =3D sizeof(_struct) + \ + BUILD_BUG_ON_ZERO(sizeof(union fwctl_ucmd_buffer) < \ + sizeof(_struct)), \ + .min_size =3D offsetofend(_struct, _last), \ + .ioctl_num =3D _ioctl, \ + .execute =3D _handler.handler, \ + } + +/* Helper to register all handlers in a module */ +#define KAPI_REGISTER_IOCTL_HANDLERS(handlers, count) \ +static int __init kapi_ioctl_handlers_init(void) \ +{ \ + int i, ret; \ + for (i =3D 0; i < count; i++) { \ + ret =3D kapi_register_ioctl_spec(&handlers[i].spec); \ + if (ret) { \ + while (--i >=3D 0) \ + kapi_unregister_ioctl_spec(handlers[i].spec.cmd); \ + return ret; \ + } \ + } \ + return 0; \ +} \ + \ +static void __exit kapi_ioctl_handlers_exit(void) \ +{ \ + int i; \ + for (i =3D 0; i < count; i++) \ + kapi_unregister_ioctl_spec(handlers[i].spec.cmd); \ +} \ + \ +module_init(kapi_ioctl_handlers_init); \ +module_exit(kapi_ioctl_handlers_exit) + +/** + * KAPI_REGISTER_IOCTL_SPECS - Register an array of IOCTL specifications + * @specs: Array of pointers to kapi_ioctl_spec + * @count: Number of specifications + * + * This macro generates init/exit functions to register/unregister + * the IOCTL specifications. The functions return 0 on success or + * negative error code on failure. 
+ * + * Usage: + * static const struct kapi_ioctl_spec *my_ioctl_specs[] =3D { + * &spec1, &spec2, &spec3, + * }; + * KAPI_REGISTER_IOCTL_SPECS(my_ioctl_specs, ARRAY_SIZE(my_ioctl_specs)) + * + * Then call the generated functions in your module init/exit: + * ret =3D kapi_register_##name(); + * kapi_unregister_##name(); + */ +#define KAPI_REGISTER_IOCTL_SPECS(name, specs) \ +static int kapi_register_##name(void) \ +{ \ + int i, ret; \ + for (i =3D 0; i < ARRAY_SIZE(specs); i++) { \ + ret =3D kapi_register_ioctl_spec(specs[i]); \ + if (ret) { \ + pr_warn("Failed to register IOCTL spec for %s: %d\n", \ + specs[i]->cmd_name, ret); \ + while (--i >=3D 0) \ + kapi_unregister_ioctl_spec(specs[i]->cmd); \ + return ret; \ + } \ + } \ + pr_info("Registered %zu IOCTL specifications\n", \ + ARRAY_SIZE(specs)); \ + return 0; \ +} \ + \ +static void kapi_unregister_##name(void) \ +{ \ + int i; \ + for (i =3D 0; i < ARRAY_SIZE(specs); i++) \ + kapi_unregister_ioctl_spec(specs[i]->cmd); \ +} + +/** + * KAPI_DEFINE_IOCTL_SPEC - Define a single IOCTL specification + * @name: Name of the specification variable + * @cmd: IOCTL command number + * @cmd_name: String name of the command + * @in_size: Input structure size + * @out_size: Output structure size + * @fops_name: Name of the file_operations structure + * + * This macro starts the definition of an IOCTL specification. + * It must be followed by the API specification details and + * ended with KAPI_END_IOCTL_SPEC. + * + * Example: + * KAPI_DEFINE_IOCTL_SPEC(my_ioctl_spec, MY_IOCTL, "MY_IOCTL", + * sizeof(struct my_input), sizeof(struct my_outp= ut), + * "my_fops") + * KAPI_DESCRIPTION("Description here") + * ... 
+ * KAPI_END_IOCTL_SPEC; + */ +#define KAPI_DEFINE_IOCTL_SPEC(name, cmd, cmd_name_str, in_size, out_size,= fops) \ +static const struct kapi_ioctl_spec name =3D { \ + .cmd =3D (cmd), \ + .cmd_name =3D cmd_name_str, \ + .input_size =3D in_size, \ + .output_size =3D out_size, \ + .file_ops_name =3D fops, \ + .api_spec =3D { \ + .name =3D #name, + +#define KAPI_END_IOCTL_SPEC \ + }, \ +} + +/** + * KAPI_IOCTL_SPEC_DRIVER - Complete IOCTL specification for a driver + * @driver_name: Name of the driver (used for logging) + * @specs_array: Name of the array containing IOCTL spec pointers + * + * This macro provides everything needed for IOCTL spec registration: + * 1. Generates the specs array declaration + * 2. Creates init/exit functions for registration + * 3. Provides simple function names to call from module init/exit + * + * Usage: + * // Define individual specs + * KAPI_DEFINE_IOCTL_SPEC(spec1, ...) ... KAPI_END_IOCTL_SPEC; + * KAPI_DEFINE_IOCTL_SPEC(spec2, ...) ... KAPI_END_IOCTL_SPEC; + * + * // Create the driver registration (at end of file) + * KAPI_IOCTL_SPEC_DRIVER("my_driver", { + * &spec1, + * &spec2, + * }) + * + * // In module init: ret =3D kapi_ioctl_specs_init(); + * // In module exit: kapi_ioctl_specs_exit(); + */ +#define KAPI_IOCTL_SPEC_DRIVER(driver_name, ...) 
\ +static const struct kapi_ioctl_spec *__kapi_ioctl_specs[] =3D __VA_ARGS__;= \ + \ +static int __init kapi_ioctl_specs_init(void) \ +{ \ + int i, ret; \ + for (i =3D 0; i < ARRAY_SIZE(__kapi_ioctl_specs); i++) { \ + ret =3D kapi_register_ioctl_spec(__kapi_ioctl_specs[i]); \ + if (ret) { \ + pr_warn("%s: Failed to register %s: %d\n", \ + driver_name, \ + __kapi_ioctl_specs[i]->cmd_name, ret); \ + while (--i >=3D 0) \ + kapi_unregister_ioctl_spec( \ + __kapi_ioctl_specs[i]->cmd); \ + return ret; \ + } \ + } \ + pr_info("%s: Registered %zu IOCTL specifications\n", \ + driver_name, ARRAY_SIZE(__kapi_ioctl_specs)); \ + return 0; \ +} \ + \ +static void kapi_ioctl_specs_exit(void) \ +{ \ + int i; \ + for (i =3D 0; i < ARRAY_SIZE(__kapi_ioctl_specs); i++) \ + kapi_unregister_ioctl_spec(__kapi_ioctl_specs[i]->cmd);\ +} + +/* Transparent IOCTL validation wrapper support */ + +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + +/** + * struct kapi_fops_wrapper - Wrapper for file_operations with validation + * @real_fops: Original file_operations + * @wrapped_fops: Modified file_operations with validation wrapper + * @real_ioctl: Original unlocked_ioctl handler + */ +struct kapi_fops_wrapper { + const struct file_operations *real_fops; + const struct file_operations *wrapped_fops; + long (*real_ioctl)(struct file *, unsigned int, unsigned long); +}; + +/* Forward declarations */ +long kapi_ioctl_validation_wrapper(struct file *filp, unsigned int cmd, + unsigned long arg); +void kapi_register_wrapper(struct kapi_fops_wrapper *wrapper); + +/** + * kapi_wrap_file_operations - Wrap file_operations for transparent valida= tion + * @fops: Original file_operations to wrap + * + * This creates a wrapper that intercepts ioctl calls for validation. + * The wrapper is stored in a static variable in the calling module. 
+ */ +#define kapi_wrap_file_operations(fops) \ +({ \ + static struct kapi_fops_wrapper __kapi_wrapper =3D { \ + .real_fops =3D &(fops), \ + }; \ + if (__kapi_wrapper.real_fops->unlocked_ioctl) { \ + __kapi_wrapper.wrapped_fops =3D (fops); \ + __kapi_wrapper.real_ioctl =3D (fops).unlocked_ioctl; \ + __kapi_wrapper.wrapped_fops.unlocked_ioctl =3D \ + kapi_ioctl_validation_wrapper; \ + &__kapi_wrapper.wrapped_fops; \ + } else { \ + &(fops); \ + } \ +}) + + +/** + * KAPI_DEFINE_FOPS - Define file_operations with automatic validation + * @name: Name of the file_operations structure + * @... : File operation handlers + * + * Usage: + * KAPI_DEFINE_FOPS(my_fops, + * .owner =3D THIS_MODULE, + * .open =3D my_open, + * .unlocked_ioctl =3D my_ioctl, + * ); + * + * Then in your module init, call: kapi_init_fops_##name() + */ +#define KAPI_DEFINE_FOPS(name, ...) \ +static const struct file_operations __kapi_real_##name =3D { \ + __VA_ARGS__ \ +}; \ +static struct file_operations __kapi_wrapped_##name; \ +static struct kapi_fops_wrapper __kapi_wrapper_##name; \ +static const struct file_operations *name; \ +static void kapi_init_fops_##name(void) \ +{ \ + if (__kapi_real_##name.unlocked_ioctl) { \ + __kapi_wrapped_##name =3D __kapi_real_##name; \ + __kapi_wrapper_##name.real_fops =3D &__kapi_real_##name; \ + __kapi_wrapper_##name.wrapped_fops =3D &__kapi_wrapped_##name; \ + __kapi_wrapper_##name.real_ioctl =3D \ + __kapi_real_##name.unlocked_ioctl; \ + __kapi_wrapped_##name.unlocked_ioctl =3D \ + kapi_ioctl_validation_wrapper; \ + kapi_register_wrapper(&__kapi_wrapper_##name); \ + name =3D &__kapi_wrapped_##name; \ + } else { \ + name =3D &__kapi_real_##name; \ + } \ +} + +#else /* !CONFIG_KAPI_RUNTIME_CHECKS */ + +/* When runtime checks are disabled, no wrapping occurs */ +#define kapi_wrap_file_operations(fops) (&(fops)) +#define KAPI_DEFINE_FOPS(name, ...) 
\ +static const struct file_operations name =3D { __VA_ARGS__ }; \ +static inline void kapi_init_fops_##name(void) {} + +#endif /* CONFIG_KAPI_RUNTIME_CHECKS */ + +#endif /* _LINUX_IOCTL_API_SPEC_H */ \ No newline at end of file diff --git a/include/linux/kernel_api_spec.h b/include/linux/kernel_api_spe= c.h index 04df5892bc6d6..9590fe3bb007c 100644 --- a/include/linux/kernel_api_spec.h +++ b/include/linux/kernel_api_spec.h @@ -849,7 +849,7 @@ struct kernel_api_spec { #define KAPI_PARAM_OUT (KAPI_PARAM_OUT) #define KAPI_PARAM_INOUT (KAPI_PARAM_IN | KAPI_PARAM_OUT) #define KAPI_PARAM_OPTIONAL (KAPI_PARAM_OPTIONAL) -#define KAPI_PARAM_USER_PTR (KAPI_PARAM_USER | KAPI_PARAM_PTR) +#define KAPI_PARAM_USER_PTR (KAPI_PARAM_USER) =20 /* Validation and runtime checking */ =20 diff --git a/kernel/api/Makefile b/kernel/api/Makefile index 07b8c007ec156..9d2daf38f0029 100644 --- a/kernel/api/Makefile +++ b/kernel/api/Makefile @@ -6,5 +6,8 @@ # Core API specification framework obj-$(CONFIG_KAPI_SPEC) +=3D kernel_api_spec.o =20 +# IOCTL validation framework +obj-$(CONFIG_KAPI_SPEC) +=3D ioctl_validation.o + # Debugfs interface for kernel API specs -obj-$(CONFIG_KAPI_SPEC_DEBUGFS) +=3D kapi_debugfs.o \ No newline at end of file +obj-$(CONFIG_KAPI_SPEC_DEBUGFS) +=3D kapi_debugfs.o diff --git a/kernel/api/ioctl_validation.c b/kernel/api/ioctl_validation.c new file mode 100644 index 0000000000000..25f6db8cb33eb --- /dev/null +++ b/kernel/api/ioctl_validation.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ioctl_validation.c - Runtime validation for IOCTL API specifications + * + * Provides functions to validate ioctl parameters against their specifica= tions + * at runtime when CONFIG_KAPI_RUNTIME_CHECKS is enabled. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_KAPI_RUNTIME_CHECKS + +/** + * kapi_validate_ioctl - Validate an ioctl call against its specification + * @filp: File pointer + * @cmd: IOCTL command + * @arg: IOCTL argument + * + * Return: 0 if valid, negative errno if validation fails + */ +int kapi_validate_ioctl(struct file *filp, unsigned int cmd, void __user *= arg) +{ + const struct kapi_ioctl_spec *spec; + const struct kernel_api_spec *api_spec; + void *data =3D NULL; + size_t copy_size; + int ret =3D 0; + int i; + + spec =3D kapi_get_ioctl_spec(cmd); + if (!spec) + return 0; /* No spec, can't validate */ + + api_spec =3D &spec->api_spec; + + pr_debug("kapi: validating ioctl %s (0x%x)\n", spec->cmd_name, cmd); + + /* Check if this ioctl requires specific capabilities */ + if (api_spec->param_count > 0) { + for (i =3D 0; i < api_spec->param_count; i++) { + const struct kapi_param_spec *param =3D &api_spec->params[i]; + + /* Check for capability requirements in constraints */ + if (param->constraint_type =3D=3D KAPI_CONSTRAINT_CUSTOM && + param->constraints[0] && strstr(param->constraints, "CAP_")) { + /* Could add capability checks here if needed */ + } + } + } + + /* For ioctls with input/output structures, copy and validate */ + if (spec->input_size > 0 || spec->output_size > 0) { + copy_size =3D max(spec->input_size, spec->output_size); + + /* Allocate temporary buffer for validation */ + data =3D kzalloc(copy_size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* Copy input data from user */ + if (spec->input_size > 0) { + ret =3D copy_from_user(data, arg, spec->input_size); + if (ret) { + ret =3D -EFAULT; + goto out; + } + } + + /* Validate structure fields */ + ret =3D kapi_validate_ioctl_struct(spec, data, copy_size); + if (ret) + goto out; + } + +out: + kfree(data); + return ret; +} +EXPORT_SYMBOL_GPL(kapi_validate_ioctl); + +/** + * struct field_offset - Maps structure 
fields to their offsets + * @field_idx: Parameter index + * @offset: Offset in structure + * @size: Size of field + */ +struct field_offset { + int field_idx; + size_t offset; + size_t size; +}; + +/* Common ioctl structure layouts */ +static const struct field_offset fwctl_info_offsets[] =3D { + {0, 0, sizeof(u32)}, /* size */ + {1, 4, sizeof(u32)}, /* flags */ + {2, 8, sizeof(u32)}, /* out_device_type */ + {3, 12, sizeof(u32)}, /* device_data_len */ + {4, 16, sizeof(u64)}, /* out_device_data */ +}; + +static const struct field_offset fwctl_rpc_offsets[] =3D { + {0, 0, sizeof(u32)}, /* size */ + {1, 4, sizeof(u32)}, /* scope */ + {2, 8, sizeof(u32)}, /* in_len */ + {3, 12, sizeof(u32)}, /* out_len */ + {4, 16, sizeof(u64)}, /* in */ + {5, 24, sizeof(u64)}, /* out */ +}; + +/** + * get_field_offsets - Get field offset information for an ioctl + * @cmd: IOCTL command + * @count: Returns number of fields + * + * Return: Array of field offsets or NULL + */ +static const struct field_offset *get_field_offsets(unsigned int cmd, int = *count) +{ + switch (cmd) { + case FWCTL_INFO: + *count =3D ARRAY_SIZE(fwctl_info_offsets); + return fwctl_info_offsets; + case FWCTL_RPC: + *count =3D ARRAY_SIZE(fwctl_rpc_offsets); + return fwctl_rpc_offsets; + default: + *count =3D 0; + return NULL; + } +} + +/** + * extract_field_value - Extract a field value from structure + * @data: Structure data + * @param: Parameter specification + * @offset_info: Field offset information + * + * Return: Field value or 0 on error + */ +static s64 extract_field_value(const void *data, + const struct kapi_param_spec *param, + const struct field_offset *offset_info) +{ + const void *field =3D data + offset_info->offset; + + switch (param->type) { + case KAPI_TYPE_UINT: + if (offset_info->size =3D=3D sizeof(u32)) + return *(u32 *)field; + else if (offset_info->size =3D=3D sizeof(u64)) + return *(u64 *)field; + break; + case KAPI_TYPE_INT: + if (offset_info->size =3D=3D sizeof(s32)) + return *(s32 
*)field; + else if (offset_info->size =3D=3D sizeof(s64)) + return *(s64 *)field; + break; + case KAPI_TYPE_USER_PTR: + /* User pointers are typically u64 in ioctl structures */ + return (s64)(*(u64 *)field); + default: + break; + } + + return 0; +} + +/** + * kapi_validate_ioctl_struct - Validate an ioctl structure against specif= ication + * @spec: IOCTL specification + * @data: Structure data + * @size: Size of the structure + * + * Return: 0 if valid, negative errno if validation fails + */ +int kapi_validate_ioctl_struct(const struct kapi_ioctl_spec *spec, + const void *data, size_t size) +{ + const struct kernel_api_spec *api_spec =3D &spec->api_spec; + const struct field_offset *offsets; + int offset_count; + int i, j; + + if (!spec || !data) + return -EINVAL; + + /* Get field offset information for this ioctl */ + offsets =3D get_field_offsets(spec->cmd, &offset_count); + + /* Validate each parameter in the structure */ + for (i =3D 0; i < api_spec->param_count && i < KAPI_MAX_PARAMS; i++) { + const struct kapi_param_spec *param =3D &api_spec->params[i]; + const struct field_offset *offset_info =3D NULL; + s64 value; + + /* Find offset information for this parameter */ + if (offsets) { + for (j =3D 0; j < offset_count; j++) { + if (offsets[j].field_idx =3D=3D i) { + offset_info =3D &offsets[j]; + break; + } + } + } + + if (!offset_info) { + pr_debug("kapi: no offset info for param %d\n", i); + continue; + } + + /* Extract field value */ + value =3D extract_field_value(data, param, offset_info); + + /* Special handling for user pointers */ + if (param->type =3D=3D KAPI_TYPE_USER_PTR) { + /* Check if pointer looks valid (non-kernel address) */ + if (value && (value >=3D TASK_SIZE)) { + pr_warn("ioctl %s: parameter %s has kernel pointer %llx\n", + spec->cmd_name, param->name, value); + return -EINVAL; + } + + /* For size validation, check against size_param_idx */ + if (param->size_param_idx >=3D 0 && + param->size_param_idx < offset_count) { + const struct 
field_offset *size_offset =3D NULL; + + for (j =3D 0; j < offset_count; j++) { + if (offsets[j].field_idx =3D=3D param->size_param_idx) { + size_offset =3D &offsets[j]; + break; + } + } + + if (size_offset) { + s64 buf_size =3D extract_field_value(data, + &api_spec->params[param->size_param_idx], + size_offset); + + /* Validate buffer size constraints */ + if (buf_size > 0 && + !kapi_validate_param(&api_spec->params[param->size_param_idx], + buf_size)) { + pr_warn("ioctl %s: buffer size %lld invalid for %s\n", + spec->cmd_name, buf_size, param->name); + return -EINVAL; + } + } + } + } else { + /* Validate using the standard parameter validation */ + if (!kapi_validate_param(param, value)) { + pr_warn("ioctl %s: parameter %s validation failed (value=3D%lld)\n", + spec->cmd_name, param->name, value); + return -EINVAL; + } + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(kapi_validate_ioctl_struct); + +/* Global registry of wrappers - in real implementation this would be per-= module */ +static struct kapi_fops_wrapper *kapi_global_wrapper; + +/** + * kapi_register_wrapper - Register a wrapper (called from macro) + * @wrapper: Wrapper to register + */ +void kapi_register_wrapper(struct kapi_fops_wrapper *wrapper) +{ + /* Simple implementation - just store the last one */ + kapi_global_wrapper =3D wrapper; +} +EXPORT_SYMBOL_GPL(kapi_register_wrapper); + +/** + * kapi_find_wrapper - Find wrapper for given file_operations + * @fops: File operations structure to check + * + * Return: Wrapper structure or NULL if not wrapped + */ +static struct kapi_fops_wrapper *kapi_find_wrapper(const struct file_opera= tions *fops) +{ + /* Simple implementation - just return the global one if it matches */ + if (kapi_global_wrapper && kapi_global_wrapper->wrapped_fops =3D=3D fops) + return kapi_global_wrapper; + return NULL; +} + +/** + * kapi_ioctl_validation_wrapper - Wrapper function for transparent valida= tion + * @filp: File pointer + * @cmd: IOCTL command + * @arg: User argument + * + 
* This function is called instead of the real ioctl handler when validati= on + * is enabled. It performs pre-validation, calls the real handler, then do= es + * post-validation. + * + * Return: Result from the real ioctl handler or error + */ +long kapi_ioctl_validation_wrapper(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct kapi_fops_wrapper *wrapper; + const struct kapi_ioctl_spec *spec; + long ret; + + wrapper =3D kapi_find_wrapper(filp->f_op); + if (!wrapper || !wrapper->real_ioctl) + return -EINVAL; + + /* Pre-validation */ + spec =3D kapi_get_ioctl_spec(cmd); + if (spec) { + ret =3D kapi_validate_ioctl(filp, cmd, (void __user *)arg); + if (ret) + return ret; + } + + /* Call the real ioctl handler */ + ret =3D wrapper->real_ioctl(filp, cmd, arg); + + /* Post-validation - check return value against spec */ + if (spec && spec->api_spec.error_count > 0) { + /* Validate that returned error is in the spec */ + if (ret < 0) { + int i; + bool found =3D false; + for (i =3D 0; i < spec->api_spec.error_count; i++) { + if (ret =3D=3D spec->api_spec.errors[i].error_code) { + found =3D true; + break; + } + } + if (!found) { + pr_warn("IOCTL %s returned unexpected error %ld\n", + spec->cmd_name, ret); + } + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(kapi_ioctl_validation_wrapper); + +#endif /* CONFIG_KAPI_RUNTIME_CHECKS */ diff --git a/kernel/api/kernel_api_spec.c b/kernel/api/kernel_api_spec.c index 29c0c84d87f7c..70e16a49f5dbe 100644 --- a/kernel/api/kernel_api_spec.c +++ b/kernel/api/kernel_api_spec.c @@ -1166,4 +1166,92 @@ static int __init kapi_debugfs_init(void) =20 late_initcall(kapi_debugfs_init); =20 -#endif /* CONFIG_DEBUG_FS */ \ No newline at end of file +#endif /* CONFIG_DEBUG_FS */ + +/* IOCTL specification registry */ +#ifdef CONFIG_KAPI_SPEC + +#include + +static DEFINE_MUTEX(ioctl_spec_mutex); +static LIST_HEAD(ioctl_specs); + +struct ioctl_spec_entry { + struct list_head list; + const struct kapi_ioctl_spec *spec; +}; + +/** + * 
kapi_register_ioctl_spec - Register an IOCTL API specification + * @spec: IOCTL specification to register + * + * Return: 0 on success, negative error code on failure + */ +int kapi_register_ioctl_spec(const struct kapi_ioctl_spec *spec) +{ + struct ioctl_spec_entry *entry; + + if (!spec || !spec->cmd_name) + return -EINVAL; + + entry =3D kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->spec =3D spec; + + mutex_lock(&ioctl_spec_mutex); + list_add_tail(&entry->list, &ioctl_specs); + mutex_unlock(&ioctl_spec_mutex); + + pr_debug("Registered IOCTL spec: %s (0x%x)\n", spec->cmd_name, spec->cmd); + return 0; +} +EXPORT_SYMBOL_GPL(kapi_register_ioctl_spec); + +/** + * kapi_unregister_ioctl_spec - Unregister an IOCTL API specification + * @cmd: IOCTL command number to unregister + */ +void kapi_unregister_ioctl_spec(unsigned int cmd) +{ + struct ioctl_spec_entry *entry, *tmp; + + mutex_lock(&ioctl_spec_mutex); + list_for_each_entry_safe(entry, tmp, &ioctl_specs, list) { + if (entry->spec->cmd =3D=3D cmd) { + list_del(&entry->list); + kfree(entry); + pr_debug("Unregistered IOCTL spec for cmd 0x%x\n", cmd); + break; + } + } + mutex_unlock(&ioctl_spec_mutex); +} +EXPORT_SYMBOL_GPL(kapi_unregister_ioctl_spec); + +/** + * kapi_get_ioctl_spec - Retrieve IOCTL specification by command number + * @cmd: IOCTL command number + * + * Return: Pointer to the specification or NULL if not found + */ +const struct kapi_ioctl_spec *kapi_get_ioctl_spec(unsigned int cmd) +{ + struct ioctl_spec_entry *entry; + const struct kapi_ioctl_spec *spec =3D NULL; + + mutex_lock(&ioctl_spec_mutex); + list_for_each_entry(entry, &ioctl_specs, list) { + if (entry->spec->cmd =3D=3D cmd) { + spec =3D entry->spec; + break; + } + } + mutex_unlock(&ioctl_spec_mutex); + + return spec; +} +EXPORT_SYMBOL_GPL(kapi_get_ioctl_spec); + +#endif /* CONFIG_KAPI_SPEC */ --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org 
(aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AB0672EA47A; Sat, 14 Jun 2025 13:49:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908956; cv=none; b=SutzRmDrvb6XHs2aYlFkuCXaZqcycMLcXvFiKAm8O0HF+oTR48YG8laV+w187colXsqcVCAp6p7g5Gvl70jWAhymX38yPesyURVoXbfqboGvEU5LFMUO6UPMduZbexvwCRkSq75W/FaCI+wd8iISo8bAM/OoD+A9NUqpnF1amjM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908956; c=relaxed/simple; bh=MSsLjvVcoT+7z7CV8XQN9Se7pCyds4GF+lbNqQEVNZs=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=cBFxseFDBlCPbSD+RYLFSKavLAjxnLzZdSAFibSP6J25U4KLmXY93nEagoE15wRUPfld61Hrho8P9c3O1dynplJ4aWoyvwJOksaa6lo1ckiJVuHcSNfeDLJ6EQ125QqScm+Z6iCHYi24cDBGqYFKLTrz6o7onedv3xGK2/S4h1I= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pTDpGcSb; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="pTDpGcSb" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E75E5C4CEEB; Sat, 14 Jun 2025 13:49:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908956; bh=MSsLjvVcoT+7z7CV8XQN9Se7pCyds4GF+lbNqQEVNZs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=pTDpGcSbyfqDa7BpCN3gOZ16M9+CEy3zHIhgcK1ES2WlRNZ7kVAIObB8DpsQAPO1e b90xd5+braap/yZePx6VIc0ITjm7EFesP00z59JwgWdv0k2A3DRtTuxpcZF0e4fQtE cme+zs9dlsIH2dpkbDuc8ZAejy50utMF4sGgR9loViLHss4UWe6L88Id5Dnd8GoTtW uMGSVC80s/OW+hJlntBiiPR5u6GUZuGBK3UE5hb+vOJwgvxWbipo6FwUhN9ZvcA1w6 2HdzMw/luFxVVNdRKYfDFuAOHYzwnaRyTNka9INAzaJkcjGbKlxunqav7mrXQN8g3W 
3Vao7WMyeKMJQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 17/19] fwctl: add detailed IOCTL API specifications Date: Sat, 14 Jun 2025 09:48:56 -0400 Message-Id: <20250614134858.790460-18-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specifications to the fwctl driver using the IOCTL specification framework. This provides detailed documentation and enables runtime validation of the fwctl IOCTL interface. Signed-off-by: Sasha Levin --- drivers/fwctl/main.c | 295 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 293 insertions(+), 2 deletions(-) diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c index bc6378506296c..fc85d54ecb6a0 100644 --- a/drivers/fwctl/main.c +++ b/drivers/fwctl/main.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include =20 #include =20 @@ -261,13 +263,291 @@ static int fwctl_fops_release(struct inode *inode, s= truct file *filp) return 0; } =20 -static const struct file_operations fwctl_fops =3D { +/* Use KAPI_DEFINE_FOPS for automatic validation wrapping */ +KAPI_DEFINE_FOPS(fwctl_fops, .owner =3D THIS_MODULE, .open =3D fwctl_fops_open, .release =3D fwctl_fops_release, .unlocked_ioctl =3D fwctl_fops_ioctl, +); + +/* IOCTL API Specifications */ + +static const struct kapi_ioctl_spec fwctl_info_spec =3D { + .cmd =3D FWCTL_INFO, + .cmd_name =3D "FWCTL_INFO", + .input_size =3D sizeof(struct fwctl_info), + .output_size =3D sizeof(struct fwctl_info), + .file_ops_name =3D "fwctl_fops", + .api_spec =3D { + .name =3D "fwctl_info", + KAPI_DESCRIPTION("Query device information and 
capabilities") + KAPI_LONG_DESC("Returns basic information about the fwctl instance, " + "including the device type and driver-specific data. " + "The driver-specific data format depends on the device type.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_IOCTL_PARAM_SIZE + KAPI_IOCTL_PARAM_FLAGS + + KAPI_PARAM(2, "out_device_type", "__u32", "Device type from enum fwctl_de= vice_type") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_ENUM, + .enum_values =3D (const s64[]){FWCTL_DEVICE_TYPE_ERROR, + FWCTL_DEVICE_TYPE_MLX5, + FWCTL_DEVICE_TYPE_CXL, + FWCTL_DEVICE_TYPE_PDS}, + .enum_count =3D 4, + KAPI_PARAM_END + + KAPI_PARAM(3, "device_data_len", "__u32", "Length of device data buffer") + KAPI_PARAM_FLAGS(KAPI_PARAM_INOUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D SZ_1M, /* Reasonable limit for device info */ + KAPI_PARAM_END + + KAPI_IOCTL_PARAM_USER_OUT_BUF(4, "out_device_data", + "Driver-specific device data", 3) + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EFAULT, -EOPNOTSUPP, -ENODEV}, + .error_count =3D 3, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EFAULT, "EFAULT", "Failed to copy data to/from user space", + "Check that provided pointers are valid user space addresses") + KAPI_ERROR(1, -EOPNOTSUPP, "EOPNOTSUPP", "Invalid flags provided", + "Currently flags must be 0") + KAPI_ERROR(2, -ENODEV, "ENODEV", "Device has been hot-unplugged", + "The underlying device is no longer available") + + .error_count =3D 3, + .param_count =3D 5, + .since_version =3D "6.13", + + /* Structure specifications */ + KAPI_STRUCT_SPEC(0, fwctl_info, "Device information query structure") + KAPI_STRUCT_SIZE(sizeof(struct fwctl_info), __alignof__(struct fwctl_inf= o)) + 
KAPI_STRUCT_FIELD_COUNT(4) + + KAPI_STRUCT_FIELD(0, "size", KAPI_TYPE_UINT, "__u32", + "Structure size for versioning") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_info, size)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(1, "flags", KAPI_TYPE_UINT, "__u32", + "Must be 0, reserved for future use") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_info, flags)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_FIELD_CONSTRAINT_RANGE(0, 0) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(2, "out_device_type", KAPI_TYPE_UINT, "__u32", + "Device type identifier") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_info, out_device_type)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(3, "device_data_len", KAPI_TYPE_UINT, "__u32", + "Length of device-specific data") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_info, device_data_len)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_STRUCT_FIELD_END + KAPI_STRUCT_SPEC_END + + KAPI_STRUCT_SPEC_COUNT(1) + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_NONE, + "none", + "Read-only operation with no side effects") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(1) + + /* State transitions */ + KAPI_STATE_TRANS_COUNT(0) /* No state transitions for query operation */ + }, }; =20 +static const struct kapi_ioctl_spec fwctl_rpc_spec =3D { + .cmd =3D FWCTL_RPC, + .cmd_name =3D "FWCTL_RPC", + .input_size =3D sizeof(struct fwctl_rpc), + .output_size =3D sizeof(struct fwctl_rpc), + .file_ops_name =3D "fwctl_fops", + .api_spec =3D { + .name =3D "fwctl_rpc", + KAPI_DESCRIPTION("Execute a Remote Procedure Call to device firmware") + KAPI_LONG_DESC("Delivers an RPC to the device firmware and returns the re= sponse. " + "The RPC format is device-specific and determined by out_device_t= ype " + "from FWCTL_INFO. 
Different scopes have different permission requ= irements.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_IOCTL_PARAM_SIZE + + KAPI_PARAM(1, "scope", "__u32", "Access scope from enum fwctl_rpc_scope") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_ENUM, + .enum_values =3D (const s64[]){FWCTL_RPC_CONFIGURATION, + FWCTL_RPC_DEBUG_READ_ONLY, + FWCTL_RPC_DEBUG_WRITE, + FWCTL_RPC_DEBUG_WRITE_FULL}, + .enum_count =3D 4, + .constraints =3D "FWCTL_RPC_DEBUG_WRITE_FULL requires CAP_SYS_RAWIO", + KAPI_PARAM_END + + KAPI_PARAM(2, "in_len", "__u32", "Length of input buffer") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D MAX_RPC_LEN, + KAPI_PARAM_END + + KAPI_PARAM(3, "out_len", "__u32", "Length of output buffer") + KAPI_PARAM_FLAGS(KAPI_PARAM_INOUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D MAX_RPC_LEN, + KAPI_PARAM_END + + KAPI_IOCTL_PARAM_USER_BUF(4, "in", "RPC request in device-specific format= ", 2) + KAPI_IOCTL_PARAM_USER_OUT_BUF(5, "out", "RPC response in device-specific = format", 3) + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EMSGSIZE, -EOPNOTSUPP, -EPERM, + -ENOMEM, -EFAULT, -ENODEV}, + .error_count =3D 6, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EMSGSIZE, "EMSGSIZE", "RPC message too large", + "in_len or out_len exceeds MAX_RPC_LEN (2MB)") + KAPI_ERROR(1, -EOPNOTSUPP, "EOPNOTSUPP", "Invalid scope value", + "scope must be one of the defined fwctl_rpc_scope values") + KAPI_ERROR(2, -EPERM, "EPERM", "Insufficient permissions", + "FWCTL_RPC_DEBUG_WRITE_FULL requires CAP_SYS_RAWIO") + KAPI_ERROR(3, -ENOMEM, "ENOMEM", "Memory allocation failed", + "Unable to 
allocate buffers for RPC") + KAPI_ERROR(4, -EFAULT, "EFAULT", "Failed to copy data to/from user space", + "Check that provided pointers are valid user space addresses") + KAPI_ERROR(5, -ENODEV, "ENODEV", "Device has been hot-unplugged", + "The underlying device is no longer available") + + .error_count =3D 6, + .param_count =3D 6, + .since_version =3D "6.13", + .notes =3D "FWCTL_RPC_DEBUG_WRITE and FWCTL_RPC_DEBUG_WRITE_FULL will " + "taint the kernel with TAINT_FWCTL on first use", + + /* Structure specifications */ + KAPI_STRUCT_SPEC(0, fwctl_rpc, "RPC request/response structure") + KAPI_STRUCT_SIZE(sizeof(struct fwctl_rpc), __alignof__(struct fwctl_rpc)) + KAPI_STRUCT_FIELD_COUNT(6) + + KAPI_STRUCT_FIELD(0, "size", KAPI_TYPE_UINT, "__u32", + "Structure size for versioning") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_rpc, size)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(1, "scope", KAPI_TYPE_UINT, "__u32", + "Access scope level") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_rpc, scope)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_FIELD_CONSTRAINT_ENUM((const s64[]){FWCTL_RPC_CONFIGURATION, + FWCTL_RPC_DEBUG_READ_ONLY, + FWCTL_RPC_DEBUG_WRITE, + FWCTL_RPC_DEBUG_WRITE_FULL}, 4) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(2, "in_len", KAPI_TYPE_UINT, "__u32", + "Input data length") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_rpc, in_len)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(3, "out_len", KAPI_TYPE_UINT, "__u32", + "Output buffer length") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_rpc, out_len)) + KAPI_FIELD_SIZE(sizeof(__u32)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(4, "in", KAPI_TYPE_PTR, "__aligned_u64", + "Pointer to input data") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_rpc, in)) + KAPI_FIELD_SIZE(sizeof(__aligned_u64)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(5, "out", KAPI_TYPE_PTR, "__aligned_u64", + "Pointer to output buffer") + KAPI_FIELD_OFFSET(offsetof(struct fwctl_rpc, 
out)) + KAPI_FIELD_SIZE(sizeof(__aligned_u64)) + KAPI_STRUCT_FIELD_END + KAPI_STRUCT_SPEC_END + + KAPI_STRUCT_SPEC_COUNT(1) + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_HARDWARE | KAPI_EFFECT_MODIFY_STATE, + "device firmware", + "May modify device configuration or firmware state") + KAPI_EFFECT_CONDITION("scope >=3D FWCTL_RPC_DEBUG_WRITE") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE, + "kernel taint", + "Taints kernel with TAINT_FWCTL on first debug write") + KAPI_EFFECT_CONDITION("scope >=3D FWCTL_RPC_DEBUG_WRITE && first use") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_SCHEDULE, + "process", + "May block while firmware processes the RPC") + KAPI_EFFECT_CONDITION("firmware operation takes time") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(3) + + /* State transitions */ + KAPI_STATE_TRANS(0, "device state", + "current configuration", "modified configuration", + "Device configuration changed by RPC command") + KAPI_STATE_TRANS_COND("RPC modifies device settings") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "kernel taint state", + "untainted", "TAINT_FWCTL set", + "Kernel marked as tainted due to firmware modification") + KAPI_STATE_TRANS_COND("First debug write operation") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(2) + }, +}; + +/* Register all fwctl IOCTL specifications */ +KAPI_IOCTL_SPEC_DRIVER("fwctl", { + &fwctl_info_spec, + &fwctl_rpc_spec, +}) + static void fwctl_device_release(struct device *device) { struct fwctl_device *fwctl =3D @@ -325,7 +605,7 @@ struct fwctl_device *_fwctl_alloc_device(struct device = *parent, if (!fwctl) return NULL; =20 - cdev_init(&fwctl->cdev, &fwctl_fops); + cdev_init(&fwctl->cdev, fwctl_fops); /* * The driver module is protected by fwctl_register/unregister(), * unregister won't complete until we are done with the driver's module. 
@@ -395,6 +675,9 @@ static int __init fwctl_init(void) { int ret; =20 + /* Initialize the wrapped file_operations */ + kapi_init_fops_fwctl_fops(); + ret =3D alloc_chrdev_region(&fwctl_dev, 0, FWCTL_MAX_DEVICES, "fwctl"); if (ret) return ret; @@ -402,8 +685,15 @@ static int __init fwctl_init(void) ret =3D class_register(&fwctl_class); if (ret) goto err_chrdev; + + ret =3D kapi_ioctl_specs_init(); + if (ret) + goto err_class; + return 0; =20 +err_class: + class_unregister(&fwctl_class); err_chrdev: unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES); return ret; @@ -411,6 +701,7 @@ static int __init fwctl_init(void) =20 static void __exit fwctl_exit(void) { + kapi_ioctl_specs_exit(); class_unregister(&fwctl_class); unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES); } --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 930C12EACE4; Sat, 14 Jun 2025 13:49:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908957; cv=none; b=R6//Mkbpc3nAxNj2AHY41iZcTT+6/t2nmcqSjBy3HatT9of/2mkhKI4iwIth1fiBOhIvWlP4uxr7XhHcUCJ/+25Vl/dWfx9kbZUqL5LYZZvhjl9iU/2MuroBGYkZCHwh6ioa8NjZELInSNJ4JjDMmaX/Or4ht6hahSF57MZ4wzQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908957; c=relaxed/simple; bh=xpm4SrrZ3ZJ0RDo4NgssIWkYzLDT+qKUSusys7hGU00=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=S3n6Vwpen301Yq6dBxwtq2Fwat9ecBJzyu50B6XNQ8gSSLvJAUS4fFwaJntHXIzKvrvuCYiftqq34E6d7VGYJe0J6cCYXSqahk8c8cAktHfYZgCdAuDarHRR0YSEapcKQGbxfbmPEKSF6atVSEqtRgf8ir3j5v1i17r61mYDBro= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=kernel.org header.i=@kernel.org header.b=iSLwpMe+; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="iSLwpMe+" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D1D28C4CEEB; Sat, 14 Jun 2025 13:49:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908957; bh=xpm4SrrZ3ZJ0RDo4NgssIWkYzLDT+qKUSusys7hGU00=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=iSLwpMe+c9grQy2zeI4BGb5WU9CWPXW37xmkpxdJDjTy1h4iSWx8JTOtc4kEYeJo9 iDbLKpCqShSy5OWO7WkfJq0cZ/InF9dehRFlma/Jo50OI1krLccuJ+ieXrT15UfyeM ezCZRpSdgSaF9fMK6TXo/1QoHzS8FEUo9C6Px48HEwLHxWYICyFMf+Q4H9IahRksGQ mBTbs6JL6d6UYBtL9gIU7yFfXphP/H/nC0W1RnRCNCGSxhr0NME2XGUDDRD/lxU8Dc 4aWe96Hlvt1V/YX39WwZe3hLtaVjNZEW8gBY8a1HhPpwtbiNdMAbEZP6JeJVyKpaOH Ykbm5gMRma6HQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 18/19] binder: add detailed IOCTL API specifications Date: Sat, 14 Jun 2025 09:48:57 -0400 Message-Id: <20250614134858.790460-19-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add kernel API specifications to the binder driver using the IOCTL specification framework. This provides detailed documentation and enables runtime validation of all binder IOCTL interfaces. 
Signed-off-by: Sasha Levin --- drivers/android/binder.c | 758 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 758 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c463ca4a8fff8..975f07216724b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -67,6 +67,8 @@ #include #include #include +#include +#include =20 #include =20 @@ -6930,6 +6932,7 @@ static int transaction_log_show(struct seq_file *m, v= oid *unused) return 0; } =20 +/* Define the actual binder_fops structure */ const struct file_operations binder_fops =3D { .owner =3D THIS_MODULE, .poll =3D binder_poll, @@ -6941,6 +6944,751 @@ const struct file_operations binder_fops =3D { .release =3D binder_release, }; =20 +/* Define wrapper for KAPI validation */ +#ifdef CONFIG_KAPI_RUNTIME_CHECKS +static struct file_operations __kapi_wrapped_binder_fops; +static struct kapi_fops_wrapper __kapi_wrapper_binder_fops; + +static void kapi_init_fops_binder_fops(void) +{ + if (binder_fops.unlocked_ioctl) { + __kapi_wrapped_binder_fops =3D binder_fops; + __kapi_wrapper_binder_fops.real_fops =3D &binder_fops; + __kapi_wrapper_binder_fops.wrapped_fops =3D &__kapi_wrapped_binder_fops; + __kapi_wrapper_binder_fops.real_ioctl =3D binder_fops.unlocked_ioctl; + __kapi_wrapped_binder_fops.unlocked_ioctl =3D kapi_ioctl_validation_wrap= per; + kapi_register_wrapper(&__kapi_wrapper_binder_fops); + } +} +#else +static inline void kapi_init_fops_binder_fops(void) {} +#endif + +/* IOCTL API Specifications for Binder */ + +static const struct kapi_ioctl_spec binder_write_read_spec =3D { + .cmd =3D BINDER_WRITE_READ, + .cmd_name =3D "BINDER_WRITE_READ", + .input_size =3D sizeof(struct binder_write_read), + .output_size =3D sizeof(struct binder_write_read), + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_write_read", + KAPI_DESCRIPTION("Perform read/write operations on binder") + KAPI_LONG_DESC("Main workhorse of binder IPC. 
Allows writing commands to " + "binder driver and reading responses. Commands are encoded " + "in a special protocol format. Both read and write operations " + "can be performed in a single ioctl call.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "write_size", "binder_size_t", "Bytes to write") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D SZ_4M, /* Reasonable limit for IPC */ + KAPI_PARAM_END + + KAPI_PARAM(1, "write_consumed", "binder_size_t", "Bytes consumed by drive= r") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D SZ_4M, + KAPI_PARAM_END + + KAPI_PARAM(2, "write_buffer", "binder_uintptr_t", "User buffer with comma= nds") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + .size_param_idx =3D 0, + KAPI_PARAM_END + + KAPI_PARAM(3, "read_size", "binder_size_t", "Bytes to read") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D SZ_4M, + KAPI_PARAM_END + + KAPI_PARAM(4, "read_consumed", "binder_size_t", "Bytes consumed by driver= ") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D SZ_4M, + KAPI_PARAM_END + + KAPI_PARAM(5, "read_buffer", "binder_uintptr_t", "User buffer for respons= es") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT | KAPI_PARAM_USER) + .type =3D KAPI_TYPE_USER_PTR, + .size_param_idx =3D 3, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EFAULT, -EINVAL, -EAGAIN, -EINTR, + -ENOMEM, -ECONNREFUSED}, + .error_count =3D 6, + KAPI_RETURN_END + 
+ /* Errors */ + KAPI_ERROR(0, -EFAULT, "EFAULT", "Failed to copy data to/from user space", + "Check buffer pointers are valid user space addresses") + KAPI_ERROR(1, -EINVAL, "EINVAL", "Invalid parameters", + "Buffer sizes or commands are invalid") + KAPI_ERROR(2, -EAGAIN, "EAGAIN", "Try again", + "Non-blocking read with no data available") + KAPI_ERROR(3, -EINTR, "EINTR", "Interrupted by signal", + "Operation interrupted, should be retried") + KAPI_ERROR(4, -ENOMEM, "ENOMEM", "Out of memory", + "Unable to allocate memory for operation") + KAPI_ERROR(5, -ECONNREFUSED, "ECONNREFUSED", "Connection refused", + "Process is being destroyed, no further operations allowed") + + .error_count =3D 6, + .param_count =3D 6, + .since_version =3D "3.0", + .notes =3D "This is the primary interface for binder IPC. Most other " + "ioctls are for configuration and management.", + + /* Structure specifications */ + KAPI_STRUCT_SPEC(0, binder_write_read, "Read/write operation structure") + KAPI_STRUCT_SIZE(sizeof(struct binder_write_read), __alignof__(struct bi= nder_write_read)) + KAPI_STRUCT_FIELD_COUNT(6) + + KAPI_STRUCT_FIELD(0, "write_size", KAPI_TYPE_UINT, "binder_size_t", + "Number of bytes to write") + KAPI_FIELD_OFFSET(offsetof(struct binder_write_read, write_size)) + KAPI_FIELD_SIZE(sizeof(binder_size_t)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(1, "write_consumed", KAPI_TYPE_UINT, "binder_size_t", + "Number of bytes consumed by driver") + KAPI_FIELD_OFFSET(offsetof(struct binder_write_read, write_consumed)) + KAPI_FIELD_SIZE(sizeof(binder_size_t)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(2, "write_buffer", KAPI_TYPE_PTR, "binder_uintptr_t", + "Pointer to write buffer") + KAPI_FIELD_OFFSET(offsetof(struct binder_write_read, write_buffer)) + KAPI_FIELD_SIZE(sizeof(binder_uintptr_t)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(3, "read_size", KAPI_TYPE_UINT, "binder_size_t", + "Number of bytes to read") + KAPI_FIELD_OFFSET(offsetof(struct binder_write_read, 
read_size)) + KAPI_FIELD_SIZE(sizeof(binder_size_t)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(4, "read_consumed", KAPI_TYPE_UINT, "binder_size_t", + "Number of bytes consumed by driver") + KAPI_FIELD_OFFSET(offsetof(struct binder_write_read, read_consumed)) + KAPI_FIELD_SIZE(sizeof(binder_size_t)) + KAPI_STRUCT_FIELD_END + + KAPI_STRUCT_FIELD(5, "read_buffer", KAPI_TYPE_PTR, "binder_uintptr_t", + "Pointer to read buffer") + KAPI_FIELD_OFFSET(offsetof(struct binder_write_read, read_buffer)) + KAPI_FIELD_SIZE(sizeof(binder_uintptr_t)) + KAPI_STRUCT_FIELD_END + KAPI_STRUCT_SPEC_END + + KAPI_STRUCT_SPEC_COUNT(1) + + /* Side effects */ + KAPI_SIDE_EFFECT(0, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_NETWORK, + "binder transaction queue", + "Enqueues transactions or commands to target process") + KAPI_EFFECT_CONDITION("write_size > 0") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(1, KAPI_EFFECT_MODIFY_STATE | KAPI_EFFECT_SCHEDULE, + "process state", + "May block waiting for incoming transactions") + KAPI_EFFECT_CONDITION("read_size > 0 && no data available") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(2, KAPI_EFFECT_RESOURCE_CREATE, + "binder nodes/refs", + "May create or destroy binder nodes and references") + KAPI_EFFECT_CONDITION("specific commands") + KAPI_EFFECT_REVERSIBLE + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT(3, KAPI_EFFECT_SIGNAL_SEND, + "target process", + "May trigger death notifications to linked processes") + KAPI_EFFECT_CONDITION("death notification") + KAPI_SIDE_EFFECT_END + + KAPI_SIDE_EFFECT_COUNT(4) + + /* State transitions */ + KAPI_STATE_TRANS(0, "transaction", + "pending in sender", "queued in target", + "Transaction moves from sender to target's queue") + KAPI_STATE_TRANS_COND("BC_TRANSACTION command") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS(1, "thread state", + "running", "waiting for work", + "Thread blocks waiting for incoming transactions") + KAPI_STATE_TRANS_COND("read with no work available") + KAPI_STATE_TRANS_END + + 
KAPI_STATE_TRANS(2, "binder ref", + "active", "released", + "Reference count decremented, may trigger cleanup") + KAPI_STATE_TRANS_COND("BC_RELEASE command") + KAPI_STATE_TRANS_END + + KAPI_STATE_TRANS_COUNT(3) + }, +}; + +static const struct kapi_ioctl_spec binder_set_max_threads_spec =3D { + .cmd =3D BINDER_SET_MAX_THREADS, + .cmd_name =3D "BINDER_SET_MAX_THREADS", + .input_size =3D sizeof(__u32), + .output_size =3D 0, + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_set_max_threads", + KAPI_DESCRIPTION("Set maximum number of binder threads") + KAPI_LONG_DESC("Sets the maximum number of threads that the binder driver= " + "will request this process to spawn for handling incoming " + "transactions. The driver sends BR_SPAWN_LOOPER when it needs " + "more threads.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "max_threads", "__u32", "Maximum number of threads") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D INT_MAX, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EINVAL, -EFAULT}, + .error_count =3D 2, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid thread count", + "Thread count exceeds system limits") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Failed to copy from user", + "Invalid user pointer provided") + + .error_count =3D 2, + .param_count =3D 1, + .since_version =3D "3.0", + }, +}; + +static const struct kapi_ioctl_spec binder_set_context_mgr_spec =3D { + .cmd =3D BINDER_SET_CONTEXT_MGR, + .cmd_name =3D "BINDER_SET_CONTEXT_MGR", + .input_size =3D 0, + .output_size =3D 0, + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_set_context_mgr", + KAPI_DESCRIPTION("Become the context manager 
(handle 0)") + KAPI_LONG_DESC("Registers the calling process as the context manager for " + "this binder domain. The context manager has special handle 0 " + "and typically implements the service manager. Only one process " + "per binder domain can be the context manager.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EBUSY, -EPERM, -ENOMEM}, + .error_count =3D 3, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EBUSY, "EBUSY", "Context manager already set", + "Another process is already the context manager") + KAPI_ERROR(1, -EPERM, "EPERM", "Permission denied", + "Caller lacks permission or wrong UID") + KAPI_ERROR(2, -ENOMEM, "ENOMEM", "Out of memory", + "Unable to allocate context manager node") + + .error_count =3D 3, + .param_count =3D 0, + .since_version =3D "3.0", + .notes =3D "Requires CAP_SYS_NICE or proper SELinux permissions", + }, +}; + +static const struct kapi_ioctl_spec binder_set_context_mgr_ext_spec =3D { + .cmd =3D BINDER_SET_CONTEXT_MGR_EXT, + .cmd_name =3D "BINDER_SET_CONTEXT_MGR_EXT", + .input_size =3D sizeof(struct flat_binder_object), + .output_size =3D 0, + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_set_context_mgr_ext", + KAPI_DESCRIPTION("Become context manager with extended info") + KAPI_LONG_DESC("Extended version of BINDER_SET_CONTEXT_MGR that allows " + "specifying additional properties of the context manager " + "through a flat_binder_object structure.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "object", "struct flat_binder_object", "Context manager pro= perties") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_STRUCT, + .size =3D sizeof(struct flat_binder_object), + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative 
errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EINVAL, -EFAULT, -EBUSY, -EPERM, -ENOME= M}, + .error_count =3D 5, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid parameters", + "Invalid flat_binder_object structure") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Failed to copy from user", + "Invalid user pointer provided") + KAPI_ERROR(2, -EBUSY, "EBUSY", "Context manager already set", + "Another process is already the context manager") + KAPI_ERROR(3, -EPERM, "EPERM", "Permission denied", + "Caller lacks permission or wrong UID") + KAPI_ERROR(4, -ENOMEM, "ENOMEM", "Out of memory", + "Unable to allocate context manager node") + + .error_count =3D 5, + .param_count =3D 1, + .since_version =3D "4.14", + }, +}; + +static const struct kapi_ioctl_spec binder_thread_exit_spec =3D { + .cmd =3D BINDER_THREAD_EXIT, + .cmd_name =3D "BINDER_THREAD_EXIT", + .input_size =3D 0, + .output_size =3D 0, + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_thread_exit", + KAPI_DESCRIPTION("Exit binder thread") + KAPI_LONG_DESC("Notifies the binder driver that this thread is exiting. 
" + "The driver will clean up any pending transactions and " + "remove the thread from the thread pool.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){}, + .error_count =3D 0, + KAPI_RETURN_END + + .error_count =3D 0, + .param_count =3D 0, + .since_version =3D "3.0", + .notes =3D "Should be called before thread termination to ensure clean sh= utdown", + }, +}; + +static const struct kapi_ioctl_spec binder_version_spec =3D { + .cmd =3D BINDER_VERSION, + .cmd_name =3D "BINDER_VERSION", + .input_size =3D 0, + .output_size =3D sizeof(struct binder_version), + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_version", + KAPI_DESCRIPTION("Get binder protocol version") + KAPI_LONG_DESC("Returns the current binder protocol version supported " + "by the driver. Used for compatibility checking.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "protocol_version", "__s32", "Binder protocol version") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_ENUM, + .enum_values =3D (const s64[]){BINDER_CURRENT_PROTOCOL_VERSION}, + .enum_count =3D 1, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EINVAL, -EFAULT}, + .error_count =3D 2, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid version structure", + "Invalid user pointer for version structure") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Failed to copy to user", + "Unable to write version to user space") + + .error_count =3D 2, + .param_count =3D 1, + .since_version =3D "3.0", + }, +}; + +static const struct kapi_ioctl_spec 
binder_get_node_info_for_ref_spec =3D { + .cmd =3D BINDER_GET_NODE_INFO_FOR_REF, + .cmd_name =3D "BINDER_GET_NODE_INFO_FOR_REF", + .input_size =3D sizeof(struct binder_node_info_for_ref), + .output_size =3D sizeof(struct binder_node_info_for_ref), + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_get_node_info_for_ref", + KAPI_DESCRIPTION("Get node information for a reference") + KAPI_LONG_DESC("Retrieves information about a binder node given its handl= e. " + "Returns the current strong and weak reference counts.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "handle", "__u32", "Binder handle") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + KAPI_PARAM(1, "strong_count", "__u32", "Strong reference count") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + KAPI_PARAM(2, "weak_count", "__u32", "Weak reference count") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EINVAL, -EFAULT, -ENOENT}, + .error_count =3D 3, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid parameters", + "Reserved fields must be zero") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Failed to copy data", + "Invalid user pointer provided") + KAPI_ERROR(2, -ENOENT, "ENOENT", "Handle not found", + "No node exists for the given handle") + + .error_count =3D 3, + .param_count =3D 3, + .since_version =3D "4.14", + }, +}; + +static const struct kapi_ioctl_spec binder_get_node_debug_info_spec =3D { + .cmd =3D BINDER_GET_NODE_DEBUG_INFO, + .cmd_name =3D "BINDER_GET_NODE_DEBUG_INFO", + .input_size =3D sizeof(struct binder_node_debug_info), + .output_size =3D sizeof(struct binder_node_debug_info), + .file_ops_name =3D "binder_fops", 
+ .api_spec =3D { + .name =3D "binder_get_node_debug_info", + KAPI_DESCRIPTION("Get debug info for binder nodes") + KAPI_LONG_DESC("Iterates through all binder nodes in the process. " + "Start with ptr=3DNULL to get first node, then use " + "returned ptr for next call. Returns ptr=3D0 when done.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "ptr", "binder_uintptr_t", "Node pointer (NULL for first)") + KAPI_PARAM_FLAGS(KAPI_PARAM_INOUT) + .type =3D KAPI_TYPE_PTR, + KAPI_PARAM_END + + KAPI_PARAM(1, "cookie", "binder_uintptr_t", "Node cookie value") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + KAPI_PARAM(2, "has_strong_ref", "__u32", "Has strong references") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D 1, + KAPI_PARAM_END + + KAPI_PARAM(3, "has_weak_ref", "__u32", "Has weak references") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D 1, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EFAULT, -EINVAL}, + .error_count =3D 2, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EFAULT, "EFAULT", "Failed to copy data", + "Invalid user pointer provided") + KAPI_ERROR(1, -EINVAL, "EINVAL", "Invalid node pointer", + "Provided ptr is not a valid node") + + .error_count =3D 2, + .param_count =3D 4, + .since_version =3D "4.14", + }, +}; + +static const struct kapi_ioctl_spec binder_freeze_spec =3D { + .cmd =3D BINDER_FREEZE, + .cmd_name =3D "BINDER_FREEZE", + .input_size =3D sizeof(struct binder_freeze_info), + .output_size =3D 0, + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_freeze", + KAPI_DESCRIPTION("Freeze or 
unfreeze a binder process") + KAPI_LONG_DESC("Controls whether a process can receive binder transaction= s. " + "When frozen, new transactions are blocked. Can wait for " + "existing transactions to complete with timeout.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "pid", "__u32", "Process ID to freeze/unfreeze") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 1, + .max_value =3D PID_MAX_LIMIT, + KAPI_PARAM_END + + KAPI_PARAM(1, "enable", "__u32", "1 to freeze, 0 to unfreeze") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D 1, + KAPI_PARAM_END + + KAPI_PARAM(2, "timeout_ms", "__u32", "Timeout in milliseconds (0 =3D no w= ait)") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D 60000, /* 1 minute max */ + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EINVAL, -EAGAIN, -EFAULT, -ENOMEM}, + .error_count =3D 4, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Invalid process", + "Process not found or invalid parameters") + KAPI_ERROR(1, -EAGAIN, "EAGAIN", "Timeout waiting for transactions", + "Existing transactions did not complete within timeout") + KAPI_ERROR(2, -EFAULT, "EFAULT", "Failed to copy from user", + "Invalid user pointer provided") + KAPI_ERROR(3, -ENOMEM, "ENOMEM", "Out of memory", + "Unable to allocate memory for freeze operation") + + .error_count =3D 4, + .param_count =3D 3, + .since_version =3D "5.9", + .notes =3D "Requires appropriate permissions to freeze other processes", + }, +}; + +static const struct kapi_ioctl_spec binder_get_frozen_info_spec =3D { + 
.cmd =3D BINDER_GET_FROZEN_INFO, + .cmd_name =3D "BINDER_GET_FROZEN_INFO", + .input_size =3D sizeof(struct binder_frozen_status_info), + .output_size =3D sizeof(struct binder_frozen_status_info), + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_get_frozen_info", + KAPI_DESCRIPTION("Get frozen status of a process") + KAPI_LONG_DESC("Queries whether a process is frozen and if it has " + "received transactions while frozen. Useful for " + "debugging frozen process issues.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "pid", "__u32", "Process ID to query") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 1, + .max_value =3D PID_MAX_LIMIT, + KAPI_PARAM_END + + KAPI_PARAM(1, "sync_recv", "__u32", "Sync transactions received while fro= zen") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + .constraints =3D "Bit 0: received after frozen, Bit 1: pending during fr= eeze", + KAPI_PARAM_END + + KAPI_PARAM(2, "async_recv", "__u32", "Async transactions received while f= rozen") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EINVAL, -EFAULT}, + .error_count =3D 2, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EINVAL, "EINVAL", "Process not found", + "No binder process found with given PID") + KAPI_ERROR(1, -EFAULT, "EFAULT", "Failed to copy data", + "Invalid user pointer provided") + + .error_count =3D 2, + .param_count =3D 3, + .since_version =3D "5.9", + }, +}; + +static const struct kapi_ioctl_spec binder_enable_oneway_spam_detection_sp= ec =3D { + .cmd =3D BINDER_ENABLE_ONEWAY_SPAM_DETECTION, + .cmd_name =3D "BINDER_ENABLE_ONEWAY_SPAM_DETECTION", + .input_size =3D sizeof(__u32), + 
.output_size =3D 0, + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_enable_oneway_spam_detection", + KAPI_DESCRIPTION("Enable/disable oneway spam detection") + KAPI_LONG_DESC("Controls whether the driver monitors for excessive " + "oneway transactions that might indicate spam or abuse. " + "When enabled, BR_ONEWAY_SPAM_SUSPECT is sent when threshold exce= eded.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "enable", "__u32", "1 to enable, 0 to disable") + KAPI_PARAM_FLAGS(KAPI_PARAM_IN) + .type =3D KAPI_TYPE_UINT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D 0, + .max_value =3D 1, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EFAULT}, + .error_count =3D 1, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EFAULT, "EFAULT", "Failed to copy from user", + "Invalid user pointer provided") + + .error_count =3D 1, + .param_count =3D 1, + .since_version =3D "5.13", + }, +}; + +static const struct kapi_ioctl_spec binder_get_extended_error_spec =3D { + .cmd =3D BINDER_GET_EXTENDED_ERROR, + .cmd_name =3D "BINDER_GET_EXTENDED_ERROR", + .input_size =3D 0, + .output_size =3D sizeof(struct binder_extended_error), + .file_ops_name =3D "binder_fops", + .api_spec =3D { + .name =3D "binder_get_extended_error", + KAPI_DESCRIPTION("Get extended error information") + KAPI_LONG_DESC("Retrieves detailed error information from the last " + "failed binder operation on this thread. 
Clears the " + "error after reading.") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + /* Parameters */ + KAPI_PARAM(0, "id", "__u32", "Error identifier") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + KAPI_PARAM(1, "command", "__u32", "Binder command that failed") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_UINT, + KAPI_PARAM_END + + KAPI_PARAM(2, "param", "__s32", "Error parameter (negative errno)") + KAPI_PARAM_FLAGS(KAPI_PARAM_OUT) + .type =3D KAPI_TYPE_INT, + .constraint_type =3D KAPI_CONSTRAINT_RANGE, + .min_value =3D -MAX_ERRNO, + .max_value =3D 0, + KAPI_PARAM_END + + /* Return value */ + KAPI_RETURN("int", "0 on success, negative errno on failure") + .type =3D KAPI_TYPE_INT, + .check_type =3D KAPI_RETURN_ERROR_CHECK, + .error_values =3D (const s64[]){-EFAULT}, + .error_count =3D 1, + KAPI_RETURN_END + + /* Errors */ + KAPI_ERROR(0, -EFAULT, "EFAULT", "Failed to copy to user", + "Invalid user pointer provided") + + .error_count =3D 1, + .param_count =3D 3, + .since_version =3D "5.16", + .notes =3D "Error is cleared after reading, subsequent calls return BR_OK= ", + }, +}; + +/* Register all binder IOCTL specifications */ +KAPI_IOCTL_SPEC_DRIVER("binder", { + &binder_write_read_spec, + &binder_set_max_threads_spec, + &binder_set_context_mgr_spec, + &binder_set_context_mgr_ext_spec, + &binder_thread_exit_spec, + &binder_version_spec, + &binder_get_node_info_for_ref_spec, + &binder_get_node_debug_info_spec, + &binder_freeze_spec, + &binder_get_frozen_info_spec, + &binder_enable_oneway_spam_detection_spec, + &binder_get_extended_error_spec, +}) + DEFINE_SHOW_ATTRIBUTE(state); DEFINE_SHOW_ATTRIBUTE(state_hashed); DEFINE_SHOW_ATTRIBUTE(stats); @@ -7050,6 +7798,13 @@ static int __init binder_init(void) if (ret) return ret; =20 + /* Initialize the wrapped file_operations */ + kapi_init_fops_binder_fops(); + + ret =3D kapi_ioctl_specs_init(); + if (ret) + goto err_kapi_init; + 
atomic_set(&binder_transaction_log.cur, ~0U); atomic_set(&binder_transaction_log_failed.cur, ~0U); =20 @@ -7102,6 +7857,9 @@ static int __init binder_init(void) =20 err_alloc_device_names_failed: debugfs_remove_recursive(binder_debugfs_dir_entry_root); + kapi_ioctl_specs_exit(); + +err_kapi_init: binder_alloc_shrinker_exit(); =20 return ret; --=20 2.39.5 From nobody Fri Oct 10 09:18:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 965DB2EACFE; Sat, 14 Jun 2025 13:49:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908958; cv=none; b=s6VEDrYAZJGz1yl9EP6Ty15zlsNNco1yBIysl0SPUrfNABI9Z7i0SQuwPjVt/KnDUoMvYHdUT82v55WENrLZmPmrOXoa5YCZfdxrJfDg4/lxZAKUdiQZiDN3J/HcoWZOopdrzJ6HsTzKL+33ZA/vFxT4xFWiR8P/lpU6KLJo2D4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1749908958; c=relaxed/simple; bh=G4abd4n8OMhquGl8B0vC4XK2aT/vUVMRQ4+VJ7cqekM=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=EZRXJSybwak5467+zVjilCN3G/zB1Ea1p/jsAjsnTOoHWb2pIThNt2hsiZFVTcK6WGRyTinRFLjlF0mO/Njehwin6oLnxrXOpZoKX/s1xxxTktxjzpHD375+vm7pRokhS7fxdyOdRq4IMPVlk9D9IS5IwUXbUvou7XE9qmwYh4M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=teM5oIFY; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="teM5oIFY" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BD81FC4CEEB; Sat, 14 Jun 2025 13:49:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1749908958; 
bh=G4abd4n8OMhquGl8B0vC4XK2aT/vUVMRQ4+VJ7cqekM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=teM5oIFYMQGExhd5Njc/ditHNtwUwxkUK17fEJfuCvrEee6KWZI+8o3jh3z6Vyp0k wz8sDtf8p0Pxd25T7WYyOIlTEsCq8lDEHmEyxA0V6n5I0ohXnE2Il6910Km0gBLxdv EHuj4eAtXxj1zlpAZ5at/SvoOf7mub+MWj/EfV4+YTEByrfxXSbq7/FZQYOM/59rNn F/hiK/zKbRRVI5cBjEwhuvf+rUo9C728jaubiOAw6LB26PqO6d3kn2acMpoTlPRhDQ +9SF6klx43sRhfjXTqe7vR4WhF0sWPLIzJTn77hjevvDsBpVT6IFYg1ptvl326m+xZ dZbwAeYU34+WQ== From: Sasha Levin To: linux-kernel@vger.kernel.org Cc: linux-api@vger.kernel.org, workflows@vger.kernel.org, tools@kernel.org, Sasha Levin Subject: [RFC 19/19] tools/kapi: Add kernel API specification extraction tool Date: Sat, 14 Jun 2025 09:48:58 -0400 Message-Id: <20250614134858.790460-20-sashal@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250614134858.790460-1-sashal@kernel.org> References: <20250614134858.790460-1-sashal@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The kapi tool extracts and displays kernel API specifications. 
Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-api-spec.rst | 198 ++++++- tools/kapi/.gitignore | 4 + tools/kapi/Cargo.toml | 19 + tools/kapi/src/extractor/debugfs.rs | 204 ++++++++ tools/kapi/src/extractor/mod.rs | 95 ++++ tools/kapi/src/extractor/source_parser.rs | 488 ++++++++++++++++++ .../src/extractor/vmlinux/binary_utils.rs | 130 +++++ tools/kapi/src/extractor/vmlinux/mod.rs | 372 +++++++++++++ tools/kapi/src/formatter/json.rs | 170 ++++++ tools/kapi/src/formatter/mod.rs | 68 +++ tools/kapi/src/formatter/plain.rs | 99 ++++ tools/kapi/src/formatter/rst.rs | 144 ++++++ tools/kapi/src/main.rs | 121 +++++ 13 files changed, 2109 insertions(+), 3 deletions(-) create mode 100644 tools/kapi/.gitignore create mode 100644 tools/kapi/Cargo.toml create mode 100644 tools/kapi/src/extractor/debugfs.rs create mode 100644 tools/kapi/src/extractor/mod.rs create mode 100644 tools/kapi/src/extractor/source_parser.rs create mode 100644 tools/kapi/src/extractor/vmlinux/binary_utils.rs create mode 100644 tools/kapi/src/extractor/vmlinux/mod.rs create mode 100644 tools/kapi/src/formatter/json.rs create mode 100644 tools/kapi/src/formatter/mod.rs create mode 100644 tools/kapi/src/formatter/plain.rs create mode 100644 tools/kapi/src/formatter/rst.rs create mode 100644 tools/kapi/src/main.rs diff --git a/Documentation/admin-guide/kernel-api-spec.rst b/Documentation/= admin-guide/kernel-api-spec.rst index 3a63f6711e27b..9b452753111ad 100644 --- a/Documentation/admin-guide/kernel-api-spec.rst +++ b/Documentation/admin-guide/kernel-api-spec.rst @@ -31,7 +31,9 @@ The framework aims to: common programming errors during development and testing. =20 3. **Support Tooling**: Export API specifications in machine-readable form= ats for - use by static analyzers, documentation generators, and development tool= s. + use by static analyzers, documentation generators, and development tool= s. 
The + ``kapi`` tool (see `The kapi Tool`_) provides comprehensive extraction = and + formatting capabilities. =20 4. **Enhance Debugging**: Provide detailed API information at runtime thro= ugh debugfs for debugging and introspection. @@ -71,6 +73,13 @@ The framework consists of several key components: - Type-safe parameter specifications - Context and constraint definitions =20 +5. **kapi Tool** (``tools/kapi/``) + + - Userspace utility for extracting specifications + - Multiple input sources (source, binary, debugfs) + - Multiple output formats (plain, JSON, RST) + - Testing and validation utilities + Data Model ---------- =20 @@ -344,8 +353,177 @@ Documentation Generation ------------------------ =20 The framework exports specifications via debugfs that can be used -to generate documentation. Tools for automatic documentation generation -from specifications are planned for future development. +to generate documentation. The ``kapi`` tool provides comprehensive +extraction and formatting capabilities for kernel API specifications. + +The kapi Tool +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +Overview +-------- + +The ``kapi`` tool is a userspace utility that extracts and displays kernel= API +specifications from multiple sources. It provides a unified interface to a= ccess +API documentation whether from compiled kernels, source code, or runtime s= ystems. + +Installation +------------ + +Build the tool from the kernel source tree:: + + $ cd tools/kapi + $ cargo build --release + + # Optional: Install system-wide + $ cargo install --path . + +The tool requires Rust and Cargo to build. The binary will be available at +``tools/kapi/target/release/kapi``. 
+ +Command-Line Usage +------------------ + +Basic syntax:: + + kapi [OPTIONS] [API_NAME] + +Options: + +- ``--vmlinux ``: Extract from compiled kernel binary +- ``--source ``: Extract from kernel source code +- ``--debugfs ``: Extract from debugfs (default: /sys/kernel/debug) +- ``-f, --format ``: Output format (plain, json, rst) +- ``-h, --help``: Display help information +- ``-V, --version``: Display version information + +Input Modes +----------- + +**1. Source Code Mode** + +Extract specifications directly from kernel source:: + + # Scan entire kernel source tree + $ kapi --source /path/to/linux + + # Extract from specific file + $ kapi --source kernel/sched/core.c + + # Get details for specific API + $ kapi --source /path/to/linux sys_sched_yield + +**2. Vmlinux Mode** + +Extract from compiled kernel with debug symbols:: + + # List all APIs in vmlinux + $ kapi --vmlinux /boot/vmlinux-5.15.0 + + # Get specific syscall details + $ kapi --vmlinux ./vmlinux sys_read + +**3. Debugfs Mode** + +Extract from running kernel via debugfs:: + + # Use default debugfs path + $ kapi + + # Use custom debugfs mount + $ kapi --debugfs /mnt/debugfs + + # Get specific API from running kernel + $ kapi sys_write + +Output Formats +-------------- + +**Plain Text Format** (default):: + + $ kapi sys_read + + Detailed information for sys_read: + =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + Description: Read from a file descriptor + + Detailed Description: + Reads up to count bytes from file descriptor fd into the buffer starti= ng at buf. 
+ + Execution Context: + - KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE + + Parameters (3): + + Available since: 1.0 + +**JSON Format**:: + + $ kapi --format json sys_read + { + "api_details": { + "name": "sys_read", + "description": "Read from a file descriptor", + "long_description": "Reads up to count bytes...", + "context_flags": ["KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE"], + "since_version": "1.0" + } + } + +**ReStructuredText Format**:: + + $ kapi --format rst sys_read + + sys_read + =3D=3D=3D=3D=3D=3D=3D=3D + + **Read from a file descriptor** + + Reads up to count bytes from file descriptor fd into the buffer... + +Usage Examples +-------------- + +**Generate complete API documentation**:: + + # Export all kernel APIs to JSON + $ kapi --source /path/to/linux --format json > kernel-apis.json + + # Generate RST documentation for all syscalls + $ kapi --vmlinux ./vmlinux --format rst > syscalls.rst + + # List APIs from specific subsystem + $ kapi --source drivers/gpu/drm/ + +**Integration with other tools**:: + + # Find all APIs that can sleep + $ kapi --format json | jq '.apis[] | select(.context_flags[] | contain= s("SLEEPABLE"))' + + # Generate markdown documentation + $ kapi --format rst sys_mmap | pandoc -f rst -t markdown + +**Debugging and analysis**:: + + # Compare API between kernel versions + $ diff <(kapi --vmlinux vmlinux-5.10) <(kapi --vmlinux vmlinux-5.15) + + # Check if specific API exists + $ kapi --source . my_custom_api || echo "API not found" + +Implementation Details +---------------------- + +The tool extracts API specifications from three sources: + +1. **Source Code**: Parses KAPI specification macros using regular express= ions +2. **Vmlinux**: Reads the ``.kapi_specs`` ELF section from compiled kernels +3. 
**Debugfs**: Reads from ``/sys/kernel/debug/kapi/`` filesystem interface + +The tool supports all KAPI specification types: + +- System calls (``DEFINE_KERNEL_API_SPEC``) +- IOCTLs (``DEFINE_IOCTL_API_SPEC``) +- Kernel functions (``KAPI_DEFINE_SPEC``) =20 IDE Integration --------------- @@ -357,6 +535,11 @@ Modern IDEs can use the JSON export for: - Context validation - Error code documentation =20 +Example IDE integration:: + + # Generate IDE completion data + $ kapi --format json > .vscode/kernel-apis.json + Testing Framework ----------------- =20 @@ -367,6 +550,15 @@ The framework includes test helpers:: kapi_test_api("kmalloc", test_cases); #endif =20 +The kapi tool can verify specifications against implementations:: + + # Run consistency tests + $ cd tools/kapi + $ ./test_consistency.sh + + # Compare source vs binary specifications + $ ./compare_all_syscalls.sh + Best Practices =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =20 diff --git a/tools/kapi/.gitignore b/tools/kapi/.gitignore new file mode 100644 index 0000000000000..1390bfc12686c --- /dev/null +++ b/tools/kapi/.gitignore @@ -0,0 +1,4 @@ +# Rust build artifacts +/target/ +**/*.rs.bk + diff --git a/tools/kapi/Cargo.toml b/tools/kapi/Cargo.toml new file mode 100644 index 0000000000000..4e6bcb10d132f --- /dev/null +++ b/tools/kapi/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name =3D "kapi" +version =3D "0.1.0" +edition =3D "2024" +authors =3D ["Sasha Levin "] +description =3D "Tool for extracting and displaying kernel API specificati= ons" +license =3D "GPL-2.0" + +[dependencies] +goblin =3D "0.10" +clap =3D { version =3D "4.4", features =3D ["derive"] } +anyhow =3D "1.0" +serde =3D { version =3D "1.0", features =3D ["derive"] } +serde_json =3D "1.0" +regex =3D "1.10" +walkdir =3D "2.4" + +[dev-dependencies] +tempfile =3D "3.8" diff --git a/tools/kapi/src/extractor/debugfs.rs b/tools/kapi/src/extractor= /debugfs.rs new file mode 100644 index 0000000000000..91775dea223f5 --- /dev/null +++ 
b/tools/kapi/src/extractor/debugfs.rs @@ -0,0 +1,204 @@ +use anyhow::{Context, Result, bail}; +use std::fs; +use std::io::Write; +use std::path::PathBuf; +use crate::formatter::OutputFormatter; + +use super::{ApiExtractor, ApiSpec, display_api_spec}; + +/// Extractor for kernel API specifications from debugfs +pub struct DebugfsExtractor { + debugfs_path: PathBuf, +} + +impl DebugfsExtractor { + /// Create a new debugfs extractor with the specified debugfs path + pub fn new(debugfs_path: Option) -> Result { + let path =3D match debugfs_path { + Some(p) =3D> PathBuf::from(p), + None =3D> PathBuf::from("/sys/kernel/debug"), + }; + + // Check if the debugfs path exists + if !path.exists() { + bail!("Debugfs path does not exist: {}", path.display()); + } + + // Check if kapi directory exists + let kapi_path =3D path.join("kapi"); + if !kapi_path.exists() { + bail!("Kernel API debugfs interface not found at: {}", kapi_pa= th.display()); + } + + Ok(Self { + debugfs_path: path, + }) + } + + /// Parse the list file to get all available API names + fn parse_list_file(&self) -> Result> { + let list_path =3D self.debugfs_path.join("kapi/list"); + let content =3D fs::read_to_string(&list_path) + .with_context(|| format!("Failed to read {}", list_path.displa= y()))?; + + let mut apis =3D Vec::new(); + let mut in_list =3D false; + + for line in content.lines() { + if line.contains("=3D=3D=3D") { + in_list =3D true; + continue; + } + + if in_list && line.starts_with("Total:") { + break; + } + + if in_list && !line.trim().is_empty() { + // Extract API name from lines like "sys_read - Read from = a file descriptor" + if let Some(name) =3D line.split(" - ").next() { + apis.push(name.trim().to_string()); + } + } + } + + Ok(apis) + } + + /// Parse a single API specification file + fn parse_spec_file(&self, api_name: &str) -> Result { + let spec_path =3D self.debugfs_path.join(format!("kapi/specs/{}", = api_name)); + let content =3D fs::read_to_string(&spec_path) + .with_context(|| 
format!("Failed to read {}", spec_path.displa= y()))?; + + let mut spec =3D ApiSpec { + name: api_name.to_string(), + api_type: "unknown".to_string(), + description: None, + long_description: None, + version: None, + context_flags: Vec::new(), + param_count: None, + error_count: None, + examples: None, + notes: None, + since_version: None, + }; + + // Parse the content + let mut collecting_multiline =3D false; + let mut multiline_buffer =3D String::new(); + let mut multiline_field =3D ""; + + for line in content.lines() { + // Handle section headers + if line.starts_with("Parameters (") { + if let Some(count_str) =3D line.strip_prefix("Parameters (= ").and_then(|s| s.strip_suffix("):")) { + spec.param_count =3D count_str.parse().ok(); + } + continue; + } else if line.starts_with("Errors (") { + if let Some(count_str) =3D line.strip_prefix("Errors (").a= nd_then(|s| s.strip_suffix("):")) { + spec.error_count =3D count_str.parse().ok(); + } + continue; + } else if line.starts_with("Examples:") { + collecting_multiline =3D true; + multiline_field =3D "examples"; + multiline_buffer.clear(); + continue; + } else if line.starts_with("Notes:") { + collecting_multiline =3D true; + multiline_field =3D "notes"; + multiline_buffer.clear(); + continue; + } + + // Handle multiline sections + if collecting_multiline { + if line.trim().is_empty() && multiline_buffer.ends_with("\= n\n") { + collecting_multiline =3D false; + match multiline_field { + "examples" =3D> spec.examples =3D Some(multiline_b= uffer.trim().to_string()), + "notes" =3D> spec.notes =3D Some(multiline_buffer.= trim().to_string()), + _ =3D> {} + } + multiline_buffer.clear(); + } else { + if !multiline_buffer.is_empty() { + multiline_buffer.push('\n'); + } + multiline_buffer.push_str(line); + } + continue; + } + + // Parse regular fields + if let Some(desc) =3D line.strip_prefix("Description: ") { + spec.description =3D Some(desc.to_string()); + } else if let Some(long_desc) =3D line.strip_prefix("Long desc= 
ription: ") { + spec.long_description =3D Some(long_desc.to_string()); + } else if let Some(version) =3D line.strip_prefix("Version: ")= { + spec.version =3D Some(version.to_string()); + } else if let Some(since) =3D line.strip_prefix("Since: ") { + spec.since_version =3D Some(since.to_string()); + } else if let Some(flags) =3D line.strip_prefix("Context flags= : ") { + spec.context_flags =3D flags.split_whitespace() + .map(|s| s.to_string()) + .collect(); + } + } + + // Determine API type based on name + if api_name.starts_with("sys_") { + spec.api_type =3D "syscall".to_string(); + } else if api_name.contains("_ioctl") || api_name.starts_with("ioc= tl_") { + spec.api_type =3D "ioctl".to_string(); + } else { + spec.api_type =3D "function".to_string(); + } + + Ok(spec) + } +} + +impl ApiExtractor for DebugfsExtractor { + fn extract_all(&self) -> Result> { + let api_names =3D self.parse_list_file()?; + let mut specs =3D Vec::new(); + + for name in api_names { + match self.parse_spec_file(&name) { + Ok(spec) =3D> specs.push(spec), + Err(e) =3D> eprintln!("Warning: Failed to parse spec for {= }: {}", name, e), + } + } + + Ok(specs) + } + + fn extract_by_name(&self, name: &str) -> Result> { + let api_names =3D self.parse_list_file()?; + + if api_names.contains(&name.to_string()) { + Ok(Some(self.parse_spec_file(name)?)) + } else { + Ok(None) + } + } + + fn display_api_details( + &self, + api_name: &str, + formatter: &mut dyn OutputFormatter, + writer: &mut dyn Write, + ) -> Result<()> { + if let Some(spec) =3D self.extract_by_name(api_name)? 
{ + display_api_spec(&spec, formatter, writer)?; + } else { + writeln!(writer, "API '{}' not found in debugfs", api_name)?; + } + + Ok(()) + } +} \ No newline at end of file diff --git a/tools/kapi/src/extractor/mod.rs b/tools/kapi/src/extractor/mod= .rs new file mode 100644 index 0000000000000..bc55201152e3e --- /dev/null +++ b/tools/kapi/src/extractor/mod.rs @@ -0,0 +1,95 @@ +use anyhow::Result; +use std::io::Write; +use crate::formatter::OutputFormatter; + +pub mod vmlinux; +pub mod source_parser; +pub mod debugfs; + +pub use vmlinux::VmlinuxExtractor; +pub use source_parser::SourceExtractor; +pub use debugfs::DebugfsExtractor; + +/// Common API specification information that all extractors should provide +#[derive(Debug, Clone)] +pub struct ApiSpec { + pub name: String, + pub api_type: String, + pub description: Option, + pub long_description: Option, + pub version: Option, + pub context_flags: Vec, + pub param_count: Option, + pub error_count: Option, + pub examples: Option, + pub notes: Option, + pub since_version: Option, +} + +/// Trait for extracting API specifications from different sources +pub trait ApiExtractor { + /// Extract all API specifications from the source + fn extract_all(&self) -> Result>; + + /// Extract a specific API specification by name + fn extract_by_name(&self, name: &str) -> Result>; + + /// Display detailed information about a specific API + fn display_api_details( + &self, + api_name: &str, + formatter: &mut dyn OutputFormatter, + writer: &mut dyn Write, + ) -> Result<()>; +} + +/// Helper function to display an ApiSpec using a formatter +pub fn display_api_spec( + spec: &ApiSpec, + formatter: &mut dyn OutputFormatter, + writer: &mut dyn Write, +) -> Result<()> { + formatter.begin_api_details(writer, &spec.name)?; + + if let Some(desc) =3D &spec.description { + formatter.description(writer, desc)?; + } + + if let Some(long_desc) =3D &spec.long_description { + formatter.long_description(writer, long_desc)?; + } + + if let 
Some(version) =3D &spec.since_version { + formatter.since_version(writer, version)?; + } + + if !spec.context_flags.is_empty() { + formatter.begin_context_flags(writer)?; + for flag in &spec.context_flags { + formatter.context_flag(writer, flag)?; + } + formatter.end_context_flags(writer)?; + } + + if let Some(param_count) =3D spec.param_count { + formatter.begin_parameters(writer, param_count)?; + formatter.end_parameters(writer)?; + } + + if let Some(error_count) =3D spec.error_count { + formatter.begin_errors(writer, error_count)?; + formatter.end_errors(writer)?; + } + + if let Some(notes) =3D &spec.notes { + formatter.notes(writer, notes)?; + } + + if let Some(examples) =3D &spec.examples { + formatter.examples(writer, examples)?; + } + + formatter.end_api_details(writer)?; + + Ok(()) +} \ No newline at end of file diff --git a/tools/kapi/src/extractor/source_parser.rs b/tools/kapi/src/ext= ractor/source_parser.rs new file mode 100644 index 0000000000000..8de35f5a73916 --- /dev/null +++ b/tools/kapi/src/extractor/source_parser.rs @@ -0,0 +1,488 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use std::fs; +use std::path::Path; +use std::collections::HashMap; +use walkdir::WalkDir; +use std::io::Write; +use crate::formatter::OutputFormatter; +use super::{ApiExtractor, ApiSpec, display_api_spec}; + +#[derive(Debug, Clone)] +pub struct SourceApiSpec { + pub name: String, + pub api_type: ApiType, + pub parsed_fields: HashMap, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ApiType { + Syscall, + Ioctl, + Function, + Unknown, +} + +impl ApiType { + fn from_name(name: &str) -> Self { + if name.starts_with("sys_") { + ApiType::Syscall + } else if name.contains("ioctl") || name.contains("IOCTL") { + ApiType::Ioctl + } else if name.starts_with("do_") || name.starts_with("__") { + ApiType::Function + } else { + ApiType::Unknown + } + } +} + +pub struct SourceParser { + // Regex patterns for matching KAPI specifications + spec_start_pattern: Regex, + 
spec_end_pattern: Regex, + ioctl_spec_pattern: Regex, +} + +impl SourceParser { + pub fn new() -> Result { + Ok(SourceParser { + // Match DEFINE_KERNEL_API_SPEC(function_name) + spec_start_pattern: Regex::new(r"DEFINE_KERNEL_API_SPEC\s*\(\s= *([a-zA-Z_][a-zA-Z0-9_]*)\s*\)")?, + // Match KAPI_END_SPEC + spec_end_pattern: Regex::new(r"KAPI_END_SPEC")?, + // Match IOCTL specifications + ioctl_spec_pattern: Regex::new(r#"DEFINE_IOCTL_API_SPEC\s*\(\s= *([a-zA-Z_][a-zA-Z0-9_]*)\s*,\s*([^,]+)\s*,\s*"([^"]+)"\s*\)"#)?, + }) + } + + /// Parse a single source file for KAPI specifications + pub fn parse_file(&self, path: &Path) -> Result> { + let content =3D fs::read_to_string(path) + .with_context(|| format!("Failed to read file: {}", path.displ= ay()))?; + + self.parse_content(&content, path) + } + + /// Parse file content for KAPI specifications + pub fn parse_content(&self, content: &str, _file_path: &Path) -> Resul= t> { + let mut specs =3D Vec::new(); + let lines: Vec<&str> =3D content.lines().collect(); + + // First, look for standard KAPI specs + for (i, line) in lines.iter().enumerate() { + if let Some(captures) =3D self.spec_start_pattern.captures(lin= e) { + let api_name =3D captures.get(1).unwrap().as_str().to_stri= ng(); + + // Find the end of this specification + if let Some(spec_content) =3D self.extract_spec_block(&lin= es, i) { + let mut spec =3D SourceApiSpec { + name: api_name.clone(), + api_type: ApiType::from_name(&api_name), + parsed_fields: HashMap::new(), + }; + + // Parse the fields + self.parse_spec_fields(&spec_content, &mut spec.parsed= _fields)?; + + specs.push(spec); + } + } + + // Also look for IOCTL specs + if let Some(captures) =3D self.ioctl_spec_pattern.captures(lin= e) { + let spec_name =3D captures.get(1).unwrap().as_str().to_str= ing(); + let cmd =3D captures.get(2).unwrap().as_str().to_string(); + let cmd_name =3D captures.get(3).unwrap().as_str().to_stri= ng(); + + // Find the end of this IOCTL specification + if let Some(spec_content) 
=3D self.extract_ioctl_spec_bloc= k(&lines, i) { + let mut spec =3D SourceApiSpec { + name: spec_name, + api_type: ApiType::Ioctl, + parsed_fields: HashMap::new(), + }; + + // Add IOCTL-specific fields + spec.parsed_fields.insert("cmd".to_string(), cmd); + spec.parsed_fields.insert("cmd_name".to_string(), cmd_= name); + + // Parse other fields + self.parse_spec_fields(&spec_content, &mut spec.parsed= _fields)?; + + specs.push(spec); + } + } + } + + Ok(specs) + } + + /// Extract a complete KAPI specification block from the source + fn extract_spec_block(&self, lines: &[&str], start_idx: usize) -> Opti= on { + let mut spec_lines =3D Vec::new(); + let mut brace_count =3D 0; + let mut in_spec =3D false; + + for (_i, line) in lines.iter().enumerate().skip(start_idx) { + spec_lines.push(line.to_string()); + + // Count braces to handle nested structures + for ch in line.chars() { + match ch { + '{' =3D> { + brace_count +=3D 1; + in_spec =3D true; + } + '}' =3D> { + brace_count -=3D 1; + } + _ =3D> {} + } + } + + // Check for end of spec + if self.spec_end_pattern.is_match(line) { + return Some(spec_lines.join("\n")); + } + + // Alternative end: closing brace with semicolon + if in_spec && brace_count =3D=3D 0 && line.contains("};") { + return Some(spec_lines.join("\n")); + } + } + + None + } + + /// Extract a complete IOCTL specification block + fn extract_ioctl_spec_block(&self, lines: &[&str], start_idx: usize) -= > Option { + let mut spec_lines =3D Vec::new(); + let mut brace_count =3D 0; + + for (i, line) in lines.iter().enumerate().skip(start_idx) { + spec_lines.push(line.to_string()); + + // Count braces + for ch in line.chars() { + match ch { + '{' =3D> brace_count +=3D 1, + '}' =3D> brace_count -=3D 1, + _ =3D> {} + } + } + + // Check for end patterns + if line.contains("KAPI_END_IOCTL_SPEC") || line.contains("KAPI= _IOCTL_END_SPEC") { + return Some(spec_lines.join("\n")); + } + + // Alternative end: closing brace with semicolon at top level + if brace_count 
=3D=3D 0 && line.contains("};") && i > start_id= x { + return Some(spec_lines.join("\n")); + } + } + + None + } + + /// Parse individual KAPI fields from the specification + fn parse_spec_fields(&self, content: &str, fields: &mut HashMap) -> Result<()> { + // Parse KAPI_DESCRIPTION + if let Some(captures) =3D Regex::new(r#"KAPI_DESCRIPTION\s*\(\s*"(= [^"]*)"\s*\)"#)?.captures(content) { + fields.insert("description".to_string(), captures.get(1).unwra= p().as_str().to_string()); + } + + // Parse KAPI_LONG_DESC (handle multi-line) + if let Some(captures) =3D Regex::new(r#"KAPI_LONG_DESC\s*\(\s*"([^= "]*(?:\s*"[^"]*)*?)"\s*\)"#)?.captures(content) { + let long_desc =3D captures.get(1).unwrap().as_str() + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t\"", " "); + fields.insert("long_description".to_string(), long_desc); + } + + // Parse KAPI_CONTEXT + if let Some(captures) =3D Regex::new(r"KAPI_CONTEXT\s*\(([^)]+)\)"= )?.captures(content) { + fields.insert("context".to_string(), captures.get(1).unwrap().= as_str().to_string()); + } + + // Parse KAPI_NOTES (handle multi-line) + if let Some(captures) =3D Regex::new(r#"KAPI_NOTES\s*\(\s*"([^"]*(= ?:\s*"[^"]*)*?)"\s*\)"#)?.captures(content) { + let notes =3D captures.get(1).unwrap().as_str() + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t\"", " ") + .trim() + .to_string(); + fields.insert("notes".to_string(), notes); + } + + // Parse KAPI_EXAMPLES (handle multi-line) + if let Some(captures) =3D Regex::new(r#"KAPI_EXAMPLES\s*\(\s*"([^"= ]*(?:\s*"[^"]*)*?)"\s*\)"#)?.captures(content) { + let examples =3D captures.get(1).unwrap().as_str() + .replace("\\n\"\n\t\t \"", "\n") + .replace("\\n\"\n\t\t \"", "\n") + .replace("\\n\"\n\t\t \"", "\n") + .replace("\\n\"\n\t\t \"", "\n") 
+ .replace("\\n\"\n\t\t\"", "\n") + .replace("\\n", "\n") + .trim() + .to_string(); + fields.insert("examples".to_string(), examples); + } + + // Parse KAPI_SINCE_VERSION + if let Some(captures) =3D Regex::new(r#"KAPI_SINCE_VERSION\s*\(\s*= "([^"]*)"\s*\)"#)?.captures(content) { + fields.insert("since_version".to_string(), captures.get(1).unw= rap().as_str().to_string()); + } + + // Parse parameter count + let param_regex =3D Regex::new(r"KAPI_PARAM\s*\(\s*(\d+)\s*,")?; + let mut max_param_idx =3D 0; + for captures in param_regex.captures_iter(content) { + if let Ok(idx) =3D captures.get(1).unwrap().as_str().parse::() { + max_param_idx =3D max_param_idx.max(idx + 1); + } + } + if max_param_idx > 0 { + fields.insert("param_count".to_string(), max_param_idx.to_stri= ng()); + } + + // Parse error count + let error_regex =3D Regex::new(r"KAPI_ERROR\s*\(\s*(\d+)\s*,")?; + let mut max_error_idx =3D 0; + for captures in error_regex.captures_iter(content) { + if let Ok(idx) =3D captures.get(1).unwrap().as_str().parse::() { + max_error_idx =3D max_error_idx.max(idx + 1); + } + } + if max_error_idx > 0 { + fields.insert("error_count".to_string(), max_error_idx.to_stri= ng()); + } + + // Parse other counts + if content.contains(".error_count =3D") { + if let Some(captures) =3D Regex::new(r"\.error_count\s*=3D\s*(= \d+)")?.captures(content) { + fields.insert("error_count".to_string(), captures.get(1).u= nwrap().as_str().to_string()); + } + } + + if content.contains(".param_count =3D") { + if let Some(captures) =3D Regex::new(r"\.param_count\s*=3D\s*(= \d+)")?.captures(content) { + fields.insert("param_count".to_string(), captures.get(1).u= nwrap().as_str().to_string()); + } + } + + // Parse .since_version + if let Some(captures) =3D Regex::new(r#"\.since_version\s*=3D\s*"(= [^"]*)""#)?.captures(content) { + fields.insert("since_version".to_string(), captures.get(1).unw= rap().as_str().to_string()); + } + + // Parse .notes (handle multi-line) + if let Some(captures) =3D 
Regex::new(r#"\.notes\s*=3D\s*"([^"]*(?:= \s*"[^"]*)*?)""#)?.captures(content) { + let notes =3D captures.get(1).unwrap().as_str() + .replace("\"\n\t\t \"", " ") + .replace("\"\n\t\t\"", " ") + .replace("\"\n\t \"", " ") // Handle single tab + space + .trim() + .to_string(); + fields.insert("notes".to_string(), notes); + } + + // Parse .examples (handle multi-line) + if let Some(captures) =3D Regex::new(r#"\.examples\s*=3D\s*"([^"]*= (?:\s*"[^"]*)*?)""#)?.captures(content) { + let examples =3D captures.get(1).unwrap().as_str() + .replace("\\n\"\n\t\t \"", "\n") + .replace("\\n", "\n"); + fields.insert("examples".to_string(), examples); + } + + Ok(()) + } + + /// Scan a directory tree for files containing KAPI specifications + pub fn scan_directory(&self, dir: &Path, extensions: &[&str]) -> Resul= t> { + let mut all_specs =3D Vec::new(); + + for entry in WalkDir::new(dir) + .follow_links(true) + .into_iter() + .filter_map(|e| e.ok()) + { + let path =3D entry.path(); + + // Skip non-files + if !path.is_file() { + continue; + } + + // Check file extension + if let Some(ext) =3D path.extension() { + if extensions.iter().any(|&e| ext =3D=3D e) { + // Try to parse the file + match self.parse_file(path) { + Ok(specs) =3D> { + if !specs.is_empty() { + all_specs.extend(specs); + } + } + Err(e) =3D> { + eprintln!("Warning: Failed to parse {}: {}", p= ath.display(), e); + } + } + } + } + } + + Ok(all_specs) + } + +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn test_parse_syscall_spec() { + let parser =3D SourceParser::new().unwrap(); + + let content =3D r#" +DEFINE_KERNEL_API_SPEC(sys_mlock) + KAPI_DESCRIPTION("Lock pages in memory") + KAPI_LONG_DESC("Locks pages in the specified address range into RAM") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "start", "unsigned long", "Starting address") + KAPI_PARAM_END + + KAPI_PARAM(1, "len", "size_t", "Length of range") + KAPI_PARAM_END 
+ + .param_count =3D 2, + .error_count =3D 3, + +KAPI_END_SPEC +"#; + + let mut temp_file =3D NamedTempFile::new().unwrap(); + write!(temp_file, "{}", content).unwrap(); + + let specs =3D parser.parse_content(content, temp_file.path()).unwr= ap(); + + assert_eq!(specs.len(), 1); + assert_eq!(specs[0].name, "sys_mlock"); + assert_eq!(specs[0].api_type, ApiType::Syscall); + assert_eq!(specs[0].parsed_fields.get("description").unwrap(), "Lo= ck pages in memory"); + assert_eq!(specs[0].parsed_fields.get("param_count").unwrap(), "2"= ); + } + + #[test] + fn test_parse_ioctl_spec() { + let parser =3D SourceParser::new().unwrap(); + + let content =3D r#" +DEFINE_IOCTL_API_SPEC(binder_write_read, BINDER_WRITE_READ, "BINDER_WRITE_= READ") + KAPI_DESCRIPTION("Perform read/write operations on binder") + KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE) + + KAPI_PARAM(0, "write_size", "binder_size_t", "Bytes to write") + KAPI_PARAM_END + +KAPI_END_IOCTL_SPEC +"#; + + let mut temp_file =3D NamedTempFile::new().unwrap(); + write!(temp_file, "{}", content).unwrap(); + + let specs =3D parser.parse_content(content, temp_file.path()).unwr= ap(); + + assert_eq!(specs.len(), 1); + assert_eq!(specs[0].name, "binder_write_read"); + assert_eq!(specs[0].api_type, ApiType::Ioctl); + assert_eq!(specs[0].parsed_fields.get("cmd_name").unwrap(), "BINDE= R_WRITE_READ"); + } +} + +// SourceExtractor implementation +pub struct SourceExtractor { + specs: Vec, +} + +impl SourceExtractor { + pub fn new(path: String) -> Result { + let parser =3D SourceParser::new()?; + let path_obj =3D Path::new(&path); + + let specs =3D if path_obj.is_file() { + parser.parse_file(path_obj)? + } else if path_obj.is_dir() { + parser.scan_directory(path_obj, &["c", "h"])? 
+ } else { + anyhow::bail!("Path does not exist: {}", path_obj.display()) + }; + + Ok(SourceExtractor { specs }) + } + + fn convert_to_api_spec(&self, source_spec: &SourceApiSpec) -> ApiSpec { + ApiSpec { + name: source_spec.name.clone(), + api_type: match source_spec.api_type { + ApiType::Syscall =3D> "syscall".to_string(), + ApiType::Ioctl =3D> "ioctl".to_string(), + ApiType::Function =3D> "function".to_string(), + ApiType::Unknown =3D> "unknown".to_string(), + }, + description: source_spec.parsed_fields.get("description").clon= ed(), + long_description: source_spec.parsed_fields.get("long_descript= ion").cloned(), + version: source_spec.parsed_fields.get("version").cloned(), + context_flags: source_spec.parsed_fields.get("context") + .map(|c| vec![c.clone()]) + .unwrap_or_default(), + param_count: source_spec.parsed_fields.get("param_count") + .and_then(|s| s.parse::().ok()), + error_count: source_spec.parsed_fields.get("error_count") + .and_then(|s| s.parse::().ok()), + examples: source_spec.parsed_fields.get("examples").cloned(), + notes: source_spec.parsed_fields.get("notes").cloned(), + since_version: source_spec.parsed_fields.get("since_version").= cloned(), + } + } +} + +impl ApiExtractor for SourceExtractor { + fn extract_all(&self) -> Result> { + Ok(self.specs.iter() + .map(|s| self.convert_to_api_spec(s)) + .collect()) + } + + fn extract_by_name(&self, name: &str) -> Result> { + Ok(self.specs.iter() + .find(|s| s.name =3D=3D name) + .map(|s| self.convert_to_api_spec(s))) + } + + fn display_api_details( + &self, + api_name: &str, + formatter: &mut dyn OutputFormatter, + writer: &mut dyn Write, + ) -> Result<()> { + if let Some(spec) =3D self.specs.iter().find(|s| s.name =3D=3D api= _name) { + let api_spec =3D self.convert_to_api_spec(spec); + display_api_spec(&api_spec, formatter, writer)?; + } + Ok(()) + } +} \ No newline at end of file diff --git a/tools/kapi/src/extractor/vmlinux/binary_utils.rs b/tools/kapi/= src/extractor/vmlinux/binary_utils.rs new 
file mode 100644 index 0000000000000..02c8e3b8eda77 --- /dev/null +++ b/tools/kapi/src/extractor/vmlinux/binary_utils.rs @@ -0,0 +1,130 @@ +use anyhow::Result; +use std::io::Write; +use crate::formatter::OutputFormatter; + +// Constants for all structure field sizes +pub mod sizes { + pub const NAME: usize =3D 128; + pub const DESC: usize =3D 512; + pub const MAX_PARAMS: usize =3D 16; + pub const MAX_ERRORS: usize =3D 32; + pub const MAX_CONSTRAINTS: usize =3D 16; +} + +// Helper for reading data at specific offsets +pub struct DataReader<'a> { + data: &'a [u8], + pos: usize, +} + +impl<'a> DataReader<'a> { + pub fn new(data: &'a [u8], offset: usize) -> Self { + Self { data, pos: offset } + } + + pub fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> { + if self.pos + len <=3D self.data.len() { + let bytes =3D &self.data[self.pos..self.pos + len]; + self.pos +=3D len; + Some(bytes) + } else { + None + } + } + + pub fn read_cstring(&mut self, max_len: usize) -> Option { + let bytes =3D self.read_bytes(max_len)?; + if let Some(null_pos) =3D bytes.iter().position(|&b| b =3D=3D 0) { + if null_pos > 0 { + if let Ok(s) =3D std::str::from_utf8(&bytes[..null_pos]) { + return Some(s.to_string()); + } + } + } + None + } + + pub fn read_u32(&mut self) -> Option { + let bytes =3D self.read_bytes(4)?; + Some(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])) + } + + pub fn skip(&mut self, len: usize) { + self.pos =3D (self.pos + len).min(self.data.len()); + } +} + +#[allow(dead_code)] +pub fn parse_context_flags(flags: u32, formatter: &mut dyn OutputFormatter= , w: &mut dyn Write) -> Result<()> { + // Context flags from kernel headers + const KAPI_CTX_PROCESS: u32 =3D 1 << 0; + const KAPI_CTX_SOFTIRQ: u32 =3D 1 << 1; + const KAPI_CTX_HARDIRQ: u32 =3D 1 << 2; + const KAPI_CTX_NMI: u32 =3D 1 << 3; + const KAPI_CTX_USER: u32 =3D 1 << 4; + const KAPI_CTX_KERNEL: u32 =3D 1 << 5; + const KAPI_CTX_SLEEPABLE: u32 =3D 1 << 6; + const KAPI_CTX_ATOMIC: u32 =3D 1 << 7; + 
const KAPI_CTX_PREEMPTIBLE: u32 =3D 1 << 8; + const KAPI_CTX_MIGRATION_DISABLED: u32 =3D 1 << 9; + + if flags & KAPI_CTX_PROCESS !=3D 0 { formatter.context_flag(w, "Proces= s context")?; } + if flags & KAPI_CTX_SOFTIRQ !=3D 0 { formatter.context_flag(w, "Softir= q context")?; } + if flags & KAPI_CTX_HARDIRQ !=3D 0 { formatter.context_flag(w, "Hardir= q context")?; } + if flags & KAPI_CTX_NMI !=3D 0 { formatter.context_flag(w, "NMI contex= t")?; } + if flags & KAPI_CTX_USER !=3D 0 { formatter.context_flag(w, "User mode= ")?; } + if flags & KAPI_CTX_KERNEL !=3D 0 { formatter.context_flag(w, "Kernel = mode")?; } + if flags & KAPI_CTX_SLEEPABLE !=3D 0 { formatter.context_flag(w, "May = sleep")?; } + if flags & KAPI_CTX_ATOMIC !=3D 0 { formatter.context_flag(w, "Atomic = context")?; } + if flags & KAPI_CTX_PREEMPTIBLE !=3D 0 { formatter.context_flag(w, "Pr= eemptible")?; } + if flags & KAPI_CTX_MIGRATION_DISABLED !=3D 0 { formatter.context_flag= (w, "Migration disabled")?; } + + Ok(()) +} + +// Structure layout definitions for calculating sizes +pub fn param_spec_layout_size() -> usize { + // Packed structure + sizes::NAME * 2 + // name, type_name + 4 + 4 + // type, flags + 8 + 8 + // size, alignment + 8 + 8 + // min_value, max_value + 8 + // valid_mask + 8 + // enum_values pointer + 4 + 4 + // enum_count, constraint_type + 8 + // validate pointer + sizes::DESC * 2 + // description, constraints + 4 + 8 // size_param_idx, size_multiplier +} + +pub fn return_spec_layout_size() -> usize { + // Packed structure + sizes::NAME + // type_name + 4 + 4 + // type, check_type + 8 + 8 + 8 + // success_value, success_min, success_max + 8 + // error_values pointer + 4 + // error_count + 8 + // is_success pointer + sizes::DESC // description +} + +pub fn error_spec_layout_size() -> usize { + // Packed structure + 4 + // code + sizes::NAME + // name + sizes::DESC * 2 // condition, description +} + +pub fn lock_spec_layout_size() -> usize { + // Packed structure + sizes::NAME + // name 
+ 4 + // lock_type + 1 + 1 + 1 + 1 + // bools + sizes::DESC // description +} + +pub fn constraint_spec_layout_size() -> usize { + // Packed structure + sizes::NAME + // name + sizes::DESC * 2 // description, expression +} \ No newline at end of file diff --git a/tools/kapi/src/extractor/vmlinux/mod.rs b/tools/kapi/src/extra= ctor/vmlinux/mod.rs new file mode 100644 index 0000000000000..5d5ca413d77a2 --- /dev/null +++ b/tools/kapi/src/extractor/vmlinux/mod.rs @@ -0,0 +1,372 @@ +use anyhow::{Context, Result}; +use goblin::elf::Elf; +use std::fs; +use std::io::Write; +use crate::formatter::OutputFormatter; +use super::{ApiExtractor, ApiSpec}; + +mod binary_utils; +use binary_utils::{sizes, DataReader, + param_spec_layout_size, return_spec_layout_size, error_spec_layout_siz= e, + lock_spec_layout_size, constraint_spec_layout_size}; + +pub struct VmlinuxExtractor { + kapi_data: Vec, + specs: Vec, +} + +#[derive(Debug)] +struct KapiSpec { + name: String, + api_type: String, + offset: usize, +} + +impl VmlinuxExtractor { + pub fn new(vmlinux_path: String) -> Result { + let vmlinux_data =3D fs::read(&vmlinux_path) + .with_context(|| format!("Failed to read vmlinux file: {}", vm= linux_path))?; + + let elf =3D Elf::parse(&vmlinux_data) + .context("Failed to parse ELF file")?; + + // Find the .kapi_specs section + let kapi_section =3D elf.section_headers + .iter() + .find(|sh| { + if let Some(name) =3D elf.shdr_strtab.get_at(sh.sh_name) { + name =3D=3D ".kapi_specs" + } else { + false + } + }) + .context("Could not find .kapi_specs section in vmlinux")?; + + // Find __start_kapi_specs and __stop_kapi_specs symbols + let mut start_addr =3D None; + let mut stop_addr =3D None; + + for sym in &elf.syms { + if let Some(name) =3D elf.strtab.get_at(sym.st_name) { + match name { + "__start_kapi_specs" =3D> start_addr =3D Some(sym.st_v= alue), + "__stop_kapi_specs" =3D> stop_addr =3D Some(sym.st_val= ue), + _ =3D> {} + } + } + } + + let start =3D start_addr.context("Could not find 
__start_kapi_spec= s symbol")?; + let stop =3D stop_addr.context("Could not find __stop_kapi_specs s= ymbol")?; + + if stop <=3D start { + anyhow::bail!("No kernel API specifications found in vmlinux"); + } + + // Calculate the offset within the file + let section_vaddr =3D kapi_section.sh_addr; + let file_offset =3D kapi_section.sh_offset + (start - section_vadd= r); + let data_size =3D (stop - start) as usize; + + if file_offset as usize + data_size > vmlinux_data.len() { + anyhow::bail!("Invalid offset/size for .kapi_specs data"); + } + + // Extract the raw data + let kapi_data =3D vmlinux_data[file_offset as usize..(file_offset = as usize + data_size)].to_vec(); + + // Parse the specifications + let specs =3D parse_kapi_specs(&kapi_data)?; + + Ok(VmlinuxExtractor { + kapi_data, + specs, + }) + } + +} + +impl ApiExtractor for VmlinuxExtractor { + fn extract_all(&self) -> Result> { + // For vmlinux extractor, we return basic info only + // Detailed parsing happens in display_api_details + Ok(self.specs.iter().map(|spec| { + ApiSpec { + name: spec.name.clone(), + api_type: spec.api_type.clone(), + description: None, + long_description: None, + version: None, + context_flags: vec![], + param_count: None, + error_count: None, + examples: None, + notes: None, + since_version: None, + } + }).collect()) + } + + fn extract_by_name(&self, name: &str) -> Result> { + Ok(self.specs.iter() + .find(|s| s.name =3D=3D name) + .map(|spec| ApiSpec { + name: spec.name.clone(), + api_type: spec.api_type.clone(), + description: None, + long_description: None, + version: None, + context_flags: vec![], + param_count: None, + error_count: None, + examples: None, + notes: None, + since_version: None, + })) + } + + fn display_api_details( + &self, + api_name: &str, + formatter: &mut dyn OutputFormatter, + writer: &mut dyn Write, + ) -> Result<()> { + if let Some(spec) =3D self.specs.iter().find(|s| s.name =3D=3D api= _name) { + // Parse the binary data into an ApiSpec + let api_spec =3D 
parse_binary_to_api_spec(&self.kapi_data, spe= c.offset)?; + // Use the common display function + super::display_api_spec(&api_spec, formatter, writer)?; + } + Ok(()) + } +} + +fn parse_kapi_specs(data: &[u8]) -> Result> { + let mut specs =3D Vec::new(); + + // The kernel_api_spec struct size in the kernel is 308064 bytes + // This is calculated as sizeof(struct kernel_api_spec) which includes: + // - Basic fields (name, version, description, etc.) + // - Arrays for parameters, errors, locks, constraints + // - Additional metadata fields + // TODO: This should ideally be read from kernel headers or made confi= gurable + let struct_size =3D 308064; + + let mut offset =3D 0; + while offset + struct_size <=3D data.len() { + // Try to read the name at this offset + if let Some(name) =3D read_cstring(data, offset, 128) { + if is_valid_api_name(&name) { + let api_type =3D if name.starts_with("sys_") { + "syscall" + } else if name.contains("ioctl") || name.contains("IOCTL")= { + "ioctl" + } else { + "other" + }; + + specs.push(KapiSpec { + name: name.to_string(), + api_type: api_type.to_string(), + offset, + }); + } + } + + offset +=3D struct_size; + } + + // Handle any remaining data that might be a partial spec + if offset < data.len() && data.len() - offset >=3D 128 { + if let Some(name) =3D read_cstring(data, offset, 128) { + if is_valid_api_name(&name) { + let api_type =3D if name.starts_with("sys_") { + "syscall" + } else if name.contains("ioctl") || name.contains("IOCTL")= { + "ioctl" + } else { + "other" + }; + + specs.push(KapiSpec { + name: name.to_string(), + api_type: api_type.to_string(), + offset, + }); + } + } + } + + Ok(specs) +} + +fn read_cstring(data: &[u8], offset: usize, max_len: usize) -> Option { + if offset + max_len > data.len() { + return None; + } + + let bytes =3D &data[offset..offset + max_len]; + if let Some(null_pos) =3D bytes.iter().position(|&b| b =3D=3D 0) { + if null_pos > 0 { + if let Ok(s) =3D std::str::from_utf8(&bytes[..null_pos]) { 
+ return Some(s.to_string()); + } + } + } + None +} + +fn is_valid_api_name(name: &str) -> bool { + if name.is_empty() || name.len() > 100 { + return false; + } + + name.chars().all(|c| c.is_ascii_alphanumeric() || c =3D=3D '_') + && (name.starts_with("sys_") + || name.contains("ioctl") + || name.contains("IOCTL") + || name.starts_with("do_") + || name.starts_with("__")) +} + +fn parse_binary_to_api_spec(data: &[u8], offset: usize) -> Result= { + let mut reader =3D DataReader::new(data, offset); + + // Read name + let name =3D reader.read_cstring(sizes::NAME) + .ok_or_else(|| anyhow::anyhow!("Failed to read API name"))?; + + // Read version + let version =3D reader.read_u32() + .map(|v| v.to_string()); + + // Read description + let description =3D reader.read_cstring(sizes::DESC) + .filter(|s| !s.is_empty()); + + // Read long description + let long_description =3D reader.read_cstring(sizes::DESC * 4) + .filter(|s| !s.is_empty()); + + // Read context flags + let context_flags =3D if let Some(flags) =3D reader.read_u32() { + let mut flag_strings =3D Vec::new(); + + const KAPI_CTX_PROCESS: u32 =3D 1 << 0; + const KAPI_CTX_SOFTIRQ: u32 =3D 1 << 1; + const KAPI_CTX_HARDIRQ: u32 =3D 1 << 2; + const KAPI_CTX_NMI: u32 =3D 1 << 3; + const KAPI_CTX_USER: u32 =3D 1 << 4; + const KAPI_CTX_KERNEL: u32 =3D 1 << 5; + const KAPI_CTX_SLEEPABLE: u32 =3D 1 << 6; + const KAPI_CTX_ATOMIC: u32 =3D 1 << 7; + const KAPI_CTX_PREEMPTIBLE: u32 =3D 1 << 8; + const KAPI_CTX_MIGRATION_DISABLED: u32 =3D 1 << 9; + + // Build the flag string similar to source format + let mut parts =3D Vec::new(); + if flags & KAPI_CTX_PROCESS !=3D 0 { parts.push("KAPI_CTX_PROCESS"= ); } + if flags & KAPI_CTX_SOFTIRQ !=3D 0 { parts.push("KAPI_CTX_SOFTIRQ"= ); } + if flags & KAPI_CTX_HARDIRQ !=3D 0 { parts.push("KAPI_CTX_HARDIRQ"= ); } + if flags & KAPI_CTX_NMI !=3D 0 { parts.push("KAPI_CTX_NMI"); } + if flags & KAPI_CTX_USER !=3D 0 { parts.push("KAPI_CTX_USER"); } + if flags & KAPI_CTX_KERNEL !=3D 0 { 
parts.push("KAPI_CTX_KERNEL");= } + if flags & KAPI_CTX_SLEEPABLE !=3D 0 { parts.push("KAPI_CTX_SLEEPA= BLE"); } + if flags & KAPI_CTX_ATOMIC !=3D 0 { parts.push("KAPI_CTX_ATOMIC");= } + if flags & KAPI_CTX_PREEMPTIBLE !=3D 0 { parts.push("KAPI_CTX_PREE= MPTIBLE"); } + if flags & KAPI_CTX_MIGRATION_DISABLED !=3D 0 { parts.push("KAPI_C= TX_MIGRATION_DISABLED"); } + + if !parts.is_empty() { + flag_strings.push(parts.join(" | ")); + } + flag_strings + } else { + vec![] + }; + + // Read parameter count + let param_count =3D reader.read_u32(); + + // Skip parameters for now (to match source output) + if let Some(count) =3D param_count { + if count > 0 && count <=3D sizes::MAX_PARAMS as u32 { + reader.skip(param_spec_layout_size() * count as usize); + reader.skip(param_spec_layout_size() * (sizes::MAX_PARAMS - co= unt as usize)); + } else { + reader.skip(param_spec_layout_size() * sizes::MAX_PARAMS); + } + } + + // Skip return spec + reader.skip(return_spec_layout_size()); + + // Read error count + let error_count =3D reader.read_u32(); + + // Skip errors + if let Some(count) =3D error_count { + if count > 0 && count <=3D sizes::MAX_ERRORS as u32 { + reader.skip(error_spec_layout_size() * count as usize); + reader.skip(error_spec_layout_size() * (sizes::MAX_ERRORS - co= unt as usize)); + } else { + reader.skip(error_spec_layout_size() * sizes::MAX_ERRORS); + } + } + + // Skip locks + if let Some(lock_count) =3D reader.read_u32() { + if lock_count > 0 && lock_count <=3D sizes::MAX_CONSTRAINTS as u32= { + reader.skip(lock_spec_layout_size() * lock_count as usize); + reader.skip(lock_spec_layout_size() * (sizes::MAX_CONSTRAINTS = - lock_count as usize)); + } else { + reader.skip(lock_spec_layout_size() * sizes::MAX_CONSTRAINTS); + } + } + + // Skip constraints + if let Some(constraint_count) =3D reader.read_u32() { + if constraint_count > 0 && constraint_count <=3D sizes::MAX_CONSTR= AINTS as u32 { + reader.skip(constraint_spec_layout_size() * constraint_count a= s usize); 
+ reader.skip(constraint_spec_layout_size() * (sizes::MAX_CONSTR= AINTS - constraint_count as usize)); + } else { + reader.skip(constraint_spec_layout_size() * sizes::MAX_CONSTRA= INTS); + } + } + + // Read examples + let examples =3D reader.read_cstring(sizes::DESC * 2) + .filter(|s| !s.is_empty()); + + // Read notes + let notes =3D reader.read_cstring(sizes::DESC) + .filter(|s| !s.is_empty()); + + // Read since_version + let since_version =3D reader.read_cstring(32) + .filter(|s| !s.is_empty()); + + // Determine API type from name + let api_type =3D if name.starts_with("sys_") { + "syscall" + } else if name.contains("ioctl") || name.contains("IOCTL") { + "ioctl" + } else { + "other" + }.to_string(); + + Ok(ApiSpec { + name, + api_type, + description, + long_description, + version, + context_flags, + param_count, + error_count, + examples, + notes, + since_version, + }) +} + +// Old display_api_details_from_binary function removed - now using parse_= binary_to_api_spec + display_api_spec \ No newline at end of file diff --git a/tools/kapi/src/formatter/json.rs b/tools/kapi/src/formatter/js= on.rs new file mode 100644 index 0000000000000..44d2bbfc91133 --- /dev/null +++ b/tools/kapi/src/formatter/json.rs @@ -0,0 +1,170 @@ +use super::OutputFormatter; +use std::io::Write; +use serde::Serialize; + +pub struct JsonFormatter { + data: JsonData, +} + +#[derive(Serialize)] +struct JsonData { + #[serde(skip_serializing_if =3D "Option::is_none")] + apis: Option>, + #[serde(skip_serializing_if =3D "Option::is_none")] + api_details: Option, +} + +#[derive(Serialize)] +struct JsonApi { + name: String, + api_type: String, +} + +#[derive(Serialize)] +struct JsonApiDetails { + name: String, + #[serde(skip_serializing_if =3D "Option::is_none")] + description: Option, + #[serde(skip_serializing_if =3D "Option::is_none")] + long_description: Option, + #[serde(skip_serializing_if =3D "Vec::is_empty")] + context_flags: Vec, + #[serde(skip_serializing_if =3D "Option::is_none")] + 
examples: Option, + #[serde(skip_serializing_if =3D "Option::is_none")] + notes: Option, + #[serde(skip_serializing_if =3D "Option::is_none")] + since_version: Option, +} + + +impl JsonFormatter { + pub fn new() -> Self { + JsonFormatter { + data: JsonData { + apis: None, + api_details: None, + } + } + } +} + +impl OutputFormatter for JsonFormatter { + fn begin_document(&mut self, _w: &mut dyn Write) -> std::io::Result<()= > { + Ok(()) + } + + fn end_document(&mut self, w: &mut dyn Write) -> std::io::Result<()> { + let json =3D serde_json::to_string_pretty(&self.data)?; + writeln!(w, "{}", json)?; + Ok(()) + } + + fn begin_api_list(&mut self, _w: &mut dyn Write, _title: &str) -> std:= :io::Result<()> { + self.data.apis =3D Some(Vec::new()); + Ok(()) + } + + fn api_item(&mut self, _w: &mut dyn Write, name: &str, api_type: &str)= -> std::io::Result<()> { + if let Some(apis) =3D &mut self.data.apis { + apis.push(JsonApi { + name: name.to_string(), + api_type: api_type.to_string(), + }); + } + Ok(()) + } + + fn end_api_list(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn total_specs(&mut self, _w: &mut dyn Write, _count: usize) -> std::i= o::Result<()> { + Ok(()) + } + + fn begin_api_details(&mut self, _w: &mut dyn Write, name: &str) -> std= ::io::Result<()> { + self.data.api_details =3D Some(JsonApiDetails { + name: name.to_string(), + description: None, + long_description: None, + context_flags: Vec::new(), + examples: None, + notes: None, + since_version: None, + }); + Ok(()) + } + + fn end_api_details(&mut self, _w: &mut dyn Write) -> std::io::Result<(= )> { + Ok(()) + } + + + fn description(&mut self, _w: &mut dyn Write, desc: &str) -> std::io::= Result<()> { + if let Some(details) =3D &mut self.data.api_details { + details.description =3D Some(desc.to_string()); + } + Ok(()) + } + + fn long_description(&mut self, _w: &mut dyn Write, desc: &str) -> std:= :io::Result<()> { + if let Some(details) =3D &mut self.data.api_details { + 
details.long_description =3D Some(desc.to_string()); + } + Ok(()) + } + + fn begin_context_flags(&mut self, _w: &mut dyn Write) -> std::io::Resu= lt<()> { + Ok(()) + } + + fn context_flag(&mut self, _w: &mut dyn Write, flag: &str) -> std::io:= :Result<()> { + if let Some(details) =3D &mut self.data.api_details { + details.context_flags.push(flag.to_string()); + } + Ok(()) + } + + fn end_context_flags(&mut self, _w: &mut dyn Write) -> std::io::Result= <()> { + Ok(()) + } + + fn begin_parameters(&mut self, _w: &mut dyn Write, _count: u32) -> std= ::io::Result<()> { + Ok(()) + } + + + fn end_parameters(&mut self, _w: &mut dyn Write) -> std::io::Result<()= > { + Ok(()) + } + + fn begin_errors(&mut self, _w: &mut dyn Write, _count: u32) -> std::io= ::Result<()> { + Ok(()) + } + + fn end_errors(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn examples(&mut self, _w: &mut dyn Write, examples: &str) -> std::io:= :Result<()> { + if let Some(details) =3D &mut self.data.api_details { + details.examples =3D Some(examples.to_string()); + } + Ok(()) + } + + fn notes(&mut self, _w: &mut dyn Write, notes: &str) -> std::io::Resul= t<()> { + if let Some(details) =3D &mut self.data.api_details { + details.notes =3D Some(notes.to_string()); + } + Ok(()) + } + + fn since_version(&mut self, _w: &mut dyn Write, version: &str) -> std:= :io::Result<()> { + if let Some(details) =3D &mut self.data.api_details { + details.since_version =3D Some(version.to_string()); + } + Ok(()) + } +} \ No newline at end of file diff --git a/tools/kapi/src/formatter/mod.rs b/tools/kapi/src/formatter/mod= .rs new file mode 100644 index 0000000000000..6eb42e8b404d0 --- /dev/null +++ b/tools/kapi/src/formatter/mod.rs @@ -0,0 +1,68 @@ +use std::io::Write; + +mod plain; +mod json; +mod rst; + +pub use plain::PlainFormatter; +pub use json::JsonFormatter; +pub use rst::RstFormatter; + + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum OutputFormat { + Plain, + Json, + Rst, +} + +impl 
std::str::FromStr for OutputFormat { + type Err =3D String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "plain" =3D> Ok(OutputFormat::Plain), + "json" =3D> Ok(OutputFormat::Json), + "rst" =3D> Ok(OutputFormat::Rst), + _ =3D> Err(format!("Unknown output format: {}", s)), + } + } +} + +pub trait OutputFormatter { + fn begin_document(&mut self, w: &mut dyn Write) -> std::io::Result<()>; + fn end_document(&mut self, w: &mut dyn Write) -> std::io::Result<()>; + + fn begin_api_list(&mut self, w: &mut dyn Write, title: &str) -> std::i= o::Result<()>; + fn api_item(&mut self, w: &mut dyn Write, name: &str, api_type: &str) = -> std::io::Result<()>; + fn end_api_list(&mut self, w: &mut dyn Write) -> std::io::Result<()>; + + fn total_specs(&mut self, w: &mut dyn Write, count: usize) -> std::io:= :Result<()>; + + fn begin_api_details(&mut self, w: &mut dyn Write, name: &str) -> std:= :io::Result<()>; + fn end_api_details(&mut self, w: &mut dyn Write) -> std::io::Result<()= >; + fn description(&mut self, w: &mut dyn Write, desc: &str) -> std::io::R= esult<()>; + fn long_description(&mut self, w: &mut dyn Write, desc: &str) -> std::= io::Result<()>; + + fn begin_context_flags(&mut self, w: &mut dyn Write) -> std::io::Resul= t<()>; + fn context_flag(&mut self, w: &mut dyn Write, flag: &str) -> std::io::= Result<()>; + fn end_context_flags(&mut self, w: &mut dyn Write) -> std::io::Result<= ()>; + + fn begin_parameters(&mut self, w: &mut dyn Write, count: u32) -> std::= io::Result<()>; + fn end_parameters(&mut self, w: &mut dyn Write) -> std::io::Result<()>; + + fn begin_errors(&mut self, w: &mut dyn Write, count: u32) -> std::io::= Result<()>; + fn end_errors(&mut self, w: &mut dyn Write) -> std::io::Result<()>; + + fn examples(&mut self, w: &mut dyn Write, examples: &str) -> std::io::= Result<()>; + fn notes(&mut self, w: &mut dyn Write, notes: &str) -> std::io::Result= <()>; + fn since_version(&mut self, w: &mut dyn Write, version: &str) -> std::= 
io::Result<()>; +} + +pub fn create_formatter(format: OutputFormat) -> Box { + match format { + OutputFormat::Plain =3D> Box::new(PlainFormatter::new()), + OutputFormat::Json =3D> Box::new(JsonFormatter::new()), + OutputFormat::Rst =3D> Box::new(RstFormatter::new()), + } +} \ No newline at end of file diff --git a/tools/kapi/src/formatter/plain.rs b/tools/kapi/src/formatter/p= lain.rs new file mode 100644 index 0000000000000..4ccbfcbbc8416 --- /dev/null +++ b/tools/kapi/src/formatter/plain.rs @@ -0,0 +1,99 @@ +use super::OutputFormatter; +use std::io::Write; + +pub struct PlainFormatter; + +impl PlainFormatter { + pub fn new() -> Self { + PlainFormatter + } +} + +impl OutputFormatter for PlainFormatter { + fn begin_document(&mut self, _w: &mut dyn Write) -> std::io::Result<()= > { + Ok(()) + } + + fn end_document(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn begin_api_list(&mut self, w: &mut dyn Write, title: &str) -> std::i= o::Result<()> { + writeln!(w, "\n{}:", title)?; + writeln!(w, "{}", "-".repeat(title.len() + 1)) + } + + fn api_item(&mut self, w: &mut dyn Write, name: &str, _api_type: &str)= -> std::io::Result<()> { + writeln!(w, " {}", name) + } + + fn end_api_list(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn total_specs(&mut self, w: &mut dyn Write, count: usize) -> std::io:= :Result<()> { + writeln!(w, "\nTotal specifications found: {}", count) + } + + fn begin_api_details(&mut self, w: &mut dyn Write, name: &str) -> std:= :io::Result<()> { + writeln!(w, "\nDetailed information for {}:", name)?; + writeln!(w, "{}=3D", "=3D".repeat(25 + name.len())) + } + + fn end_api_details(&mut self, _w: &mut dyn Write) -> std::io::Result<(= )> { + Ok(()) + } + + + fn description(&mut self, w: &mut dyn Write, desc: &str) -> std::io::R= esult<()> { + writeln!(w, "Description: {}", desc) + } + + fn long_description(&mut self, w: &mut dyn Write, desc: &str) -> std::= io::Result<()> { + writeln!(w, "\nDetailed 
Description:")?; + writeln!(w, "{}", desc) + } + + fn begin_context_flags(&mut self, w: &mut dyn Write) -> std::io::Resul= t<()> { + writeln!(w, "\nExecution Context:") + } + + fn context_flag(&mut self, w: &mut dyn Write, flag: &str) -> std::io::= Result<()> { + writeln!(w, " - {}", flag) + } + + fn end_context_flags(&mut self, _w: &mut dyn Write) -> std::io::Result= <()> { + Ok(()) + } + + fn begin_parameters(&mut self, w: &mut dyn Write, count: u32) -> std::= io::Result<()> { + writeln!(w, "\nParameters ({}):", count) + } + + + fn end_parameters(&mut self, _w: &mut dyn Write) -> std::io::Result<()= > { + Ok(()) + } + + fn begin_errors(&mut self, w: &mut dyn Write, count: u32) -> std::io::= Result<()> { + writeln!(w, "\nPossible Errors ({}):", count) + } + + fn end_errors(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn examples(&mut self, w: &mut dyn Write, examples: &str) -> std::io::= Result<()> { + writeln!(w, "\nExamples:")?; + writeln!(w, "{}", examples) + } + + fn notes(&mut self, w: &mut dyn Write, notes: &str) -> std::io::Result= <()> { + writeln!(w, "\nNotes:")?; + writeln!(w, "{}", notes) + } + + fn since_version(&mut self, w: &mut dyn Write, version: &str) -> std::= io::Result<()> { + writeln!(w, "\nAvailable since: {}", version) + } +} \ No newline at end of file diff --git a/tools/kapi/src/formatter/rst.rs b/tools/kapi/src/formatter/rst= .rs new file mode 100644 index 0000000000000..96be83bf208dd --- /dev/null +++ b/tools/kapi/src/formatter/rst.rs @@ -0,0 +1,144 @@ +use super::OutputFormatter; +use std::io::Write; + +pub struct RstFormatter { + current_section_level: usize, +} + +impl RstFormatter { + pub fn new() -> Self { + RstFormatter { + current_section_level: 0, + } + } + + fn section_char(&self, level: usize) -> char { + match level { + 0 =3D> '=3D', + 1 =3D> '-', + 2 =3D> '~', + 3 =3D> '^', + _ =3D> '"', + } + } +} + +impl OutputFormatter for RstFormatter { + fn begin_document(&mut self, _w: &mut dyn Write) -> 
std::io::Result<()= > { + Ok(()) + } + + fn end_document(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn begin_api_list(&mut self, w: &mut dyn Write, title: &str) -> std::i= o::Result<()> { + writeln!(w, "\n{}", title)?; + writeln!(w, "{}", self.section_char(0).to_string().repeat(title.le= n()))?; + writeln!(w) + } + + fn api_item(&mut self, w: &mut dyn Write, name: &str, api_type: &str) = -> std::io::Result<()> { + writeln!(w, "* **{}** (*{}*)", name, api_type) + } + + fn end_api_list(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn total_specs(&mut self, w: &mut dyn Write, count: usize) -> std::io:= :Result<()> { + writeln!(w, "\n**Total specifications found:** {}", count) + } + + fn begin_api_details(&mut self, w: &mut dyn Write, name: &str) -> std:= :io::Result<()> { + self.current_section_level =3D 0; + writeln!(w, "\n{}", name)?; + writeln!(w, "{}", self.section_char(0).to_string().repeat(name.len= ()))?; + writeln!(w) + } + + fn end_api_details(&mut self, _w: &mut dyn Write) -> std::io::Result<(= )> { + Ok(()) + } + + + fn description(&mut self, w: &mut dyn Write, desc: &str) -> std::io::R= esult<()> { + writeln!(w, "**{}**", desc)?; + writeln!(w) + } + + fn long_description(&mut self, w: &mut dyn Write, desc: &str) -> std::= io::Result<()> { + writeln!(w, "{}", desc)?; + writeln!(w) + } + + fn begin_context_flags(&mut self, w: &mut dyn Write) -> std::io::Resul= t<()> { + self.current_section_level =3D 1; + let title =3D "Execution Context"; + writeln!(w, "{}", title)?; + writeln!(w, "{}", self.section_char(1).to_string().repeat(title.le= n()))?; + writeln!(w) + } + + fn context_flag(&mut self, w: &mut dyn Write, flag: &str) -> std::io::= Result<()> { + writeln!(w, "* {}", flag) + } + + fn end_context_flags(&mut self, w: &mut dyn Write) -> std::io::Result<= ()> { + writeln!(w) + } + + fn begin_parameters(&mut self, w: &mut dyn Write, count: u32) -> std::= io::Result<()> { + self.current_section_level =3D 
1; + let title =3D format!("Parameters ({})", count); + writeln!(w, "{}", title)?; + writeln!(w, "{}", self.section_char(1).to_string().repeat(title.le= n()))?; + writeln!(w) + } + + + fn end_parameters(&mut self, _w: &mut dyn Write) -> std::io::Result<()= > { + Ok(()) + } + + fn begin_errors(&mut self, w: &mut dyn Write, count: u32) -> std::io::= Result<()> { + self.current_section_level =3D 1; + let title =3D format!("Possible Errors ({})", count); + writeln!(w, "{}", title)?; + writeln!(w, "{}", self.section_char(1).to_string().repeat(title.le= n()))?; + writeln!(w) + } + + fn end_errors(&mut self, _w: &mut dyn Write) -> std::io::Result<()> { + Ok(()) + } + + fn examples(&mut self, w: &mut dyn Write, examples: &str) -> std::io::= Result<()> { + self.current_section_level =3D 1; + let title =3D "Examples"; + writeln!(w, "{}", title)?; + writeln!(w, "{}", self.section_char(1).to_string().repeat(title.le= n()))?; + writeln!(w)?; + writeln!(w, ".. code-block:: c")?; + writeln!(w)?; + for line in examples.lines() { + writeln!(w, " {}", line)?; + } + writeln!(w) + } + + fn notes(&mut self, w: &mut dyn Write, notes: &str) -> std::io::Result= <()> { + self.current_section_level =3D 1; + let title =3D "Notes"; + writeln!(w, "{}", title)?; + writeln!(w, "{}", self.section_char(1).to_string().repeat(title.le= n()))?; + writeln!(w)?; + writeln!(w, "{}", notes)?; + writeln!(w) + } + + fn since_version(&mut self, w: &mut dyn Write, version: &str) -> std::= io::Result<()> { + writeln!(w, ":Available since: {}", version)?; + writeln!(w) + } +} \ No newline at end of file diff --git a/tools/kapi/src/main.rs b/tools/kapi/src/main.rs new file mode 100644 index 0000000000000..9d6533cbc7dd1 --- /dev/null +++ b/tools/kapi/src/main.rs @@ -0,0 +1,121 @@ +//! kapi - Kernel API Specification Tool +//! +//! This tool extracts and displays kernel API specifications from multipl= e sources: +//! - Kernel source code (KAPI macros) +//! 
- Compiled vmlinux binaries (.kapi_specs ELF section) +//! - Running kernel via debugfs + +use anyhow::Result; +use clap::Parser; +use std::io::{self, Write}; + +mod formatter; +mod extractor; + +use formatter::{OutputFormat, create_formatter}; +use extractor::{ApiExtractor, VmlinuxExtractor, SourceExtractor, DebugfsEx= tractor}; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about =3D None)] +struct Args { + /// Path to the vmlinux file + #[arg(long, value_name =3D "PATH", group =3D "input")] + vmlinux: Option, + + /// Path to kernel source directory or file + #[arg(long, value_name =3D "PATH", group =3D "input")] + source: Option, + + /// Path to debugfs (defaults to /sys/kernel/debug if not specified) + #[arg(long, value_name =3D "PATH", group =3D "input")] + debugfs: Option, + + /// Optional: Name of specific API to show details for + api_name: Option, + + /// Output format + #[arg(long, short =3D 'f', default_value =3D "plain")] + format: String, +} + +fn main() -> Result<()> { + let args =3D Args::parse(); + + let output_format: OutputFormat =3D args.format.parse() + .map_err(|e: String| anyhow::anyhow!(e))?; + + let extractor: Box =3D match (args.vmlinux, args.sou= rce, args.debugfs.clone()) { + (Some(vmlinux_path), None, None) =3D> { + Box::new(VmlinuxExtractor::new(vmlinux_path)?) + } + (None, Some(source_path), None) =3D> { + Box::new(SourceExtractor::new(source_path)?) + } + (None, None, Some(_)) | (None, None, None) =3D> { + // If debugfs is specified or no input is provided, use debugfs + Box::new(DebugfsExtractor::new(args.debugfs)?) 
+ } + _ =3D> { + anyhow::bail!("Please specify only one of --vmlinux, --source,= or --debugfs") + } + }; + + display_apis(extractor.as_ref(), args.api_name, output_format) +} + +fn display_apis(extractor: &dyn ApiExtractor, api_name: Option, ou= tput_format: OutputFormat) -> Result<()> { + let mut formatter =3D create_formatter(output_format); + let mut stdout =3D io::stdout(); + + formatter.begin_document(&mut stdout)?; + + if let Some(api_name_req) =3D api_name { + // Use the extractor to display API details + if let Some(_spec) =3D extractor.extract_by_name(&api_name_req)? { + extractor.display_api_details(&api_name_req, &mut *formatter, = &mut stdout)?; + } else if output_format =3D=3D OutputFormat::Plain { + writeln!(stdout, "\nAPI '{}' not found.", api_name_req)?; + writeln!(stdout, "\nAvailable APIs:")?; + for spec in extractor.extract_all()? { + writeln!(stdout, " {} ({})", spec.name, spec.api_type)?; + } + } + } else { + // Display list of APIs using the extractor + let all_specs =3D extractor.extract_all()?; + let syscalls: Vec<_> =3D all_specs.iter().filter(|s| s.api_type = =3D=3D "syscall").collect(); + let ioctls: Vec<_> =3D all_specs.iter().filter(|s| s.api_type =3D= =3D "ioctl").collect(); + let functions: Vec<_> =3D all_specs.iter().filter(|s| s.api_type = =3D=3D "function").collect(); + + if !syscalls.is_empty() { + formatter.begin_api_list(&mut stdout, "System Calls")?; + for spec in syscalls { + formatter.api_item(&mut stdout, &spec.name, &spec.api_type= )?; + } + formatter.end_api_list(&mut stdout)?; + } + + if !ioctls.is_empty() { + formatter.begin_api_list(&mut stdout, "IOCTLs")?; + for spec in ioctls { + formatter.api_item(&mut stdout, &spec.name, &spec.api_type= )?; + } + formatter.end_api_list(&mut stdout)?; + } + + if !functions.is_empty() { + formatter.begin_api_list(&mut stdout, "Functions")?; + for spec in functions { + formatter.api_item(&mut stdout, &spec.name, &spec.api_type= )?; + } + formatter.end_api_list(&mut stdout)?; + } + + 
formatter.total_specs(&mut stdout, all_specs.len())?; + } + + formatter.end_document(&mut stdout)?; + + Ok(()) +} + --=20 2.39.5