From nobody Sun Feb 8 18:49:34 2026 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A34B73AC25 for ; Fri, 1 Mar 2024 16:38:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=170.10.129.124 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1709311124; cv=none; b=tF/Rj6/rtvUCokk3U3HfZttt0zmplL7O+QiYta/flaZCcrIZyqxLo5Tez5uXmlPDq/qIUTsbNJoZ10X6HYCYkFGpoOEjLPusJEKjmj2hIJTkhofmRVdZi9mco2ypbWmBXWLhD5eNdlUa51nILVvDf0mrHVeFiOJLbKO5Z3Gt/04= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1709311124; c=relaxed/simple; bh=bHAPEpzQhcJdunutbccJkA4/6xJwnaEM0dCpZ4ZhXVE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=iE4Q83kSQAbZEeobJLAztOiToLOJyYqpR7J1T1E5zLotgux3Irtrs8JVgyX+jDXHvOYabC/fxhAmFz4mvP9Us7SP735VGNu4CC6Bq5U/Y2WR4AvGtg4f/wgBUW7+vDLF2DnE7qpNE6woUdCA9S9hhb4ZsXcqUwHEOZZksRFSm6Q= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=redhat.com; spf=pass smtp.mailfrom=redhat.com; dkim=pass (1024-bit key) header.d=redhat.com header.i=@redhat.com header.b=CNXgsn7z; arc=none smtp.client-ip=170.10.129.124 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=redhat.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=redhat.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=redhat.com header.i=@redhat.com header.b="CNXgsn7z" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1709311119; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=X6mTfawOhdIFVp9CCHZgH6IKLXr2HcHhQLCmewqWNgQ=; b=CNXgsn7zqJkRZhqwPpFjJ1N+D+x+cQ9O6HkGeiMywSfOlvoz/kMsJ8ha1MbWZ1VWMYyUnc 3FvMk1WlncjFhUE6a0heJ+BWmmQ4K22IyAfGpayE6bubXa3cLr+PuYoi2LFTksW8Lp9D2N qrXdP5Es2/+YvxMJCnROAa5a51w1eV4= Received: from mimecast-mx02.redhat.com (mx-ext.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-323-ajzNL7x5NOWfssvUWBu7IQ-1; Fri, 01 Mar 2024 11:38:37 -0500 X-MC-Unique: ajzNL7x5NOWfssvUWBu7IQ-1 Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.rdu2.redhat.com [10.11.54.4]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 360433C2E0A2; Fri, 1 Mar 2024 16:38:37 +0000 (UTC) Received: from warthog.procyon.org.com (unknown [10.42.28.114]) by smtp.corp.redhat.com (Postfix) with ESMTP id D64FD2022AAC; Fri, 1 Mar 2024 16:38:35 +0000 (UTC) From: David Howells To: netdev@vger.kernel.org Cc: David Howells , Marc Dionne , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , linux-afs@lists.infradead.org, linux-kernel@vger.kernel.org Subject: [PATCH net-next 14/21] rxrpc: Do zerocopy using MSG_SPLICE_PAGES and page frags Date: Fri, 1 Mar 2024 16:37:46 +0000 Message-ID: <20240301163807.385573-15-dhowells@redhat.com> In-Reply-To: <20240301163807.385573-1-dhowells@redhat.com> References: <20240301163807.385573-1-dhowells@redhat.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Scanned-By: MIMEDefang 3.4.1 on 10.11.54.4 Content-Type: text/plain; charset="utf-8" Switch from keeping the transmission buffers in the rxrpc_txbuf struct and allocated from the slab, to allocating them using page fragment allocators (which uses raw pages), thereby allowing them to be passed to MSG_SPLICE_PAGES and avoid copying into the UDP buffers. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 32 +++---- net/rxrpc/conn_object.c | 4 + net/rxrpc/insecure.c | 11 +-- net/rxrpc/local_object.c | 3 + net/rxrpc/output.c | 66 +++++++------- net/rxrpc/rxkad.c | 47 +++++----- net/rxrpc/sendmsg.c | 22 ++--- net/rxrpc/txbuf.c | 180 ++++++++++++++++++++++++++++++--------- 8 files changed, 219 insertions(+), 146 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9ea4e7e9d9f7..47f4689379ca 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -248,10 +248,9 @@ struct rxrpc_security { struct rxrpc_key_token *); =20 /* Work out how much data we can store in a packet, given an estimate - * of the amount of data remaining. + * of the amount of data remaining and allocate a data buffer. */ - int (*how_much_data)(struct rxrpc_call *, size_t, - size_t *, size_t *, size_t *); + struct rxrpc_txbuf *(*alloc_txbuf)(struct rxrpc_call *call, size_t remain= ing, gfp_t gfp); =20 /* impose security on a packet */ int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *); @@ -292,6 +291,7 @@ struct rxrpc_local { struct socket *socket; /* my UDP socket */ struct task_struct *io_thread; struct completion io_thread_ready; /* Indication that the I/O thread star= ted */ + struct page_frag_cache tx_alloc; /* Tx control packet allocation (I/O thr= ead only) */ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY struct sk_buff_head rx_delay_queue; /* Delay injection queue */ @@ -500,6 +500,8 @@ struct rxrpc_connection { struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + struct page_frag_cache tx_data_alloc; /* Tx DATA packet allocation */ + struct mutex tx_data_alloc_lock; =20 struct mutex security_lock; /* Lock for security management */ const struct rxrpc_security *security; /* applied security module */ @@ -788,7 +790,6 @@ struct rxrpc_send_params { * Buffer of data to be output as a packet. */ struct rxrpc_txbuf { - struct rcu_head rcu; struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ struct list_head tx_link; /* Link in live Enc queue or Tx queue */ ktime_t last_sent; /* Time at which last transmitted */ @@ -806,22 +807,8 @@ struct rxrpc_txbuf { __be16 cksum; /* Checksum to go in header */ unsigned short ack_rwind; /* ACK receive window */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ - u8 nr_kvec; - struct kvec kvec[1]; - struct { - /* The packet for encrypting and DMA'ing. We align it such - * that data[] aligns correctly for any crypto blocksize. - */ - u8 pad[64 - sizeof(struct rxrpc_wire_header)]; - struct rxrpc_wire_header _wire; /* Network-ready header */ - union { - u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */ - struct { - struct rxrpc_ackpacket _ack; - DECLARE_FLEX_ARRAY(u8, acks); - }; - }; - } __aligned(64); + u8 nr_kvec; /* Amount of kvec[] used */ + struct kvec kvec[3]; }; =20 static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) @@ -1299,8 +1286,9 @@ static inline void rxrpc_sysctl_exit(void) {} * txbuf.c */ extern atomic_t rxrpc_nr_txbuf; -struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_t= ype, - gfp_t gfp); +struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t= data_size, + size_t data_align, gfp_t gfp); +struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t = sack_size); void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index df8a271948a1..0af4642aeec4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -68,6 +68,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rx= rpc_net *rxnet, INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); mutex_init(&conn->security_lock); + mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet =3D rxnet; conn->security =3D &rxrpc_no_security; @@ -341,6 +342,9 @@ static void rxrpc_clean_up_connection(struct work_struc= t *work) */ rxrpc_purge_queue(&conn->rx_queue); =20 + if (conn->tx_data_alloc.va) + __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va), + conn->tx_data_alloc.pagecnt_bias); call_rcu(&conn->rcu, rxrpc_rcu_free_connection); } =20 diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 34353b6e584b..f2701068ed9e 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -15,14 +15,11 @@ static int none_init_connection_security(struct rxrpc_c= onnection *conn, } =20 /* - * Work out how much data we can put in an unsecured packet. + * Allocate an appropriately sized buffer for the amount of data remaining. */ -static int none_how_much_data(struct rxrpc_call *call, size_t remain, - size_t *_buf_size, size_t *_data_size, size_t *_offset) +static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_= t remain, gfp_t gfp) { - *_buf_size =3D *_data_size =3D min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - *_offset =3D 0; - return 0; + return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DAT= ALEN), 0, gfp); } =20 static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf = *txb) @@ -79,7 +76,7 @@ const struct rxrpc_security rxrpc_no_security =3D { .exit =3D none_exit, .init_connection_security =3D none_init_connection_security, .free_call_crypto =3D none_free_call_crypto, - .how_much_data =3D none_how_much_data, + .alloc_txbuf =3D none_alloc_txbuf, .secure_packet =3D none_secure_packet, .verify_packet =3D none_verify_packet, .respond_to_challenge =3D none_respond_to_challenge, diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 34d307368135..504453c688d7 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -452,6 +452,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local) #endif rxrpc_purge_queue(&local->rx_queue); rxrpc_purge_client_connections(local); + if (local->tx_alloc.va) + __page_frag_cache_drain(virt_to_page(local->tx_alloc.va), + local->tx_alloc.pagecnt_bias); } =20 /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index b84b40562e5b..0a317498b8e0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -83,18 +83,16 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, rxrpc_serial_t serial) { struct rxrpc_wire_header *whdr =3D txb->kvec[0].iov_base; + struct rxrpc_acktrailer *trailer =3D txb->kvec[2].iov_base + 3; struct rxrpc_ackpacket *ack =3D (struct rxrpc_ackpacket *)(whdr + 1); - struct rxrpc_acktrailer trailer; unsigned int qsize, sack, wrap, to; rxrpc_seq_t window, wtop; int rsize; u32 mtu, jmax; - u8 *ackp =3D txb->acks; + u8 *filler =3D txb->kvec[2].iov_base; + u8 *sackp =3D txb->kvec[1].iov_base; =20 - call->ackr_nr_unacked =3D 0; - atomic_set(&call->ackr_nr_consumed, 0); rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); - clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); =20 window =3D call->ackr_window; wtop =3D call->ackr_wtop; @@ -110,20 +108,27 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *cal= l, ack->serial =3D htonl(serial); ack->reason =3D ack_reason; ack->nAcks =3D wtop - window; + filler[0] =3D 0; + filler[1] =3D 0; + filler[2] =3D 0; + + if (ack_reason =3D=3D RXRPC_ACK_PING) + txb->flags |=3D RXRPC_REQUEST_ACK; =20 if (after(wtop, window)) { + txb->len +=3D ack->nAcks; + txb->kvec[1].iov_base =3D sackp; + txb->kvec[1].iov_len =3D ack->nAcks; + wrap =3D RXRPC_SACK_SIZE - sack; to =3D min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE); =20 if (sack + ack->nAcks <=3D RXRPC_SACK_SIZE) { - memcpy(txb->acks, call->ackr_sack_table + sack, ack->nAcks); + memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks); } else { - memcpy(txb->acks, call->ackr_sack_table + sack, wrap); - memcpy(txb->acks + wrap, call->ackr_sack_table, - to - wrap); + memcpy(sackp, call->ackr_sack_table + sack, wrap); + memcpy(sackp + wrap, call->ackr_sack_table, to - wrap); } - - ackp +=3D to; } else if (before(wtop, window)) { pr_warn("ack window backward %x %x", window, wtop); } else if (ack->reason =3D=3D RXRPC_ACK_DELAY) { @@ -135,18 +140,11 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *cal= l, jmax =3D rxrpc_rx_jumbo_max; qsize =3D (window - 1) - call->rx_consumed; rsize =3D max_t(int, call->rx_winsize - qsize, 0); - txb->ack_rwind =3D rsize; - trailer.maxMTU =3D htonl(rxrpc_rx_mtu); - trailer.ifMTU =3D htonl(mtu); - trailer.rwind =3D htonl(rsize); - trailer.jumbo_max =3D htonl(jmax); - - *ackp++ =3D 0; - *ackp++ =3D 0; - *ackp++ =3D 0; - memcpy(ackp, &trailer, sizeof(trailer)); - txb->kvec[0].iov_len +=3D sizeof(*ack) + ack->nAcks + 3 + sizeof(trailer); - txb->len =3D txb->kvec[0].iov_len; + txb->ack_rwind =3D rsize; + trailer->maxMTU =3D htonl(rxrpc_rx_mtu); + trailer->ifMTU =3D htonl(mtu); + trailer->rwind =3D htonl(rsize); + trailer->jumbo_max =3D htonl(jmax); } =20 /* @@ -195,7 +193,7 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *c= all, /* * Transmit an ACK packet. */ -static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txb= uf *txb) +static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_tx= buf *txb) { struct rxrpc_wire_header *whdr =3D txb->kvec[0].iov_base; struct rxrpc_connection *conn; @@ -204,7 +202,7 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *cal= l, struct rxrpc_txbuf *tx int ret, rtt_slot =3D -1; =20 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) - return -ECONNRESET; + return; =20 conn =3D call->conn; =20 @@ -212,10 +210,8 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *ca= ll, struct rxrpc_txbuf *tx msg.msg_namelen =3D call->peer->srx.transport_len; msg.msg_control =3D NULL; msg.msg_controllen =3D 0; - msg.msg_flags =3D 0; + msg.msg_flags =3D MSG_SPLICE_PAGES; =20 - if (ack->reason =3D=3D RXRPC_ACK_PING) - txb->flags |=3D RXRPC_REQUEST_ACK; whdr->flags =3D txb->flags & RXRPC_TXBUF_WIRE_FLAGS; =20 txb->serial =3D rxrpc_get_next_serial(conn); @@ -250,8 +246,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *cal= l, struct rxrpc_txbuf *tx rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); rxrpc_set_keepalive(call); } - - return ret; } =20 /* @@ -260,7 +254,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *cal= l, struct rxrpc_txbuf *tx void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) { - struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; =20 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -268,16 +261,19 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_r= eason, =20 rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); =20 - txb =3D rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK, - rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); + txb =3D rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window); if (!txb) { kleave(" =3D -ENOMEM"); return; } =20 + txb->ack_why =3D why; + rxrpc_fill_out_ack(call, txb, ack_reason, serial); + call->ackr_nr_unacked =3D 0; + atomic_set(&call->ackr_nr_consumed, 0); + clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); =20 - txb->ack_why =3D why; trace_rxrpc_send_ack(call, why, ack_reason, serial); rxrpc_send_ack_packet(call, txb); rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); @@ -466,7 +462,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *ca= ll, struct rxrpc_txbuf *t msg.msg_namelen =3D call->peer->srx.transport_len; msg.msg_control =3D NULL; msg.msg_controllen =3D 0; - msg.msg_flags =3D 0; + msg.msg_flags =3D MSG_SPLICE_PAGES; =20 /* Track what we've attempted to transmit at least once so that the * retransmission algorithm doesn't try to resend what we haven't sent diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ef0849c8329c..e540501a20ad 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -145,16 +145,17 @@ static int rxkad_init_connection_security(struct rxrp= c_connection *conn, /* * Work out how much data we can put in a packet. */ -static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, - size_t *_buf_size, size_t *_data_size, size_t *_offset) +static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size= _t remain, gfp_t gfp) { - size_t shdr, buf_size, chunk; + struct rxrpc_txbuf *txb; + size_t shdr, space; + + remain =3D min(remain, 65535 - sizeof(struct rxrpc_wire_header)); =20 switch (call->conn->security_level) { default: - buf_size =3D chunk =3D min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - shdr =3D 0; - goto out; + space =3D min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + return rxrpc_alloc_data_txbuf(call, space, 0, GFP_KERNEL); case RXRPC_SECURITY_AUTH: shdr =3D sizeof(struct rxkad_level1_hdr); break; @@ -163,17 +164,15 @@ static int rxkad_how_much_data(struct rxrpc_call *cal= l, size_t remain, break; } =20 - buf_size =3D round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN); - - chunk =3D buf_size - shdr; - if (remain < chunk) - buf_size =3D round_up(shdr + remain, RXKAD_ALIGN); + space =3D min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), rem= ain + shdr); + space =3D round_up(space, RXKAD_ALIGN); =20 -out: - *_buf_size =3D buf_size; - *_data_size =3D chunk; - *_offset =3D shdr; - return 0; + txb =3D rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, GFP_KERNEL); + if (txb) { + txb->offset +=3D shdr; + txb->space -=3D shdr; + } + return txb; } =20 /* @@ -251,7 +250,8 @@ static int rxkad_secure_packet_auth(const struct rxrpc_= call *call, struct rxrpc_txbuf *txb, struct skcipher_request *req) { - struct rxkad_level1_hdr *hdr =3D (void *)txb->data; + struct rxrpc_wire_header *whdr =3D txb->kvec[0].iov_base; + struct rxkad_level1_hdr *hdr =3D (void *)(whdr + 1); struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -267,14 +267,14 @@ static int rxkad_secure_packet_auth(const struct rxrp= c_call *call, pad =3D RXKAD_ALIGN - pad; pad &=3D RXKAD_ALIGN - 1; if (pad) { - memset(txb->data + txb->offset, 0, pad); + memset(txb->kvec[0].iov_base + txb->offset, 0, pad); txb->len +=3D pad; } =20 /* start the encryption afresh */ memset(&iv, 0, sizeof(iv)); =20 - sg_init_one(&sg, txb->data, 8); + sg_init_one(&sg, hdr, 8); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); @@ -293,7 +293,8 @@ static int rxkad_secure_packet_encrypt(const struct rxr= pc_call *call, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxkad_level2_hdr *rxkhdr =3D (void *)txb->data; + struct rxrpc_wire_header *whdr =3D txb->kvec[0].iov_base; + struct rxkad_level2_hdr *rxkhdr =3D (void *)(whdr + 1); struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -312,7 +313,7 @@ static int rxkad_secure_packet_encrypt(const struct rxr= pc_call *call, pad =3D RXKAD_ALIGN - pad; pad &=3D RXKAD_ALIGN - 1; if (pad) { - memset(txb->data + txb->offset, 0, pad); + memset(txb->kvec[0].iov_base + txb->offset, 0, pad); txb->len +=3D pad; } =20 @@ -320,7 +321,7 @@ static int rxkad_secure_packet_encrypt(const struct rxr= pc_call *call, token =3D call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); =20 - sg_init_one(&sg, txb->data, txb->len); + sg_init_one(&sg, rxkhdr, txb->len); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x); @@ -1255,7 +1256,7 @@ const struct rxrpc_security rxkad =3D { .free_preparse_server_key =3D rxkad_free_preparse_server_key, .destroy_server_key =3D rxkad_destroy_server_key, .init_connection_security =3D rxkad_init_connection_security, - .how_much_data =3D rxkad_how_much_data, + .alloc_txbuf =3D rxkad_alloc_txbuf, .secure_packet =3D rxkad_secure_packet, .verify_packet =3D rxkad_verify_packet, .free_call_crypto =3D rxkad_free_call_crypto, diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1e81046ea8a6..4d152f06b039 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -336,7 +336,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, =20 do { if (!txb) { - size_t remain, bufsize, chunk, offset; + size_t remain; =20 _debug("alloc"); =20 @@ -348,23 +348,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, * region (enc blocksize), but the trailer is not. */ remain =3D more ? INT_MAX : msg_data_left(msg); - ret =3D call->conn->security->how_much_data(call, remain, - &bufsize, &chunk, &offset); - if (ret < 0) - goto maybe_error; - - _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset); - - /* create a buffer that we can retain until it's ACK'd */ - ret =3D -ENOMEM; - txb =3D rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA, - GFP_KERNEL); - if (!txb) + txb =3D call->conn->security->alloc_txbuf(call, remain, sk->sk_allocati= on); + if (IS_ERR(txb)) { + ret =3D PTR_ERR(txb); goto maybe_error; - - txb->offset =3D offset + sizeof(struct rxrpc_wire_header); - txb->space -=3D offset; - txb->space =3D min_t(size_t, chunk, txb->space); + } } =20 _debug("append"); diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 2e8c5b15a84f..b2a82ab756c2 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -14,53 +14,146 @@ static atomic_t rxrpc_txbuf_debug_ids; atomic_t rxrpc_nr_txbuf; =20 /* - * Allocate and partially initialise an I/O request structure. + * Allocate and partially initialise a data transmission buffer. */ -struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_t= ype, - gfp_t gfp) +struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t= data_size, + size_t data_align, gfp_t gfp) { struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; + size_t total, hoff =3D 0; + void *buf; =20 txb =3D kmalloc(sizeof(*txb), gfp); - if (txb) { - whdr =3D &txb->_wire; - - INIT_LIST_HEAD(&txb->call_link); - INIT_LIST_HEAD(&txb->tx_link); - refcount_set(&txb->ref, 1); - txb->call_debug_id =3D call->debug_id; - txb->debug_id =3D atomic_inc_return(&rxrpc_txbuf_debug_ids); - txb->space =3D sizeof(txb->data); - txb->len =3D 0; - txb->offset =3D 0; - txb->flags =3D call->conn->out_clientflag; - txb->ack_why =3D 0; - txb->seq =3D call->tx_prepared + 1; - txb->serial =3D 0; - txb->cksum =3D 0; - txb->nr_kvec =3D 1; - txb->kvec[0].iov_base =3D whdr; - txb->kvec[0].iov_len =3D sizeof(*whdr); - whdr->epoch =3D htonl(call->conn->proto.epoch); - whdr->cid =3D htonl(call->cid); - whdr->callNumber =3D htonl(call->call_id); - whdr->seq =3D htonl(txb->seq); - whdr->type =3D packet_type; - whdr->flags =3D 0; - whdr->userStatus =3D 0; - whdr->securityIndex =3D call->security_ix; - whdr->_rsvd =3D 0; - whdr->serviceId =3D htons(call->dest_srx.srx_service); - - trace_rxrpc_txbuf(txb->debug_id, - txb->call_debug_id, txb->seq, 1, - packet_type =3D=3D RXRPC_PACKET_TYPE_DATA ? - rxrpc_txbuf_alloc_data : - rxrpc_txbuf_alloc_ack); - atomic_inc(&rxrpc_nr_txbuf); + if (!txb) + return NULL; + + if (data_align) + hoff =3D round_up(sizeof(*whdr), data_align) - sizeof(*whdr); + total =3D hoff + sizeof(*whdr) + data_size; + + mutex_lock(&call->conn->tx_data_alloc_lock); + buf =3D page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, + ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); + mutex_unlock(&call->conn->tx_data_alloc_lock); + if (!buf) { + kfree(txb); + return NULL; + } + + whdr =3D buf + hoff; + + INIT_LIST_HEAD(&txb->call_link); + INIT_LIST_HEAD(&txb->tx_link); + refcount_set(&txb->ref, 1); + txb->last_sent =3D KTIME_MIN; + txb->call_debug_id =3D call->debug_id; + txb->debug_id =3D atomic_inc_return(&rxrpc_txbuf_debug_ids); + txb->space =3D data_size; + txb->len =3D 0; + txb->offset =3D sizeof(*whdr); + txb->flags =3D call->conn->out_clientflag; + txb->ack_why =3D 0; + txb->seq =3D call->tx_prepared + 1; + txb->serial =3D 0; + txb->cksum =3D 0; + txb->nr_kvec =3D 1; + txb->kvec[0].iov_base =3D whdr; + txb->kvec[0].iov_len =3D sizeof(*whdr); + + whdr->epoch =3D htonl(call->conn->proto.epoch); + whdr->cid =3D htonl(call->cid); + whdr->callNumber =3D htonl(call->call_id); + whdr->seq =3D htonl(txb->seq); + whdr->type =3D RXRPC_PACKET_TYPE_DATA; + whdr->flags =3D 0; + whdr->userStatus =3D 0; + whdr->securityIndex =3D call->security_ix; + whdr->_rsvd =3D 0; + whdr->serviceId =3D htons(call->dest_srx.srx_service); + + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, + rxrpc_txbuf_alloc_data); + + atomic_inc(&rxrpc_nr_txbuf); + return txb; +} + +/* + * Allocate and partially initialise an ACK packet. + */ +struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t = sack_size) +{ + struct rxrpc_wire_header *whdr; + struct rxrpc_acktrailer *trailer; + struct rxrpc_ackpacket *ack; + struct rxrpc_txbuf *txb; + gfp_t gfp =3D rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; + void *buf, *buf2 =3D NULL; + u8 *filler; + + txb =3D kmalloc(sizeof(*txb), gfp); + if (!txb) + return NULL; + + buf =3D page_frag_alloc(&call->local->tx_alloc, + sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); + if (!buf) { + kfree(txb); + return NULL; + } + + if (sack_size) { + buf2 =3D page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); + if (!buf2) { + page_frag_free(buf); + kfree(txb); + return NULL; + } } =20 + whdr =3D buf; + ack =3D buf + sizeof(*whdr); + filler =3D buf + sizeof(*whdr) + sizeof(*ack) + 1; + trailer =3D buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; + + INIT_LIST_HEAD(&txb->call_link); + INIT_LIST_HEAD(&txb->tx_link); + refcount_set(&txb->ref, 1); + txb->call_debug_id =3D call->debug_id; + txb->debug_id =3D atomic_inc_return(&rxrpc_txbuf_debug_ids); + txb->space =3D 0; + txb->len =3D sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer); + txb->offset =3D 0; + txb->flags =3D call->conn->out_clientflag; + txb->ack_rwind =3D 0; + txb->seq =3D 0; + txb->serial =3D 0; + txb->cksum =3D 0; + txb->nr_kvec =3D 3; + txb->kvec[0].iov_base =3D whdr; + txb->kvec[0].iov_len =3D sizeof(*whdr) + sizeof(*ack); + txb->kvec[1].iov_base =3D buf2; + txb->kvec[1].iov_len =3D sack_size; + txb->kvec[2].iov_base =3D filler; + txb->kvec[2].iov_len =3D 3 + sizeof(*trailer); + + whdr->epoch =3D htonl(call->conn->proto.epoch); + whdr->cid =3D htonl(call->cid); + whdr->callNumber =3D htonl(call->call_id); + whdr->seq =3D 0; + whdr->type =3D RXRPC_PACKET_TYPE_ACK; + whdr->flags =3D 0; + whdr->userStatus =3D 0; + whdr->securityIndex =3D call->security_ix; + whdr->_rsvd =3D 0; + whdr->serviceId =3D htons(call->dest_srx.srx_service); + + get_page(virt_to_head_page(trailer)); + + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, + rxrpc_txbuf_alloc_ack); + atomic_inc(&rxrpc_nr_txbuf); return txb; } =20 @@ -79,12 +172,15 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxr= pc_txbuf_trace what) trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what); } =20 -static void rxrpc_free_txbuf(struct rcu_head *rcu) +static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) { - struct rxrpc_txbuf *txb =3D container_of(rcu, struct rxrpc_txbuf, rcu); + int i; =20 trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); + for (i =3D 0; i < txb->nr_kvec; i++) + if (txb->kvec[i].iov_base) + page_frag_free(txb->kvec[i].iov_base); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); } @@ -103,7 +199,7 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrp= c_txbuf_trace what) dead =3D __refcount_dec_and_test(&txb->ref, &r); trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what); if (dead) - call_rcu(&txb->rcu, rxrpc_free_txbuf); + rxrpc_free_txbuf(txb); } }