[PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason

Menglong Dong posted 10 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
Posted by Menglong Dong 1 month, 1 week ago
In this commit, we make __fib_validate_source() return -reason instead of
-errno on error.

The return value of __fib_validate_source can be -errno, 0, and 1.
It's hard to make __fib_validate_source() return drop reasons directly.

__fib_validate_source() returns 1 if the scope of the source (reverse)
route is HOST. In that case, __mkroute_input() marks the skb with
IPSKB_DOREDIRECT (in combination with some other conditions), and
ip_forward() then sends an ICMP redirect if this flag is set. We can't
pass this information to __mkroute_input() if we make
__fib_validate_source() return drop reasons.

However, we can make fib_validate_source() return drop reasons, and call
__fib_validate_source() directly in __mkroute_input().

In the original logic, LINUX_MIB_IPRPFILTER is counted if
__fib_validate_source() returns -EXDEV. Now we need to adjust this by
checking "reason == SKB_DROP_REASON_IP_RPFILTER". However, this will take
effect only after the patch "net: ip: make ip_route_input_noref() return
drop reasons", as we can't pass the drop reasons from
fib_validate_source() to ip_rcv_finish_core() in this patch.

The following new drop reasons are added in this patch:

  SKB_DROP_REASON_IP_LOCAL_SOURCE
  SKB_DROP_REASON_IP_INVALID_SOURCE

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v2:
- make fib_validate_source() return drop reasons, instead of -reason.
---
 include/net/dropreason-core.h | 10 ++++++++++
 include/net/ip_fib.h          |  9 ++++++---
 net/ipv4/fib_frontend.c       | 19 ++++++++++++------
 net/ipv4/ip_input.c           |  4 +---
 net/ipv4/route.c              | 37 ++++++++++++++++++++---------------
 5 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index d59bb96c5a02..62a60be1db84 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -76,6 +76,8 @@
 	FN(INVALID_PROTO)		\
 	FN(IP_INADDRERRORS)		\
 	FN(IP_INNOROUTES)		\
+	FN(IP_LOCAL_SOURCE)		\
+	FN(IP_INVALID_SOURCE)		\
 	FN(PKT_TOO_BIG)			\
 	FN(DUP_FRAG)			\
 	FN(FRAG_REASM_TIMEOUT)		\
@@ -373,6 +375,14 @@ enum skb_drop_reason {
 	 * IPSTATS_MIB_INADDRERRORS
 	 */
 	SKB_DROP_REASON_IP_INNOROUTES,
+	/** @SKB_DROP_REASON_IP_LOCAL_SOURCE: the source ip is local */
+	SKB_DROP_REASON_IP_LOCAL_SOURCE,
+	/**
+	 * @SKB_DROP_REASON_IP_INVALID_SOURCE: the source ip is invalid:
+	 * 1) source ip is multicast or limited broadcast
+	 * 2) source ip is zero and not IGMP
+	 */
+	SKB_DROP_REASON_IP_INVALID_SOURCE,
 	/**
 	 * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
 	 * MTU)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 90ff815f212b..b3f7a1562140 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 			  dscp_t dscp, int oif, struct net_device *dev,
 			  struct in_device *idev, u32 *itag);
 
-static inline int
+static inline enum skb_drop_reason
 fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 		    dscp_t dscp, int oif, struct net_device *dev,
 		    struct in_device *idev, u32 *itag)
 {
-	return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
-				     itag);
+	int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
+					itag);
+	if (err < 0)
+		return -err;
+	return SKB_NOT_DROPPED_YET;
 }
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f74138f4d748..71fa9cee9149 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -347,6 +347,7 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 {
 	int rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
 	struct net *net = dev_net(dev);
+	enum skb_drop_reason reason;
 	struct flow_keys flkeys;
 	int ret, no_addr;
 	struct fib_result res;
@@ -369,7 +370,7 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 		 * and the same host but different containers are not.
 		 */
 		if (inet_lookup_ifaddr_rcu(net, src))
-			return -EINVAL;
+			return -SKB_DROP_REASON_IP_LOCAL_SOURCE;
 
 		goto last_resort;
 	}
@@ -400,9 +401,15 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 
 	if (fib_lookup(net, &fl4, &res, 0))
 		goto last_resort;
-	if (res.type != RTN_UNICAST &&
-	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
-		goto e_inval;
+	if (res.type != RTN_UNICAST) {
+		if (res.type != RTN_LOCAL) {
+			reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
+			goto e_inval;
+		} else if (!IN_DEV_ACCEPT_LOCAL(idev)) {
+			reason = SKB_DROP_REASON_IP_LOCAL_SOURCE;
+			goto e_inval;
+		}
+	}
 	fib_combine_itag(itag, &res);
 
 	dev_match = fib_info_nh_uses_dev(res.fi, dev);
@@ -435,9 +442,9 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	return 0;
 
 e_inval:
-	return -EINVAL;
+	return -reason;
 e_rpf:
-	return -EXDEV;
+	return -SKB_DROP_REASON_IP_RPFILTER;
 }
 
 static inline __be32 sk_extract_addr(struct sockaddr *addr)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 89bb63da6852..c40a26972884 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -425,10 +425,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 	return NET_RX_DROP;
 
 drop_error:
-	if (err == -EXDEV) {
-		drop_reason = SKB_DROP_REASON_IP_RPFILTER;
+	if (drop_reason == SKB_DROP_REASON_IP_RPFILTER)
 		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
-	}
 	goto drop;
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a0b091a7df87..df5401efbf56 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1669,7 +1669,7 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			  dscp_t dscp, struct net_device *dev,
 			  struct in_device *in_dev, u32 *itag)
 {
-	int err;
+	enum skb_drop_reason reason;
 
 	/* Primary sanity checks. */
 	if (!in_dev)
@@ -1687,10 +1687,10 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    ip_hdr(skb)->protocol != IPPROTO_IGMP)
 			return -EINVAL;
 	} else {
-		err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev,
-					  itag);
-		if (err < 0)
-			return err;
+		reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
+					     in_dev, itag);
+		if (reason)
+			return -EINVAL;
 	}
 	return 0;
 }
@@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
 		return -EINVAL;
 	}
 
-	err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
-				  in_dev->dev, in_dev, &itag);
+	err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
+				    in_dev->dev, in_dev, &itag);
 	if (err < 0) {
+		err = -EINVAL;
 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
 					 saddr);
 
@@ -2140,6 +2141,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct rtable *rt = skb_rtable(hint);
 	struct net *net = dev_net(dev);
+	enum skb_drop_reason reason;
 	int err = -EINVAL;
 	u32 tag = 0;
 
@@ -2158,9 +2160,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (rt->rt_type != RTN_LOCAL)
 		goto skip_validate_source;
 
-	err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev,
-				  &tag);
-	if (err < 0)
+	reason = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev,
+				     &tag);
+	if (reason)
 		goto martian_source;
 
 skip_validate_source:
@@ -2202,6 +2204,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			       dscp_t dscp, struct net_device *dev,
 			       struct fib_result *res)
 {
+	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct flow_keys *flkeys = NULL, _flkeys;
 	struct net    *net = dev_net(dev);
@@ -2296,10 +2299,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto brd_input;
 	}
 
+	err = -EINVAL;
 	if (res->type == RTN_LOCAL) {
-		err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev,
-					  in_dev, &itag);
-		if (err < 0)
+		reason = fib_validate_source(skb, saddr, daddr, dscp, 0, dev,
+					     in_dev, &itag);
+		if (reason)
 			goto martian_source;
 		goto local_input;
 	}
@@ -2320,9 +2324,10 @@ out:	return err;
 		goto e_inval;
 
 	if (!ipv4_is_zeronet(saddr)) {
-		err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev,
-					  &itag);
-		if (err < 0)
+		err = -EINVAL;
+		reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
+					     in_dev, &itag);
+		if (reason)
 			goto martian_source;
 	}
 	flags |= RTCF_BROADCAST;
-- 
2.39.5
Re: [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
Posted by Paolo Abeni 1 month ago
On 10/15/24 16:07, Menglong Dong wrote:
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 90ff815f212b..b3f7a1562140 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
>  			  dscp_t dscp, int oif, struct net_device *dev,
>  			  struct in_device *idev, u32 *itag);
>  
> -static inline int
> +static inline enum skb_drop_reason
>  fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
>  		    dscp_t dscp, int oif, struct net_device *dev,
>  		    struct in_device *idev, u32 *itag)
>  {
> -	return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> -				     itag);
> +	int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> +					itag);
> +	if (err < 0)
> +		return -err;
> +	return SKB_NOT_DROPPED_YET;
>  }

It looks like the code churn in patch 1 is not needed??? You could just
define here a fib_validate_source_reason() helper doing the above, and
replace fib_validate_source with the new helper as needed. Would
that work?

> @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
>  		return -EINVAL;
>  	}
>  
> -	err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> -				  in_dev->dev, in_dev, &itag);
> +	err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> +				    in_dev->dev, in_dev, &itag);
>  	if (err < 0) {
> +		err = -EINVAL;
>  		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
>  					 saddr);

I'm sorry for not noticing this issue before, but we must preserve (at
least) the -EXDEV error code from the unpatched version or RP Filter MIB
accounting in ip_rcv_finish_core() will be fooled.

Thanks,

Paolo
Re: [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
Posted by Menglong Dong 1 month ago
On Mon, Oct 21, 2024 at 6:20 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On 10/15/24 16:07, Menglong Dong wrote:
> > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> > index 90ff815f212b..b3f7a1562140 100644
> > --- a/include/net/ip_fib.h
> > +++ b/include/net/ip_fib.h
> > @@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> >                         dscp_t dscp, int oif, struct net_device *dev,
> >                         struct in_device *idev, u32 *itag);
> >
> > -static inline int
> > +static inline enum skb_drop_reason
> >  fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> >                   dscp_t dscp, int oif, struct net_device *dev,
> >                   struct in_device *idev, u32 *itag)
> >  {
> > -     return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> > -                                  itag);
> > +     int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> > +                                     itag);
> > +     if (err < 0)
> > +             return -err;
> > +     return SKB_NOT_DROPPED_YET;
> >  }
>
> It looks like the code churn in patch 1 is not needed??? You could just
> define here a fib_validate_source_reason() helper doing the above, and
> replace fib_validate_source with the new helper as needed. Would
> that work?
>

Of course, that works fine. I'm just trying to find a graceful way
for this part. Defining a fib_validate_source_reason() here looks
nice too, and we can ignore the 1st patch. I'll do it this way in
the next version.

Thanks!
Menglong Dong

> > @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
> >               return -EINVAL;
> >       }
> >
> > -     err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> > -                               in_dev->dev, in_dev, &itag);
> > +     err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> > +                                 in_dev->dev, in_dev, &itag);
> >       if (err < 0) {
> > +             err = -EINVAL;
> >               ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
> >                                        saddr);
>
> I'm sorry for not noticing this issue before, but we must preserve (at
> least) the -EXDEV error code from the unpatched version or RP Filter MIB
> accounting in ip_rcv_finish_core() will be fooled.
>
> Thanks,
>
> Paolo
>
Re: [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
Posted by Paolo Abeni 1 month ago
On 10/21/24 12:20, Paolo Abeni wrote:
> On 10/15/24 16:07, Menglong Dong wrote:
>> @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
>>  		return -EINVAL;
>>  	}
>>  
>> -	err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
>> -				  in_dev->dev, in_dev, &itag);
>> +	err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
>> +				    in_dev->dev, in_dev, &itag);
>>  	if (err < 0) {
>> +		err = -EINVAL;
>>  		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
>>  					 saddr);
> 
> I'm sorry for not noticing this issue before, but we must preserve (at
> least) the -EXDEV error code from the unpatched version or RP Filter MIB
> accounting in ip_rcv_finish_core() will be fooled.

Please, ignore this comment. ENOCOFFEE here, sorry.

/P