In this commit, we make __fib_validate_source() return -reason instead of
-errno on error.
The return value of __fib_validate_source() can be -errno, 0, or 1.
It's hard to make __fib_validate_source() return drop reasons directly.
__fib_validate_source() will return 1 if the scope of the
source (reverse) route is HOST, and __mkroute_input() will mark the skb
with IPSKB_DOREDIRECT in this case (combined with some other conditions).
An ICMP REDIRECT will then be sent in ip_forward() if this flag
is set. We can't pass this information to __mkroute_input() if we make
__fib_validate_source() return drop reasons.
However, we can make fib_validate_source() return drop reasons, and call
__fib_validate_source() directly in __mkroute_input().
In the original logic, LINUX_MIB_IPRPFILTER is counted if
__fib_validate_source() returns -EXDEV. Now, we need to adjust it by
checking "reason == SKB_DROP_REASON_IP_RPFILTER". However, this will take
effect only after the patch "net: ip: make ip_route_input_noref() return
drop reasons", as we can't pass the drop reasons from
fib_validate_source() to ip_rcv_finish_core() in this patch.
The following new drop reasons are added in this patch:
SKB_DROP_REASON_IP_LOCAL_SOURCE
SKB_DROP_REASON_IP_INVALID_SOURCE
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v2:
- make fib_validate_source() return drop reasons, instead of -reason.
---
include/net/dropreason-core.h | 10 ++++++++++
include/net/ip_fib.h | 9 ++++++---
net/ipv4/fib_frontend.c | 19 ++++++++++++------
net/ipv4/ip_input.c | 4 +---
net/ipv4/route.c | 37 ++++++++++++++++++++---------------
5 files changed, 51 insertions(+), 28 deletions(-)
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index d59bb96c5a02..62a60be1db84 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -76,6 +76,8 @@
FN(INVALID_PROTO) \
FN(IP_INADDRERRORS) \
FN(IP_INNOROUTES) \
+ FN(IP_LOCAL_SOURCE) \
+ FN(IP_INVALID_SOURCE) \
FN(PKT_TOO_BIG) \
FN(DUP_FRAG) \
FN(FRAG_REASM_TIMEOUT) \
@@ -373,6 +375,14 @@ enum skb_drop_reason {
* IPSTATS_MIB_INADDRERRORS
*/
SKB_DROP_REASON_IP_INNOROUTES,
+ /** @SKB_DROP_REASON_IP_LOCAL_SOURCE: the source ip is local */
+ SKB_DROP_REASON_IP_LOCAL_SOURCE,
+ /**
+ * @SKB_DROP_REASON_IP_INVALID_SOURCE: the source ip is invalid:
+ * 1) source ip is multicast or limited broadcast
+ * 2) source ip is zero and not IGMP
+ */
+ SKB_DROP_REASON_IP_INVALID_SOURCE,
/**
* @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
* MTU)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 90ff815f212b..b3f7a1562140 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
dscp_t dscp, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag);
-static inline int
+static inline enum skb_drop_reason
fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
dscp_t dscp, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag)
{
- return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
- itag);
+ int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
+ itag);
+ if (err < 0)
+ return -err;
+ return SKB_NOT_DROPPED_YET;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f74138f4d748..71fa9cee9149 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -347,6 +347,7 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
{
int rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
struct net *net = dev_net(dev);
+ enum skb_drop_reason reason;
struct flow_keys flkeys;
int ret, no_addr;
struct fib_result res;
@@ -369,7 +370,7 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
* and the same host but different containers are not.
*/
if (inet_lookup_ifaddr_rcu(net, src))
- return -EINVAL;
+ return -SKB_DROP_REASON_IP_LOCAL_SOURCE;
goto last_resort;
}
@@ -400,9 +401,15 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
- if (res.type != RTN_UNICAST &&
- (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
- goto e_inval;
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
+ goto e_inval;
+ } else if (!IN_DEV_ACCEPT_LOCAL(idev)) {
+ reason = SKB_DROP_REASON_IP_LOCAL_SOURCE;
+ goto e_inval;
+ }
+ }
fib_combine_itag(itag, &res);
dev_match = fib_info_nh_uses_dev(res.fi, dev);
@@ -435,9 +442,9 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
return 0;
e_inval:
- return -EINVAL;
+ return -reason;
e_rpf:
- return -EXDEV;
+ return -SKB_DROP_REASON_IP_RPFILTER;
}
static inline __be32 sk_extract_addr(struct sockaddr *addr)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 89bb63da6852..c40a26972884 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -425,10 +425,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV) {
- drop_reason = SKB_DROP_REASON_IP_RPFILTER;
+ if (drop_reason == SKB_DROP_REASON_IP_RPFILTER)
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
- }
goto drop;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a0b091a7df87..df5401efbf56 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1669,7 +1669,7 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
struct in_device *in_dev, u32 *itag)
{
- int err;
+ enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
@@ -1687,10 +1687,10 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
ip_hdr(skb)->protocol != IPPROTO_IGMP)
return -EINVAL;
} else {
- err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev,
- itag);
- if (err < 0)
- return err;
+ reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
+ in_dev, itag);
+ if (reason)
+ return -EINVAL;
}
return 0;
}
@@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
return -EINVAL;
}
- err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
- in_dev->dev, in_dev, &itag);
+ err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
+ in_dev->dev, in_dev, &itag);
if (err < 0) {
+ err = -EINVAL;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -2140,6 +2141,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
+ enum skb_drop_reason reason;
int err = -EINVAL;
u32 tag = 0;
@@ -2158,9 +2160,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev,
- &tag);
- if (err < 0)
+ reason = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev,
+ &tag);
+ if (reason)
goto martian_source;
skip_validate_source:
@@ -2202,6 +2204,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
struct fib_result *res)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
struct net *net = dev_net(dev);
@@ -2296,10 +2299,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
}
+ err = -EINVAL;
if (res->type == RTN_LOCAL) {
- err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev,
- in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source(skb, saddr, daddr, dscp, 0, dev,
+ in_dev, &itag);
+ if (reason)
goto martian_source;
goto local_input;
}
@@ -2320,9 +2324,10 @@ out: return err;
goto e_inval;
if (!ipv4_is_zeronet(saddr)) {
- err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev,
- &itag);
- if (err < 0)
+ err = -EINVAL;
+ reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
+ in_dev, &itag);
+ if (reason)
goto martian_source;
}
flags |= RTCF_BROADCAST;
--
2.39.5
On 10/15/24 16:07, Menglong Dong wrote: > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h > index 90ff815f212b..b3f7a1562140 100644 > --- a/include/net/ip_fib.h > +++ b/include/net/ip_fib.h > @@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, > dscp_t dscp, int oif, struct net_device *dev, > struct in_device *idev, u32 *itag); > > -static inline int > +static inline enum skb_drop_reason > fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, > dscp_t dscp, int oif, struct net_device *dev, > struct in_device *idev, u32 *itag) > { > - return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev, > - itag); > + int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev, > + itag); > + if (err < 0) > + return -err; > + return SKB_NOT_DROPPED_YET; > } It looks like the code churn in patch 1 is not needed??? You could just define here a fib_validate_source_reason() helper doing the above, and replace fib_validate_source with the the new helper as needed. Would that work? > @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, > return -EINVAL; > } > > - err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), > - in_dev->dev, in_dev, &itag); > + err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), > + in_dev->dev, in_dev, &itag); > if (err < 0) { > + err = -EINVAL; > ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, > saddr); I'm sorry for not noticing this issue before, but must preserve (at least) the -EXDEV error code from the unpatched version or RP Filter MIB accounting in ip_rcv_finish_core() will be fooled. Thanks, Paolo
On Mon, Oct 21, 2024 at 6:20 PM Paolo Abeni <pabeni@redhat.com> wrote: > > On 10/15/24 16:07, Menglong Dong wrote: > > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h > > index 90ff815f212b..b3f7a1562140 100644 > > --- a/include/net/ip_fib.h > > +++ b/include/net/ip_fib.h > > @@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, > > dscp_t dscp, int oif, struct net_device *dev, > > struct in_device *idev, u32 *itag); > > > > -static inline int > > +static inline enum skb_drop_reason > > fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, > > dscp_t dscp, int oif, struct net_device *dev, > > struct in_device *idev, u32 *itag) > > { > > - return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev, > > - itag); > > + int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev, > > + itag); > > + if (err < 0) > > + return -err; > > + return SKB_NOT_DROPPED_YET; > > } > > It looks like the code churn in patch 1 is not needed??? You could just > define here a fib_validate_source_reason() helper doing the above, and > replace fib_validate_source with the the new helper as needed. Would > that work? > Of course, that works fine. I'm just trying to find a graceful way for this part. Defining a fib_validate_source_reason() here looks nice too, and we can ignore the 1st patch. I'll do it this way in the next version. Thanks! 
Menglong Dong > > @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, > > return -EINVAL; > > } > > > > - err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), > > - in_dev->dev, in_dev, &itag); > > + err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), > > + in_dev->dev, in_dev, &itag); > > if (err < 0) { > > + err = -EINVAL; > > ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, > > saddr); > > I'm sorry for not noticing this issue before, but must preserve (at > least) the -EXDEV error code from the unpatched version or RP Filter MIB > accounting in ip_rcv_finish_core() will be fooled. > > Thanks, > > Paolo >
On 10/21/24 12:20, Paolo Abeni wrote: > On 10/15/24 16:07, Menglong Dong wrote: >> @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, >> return -EINVAL; >> } >> >> - err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), >> - in_dev->dev, in_dev, &itag); >> + err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), >> + in_dev->dev, in_dev, &itag); >> if (err < 0) { >> + err = -EINVAL; >> ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, >> saddr); > > I'm sorry for not noticing this issue before, but must preserve (at > least) the -EXDEV error code from the unpatched version or RP Filter MIB > accounting in ip_rcv_finish_core() will be fooled. Please, ignore this comment. ENOCOFFEE here, sorry. /P
© 2016 - 2024 Red Hat, Inc.