diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index 41b578ccd03b84..36cc956ead1ae8 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -1,37 +1,7 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _XT_CONNMARK_H_target +#define _XT_CONNMARK_H_target -#ifndef _XT_CONNMARK_H -#define _XT_CONNMARK_H +#include -#include - -enum { - XT_CONNMARK_SET = 0, - XT_CONNMARK_SAVE, - XT_CONNMARK_RESTORE -}; - -enum { - D_SHIFT_LEFT = 0, - D_SHIFT_RIGHT, -}; - -struct xt_connmark_tginfo1 { - __u32 ctmark, ctmask, nfmask; - __u8 mode; -}; - -struct xt_connmark_tginfo2 { - __u32 ctmark, ctmask, nfmask; - __u8 shift_dir, shift_bits, mode; -}; - -struct xt_connmark_mtinfo1 { - __u32 mark, mask; - __u8 invert; -}; - -#endif /*_XT_CONNMARK_H*/ +#endif /*_XT_CONNMARK_H_target*/ diff --git a/include/uapi/linux/netfilter/xt_dscp.h b/include/uapi/linux/netfilter/xt_dscp.h index 7594e4df8587ae..223d635e8b6f77 100644 --- a/include/uapi/linux/netfilter/xt_dscp.h +++ b/include/uapi/linux/netfilter/xt_dscp.h @@ -1,32 +1,27 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* x_tables module for matching the IPv4/IPv6 DSCP field +/* x_tables module for setting the IPv4/IPv6 DSCP field * * (C) 2002 Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh * This software is distributed under GNU GPL v2, 1991 * * See RFC2474 for a description of the DSCP field within the IP Header. * - * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp + * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp */ -#ifndef _XT_DSCP_H -#define _XT_DSCP_H - +#ifndef _XT_DSCP_TARGET_H +#define _XT_DSCP_TARGET_H +#include #include -#define XT_DSCP_MASK 0xfc /* 11111100 */ -#define XT_DSCP_SHIFT 2 -#define XT_DSCP_MAX 0x3f /* 00111111 */ - -/* match info */ -struct xt_dscp_info { +/* target info */ +struct xt_DSCP_info { __u8 dscp; - __u8 invert; }; -struct xt_tos_match_info { - __u8 tos_mask; +struct xt_tos_target_info { __u8 tos_value; - __u8 invert; + __u8 tos_mask; }; -#endif /* _XT_DSCP_H */ +#endif /* _XT_DSCP_TARGET_H */ diff --git a/include/uapi/linux/netfilter/xt_mark.h b/include/uapi/linux/netfilter/xt_mark.h index 9d0526ced8f0db..f1fe2b4be9332c 100644 --- a/include/uapi/linux/netfilter/xt_mark.h +++ b/include/uapi/linux/netfilter/xt_mark.h @@ -1,16 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _XT_MARK_H -#define _XT_MARK_H +#ifndef _XT_MARK_H_target +#define _XT_MARK_H_target -#include +#include -struct xt_mark_tginfo2 { - __u32 mark, mask; -}; - -struct xt_mark_mtinfo1 { - __u32 mark, mask; - __u8 invert; -}; - -#endif /*_XT_MARK_H*/ +#endif /*_XT_MARK_H_target */ diff --git a/include/uapi/linux/netfilter/xt_rateest.h b/include/uapi/linux/netfilter/xt_rateest.h index 52a37bdc1837a2..2b87a71e6266e4 100644 --- a/include/uapi/linux/netfilter/xt_rateest.h +++ b/include/uapi/linux/netfilter/xt_rateest.h @@ -1,39 +1,17 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _XT_RATEEST_MATCH_H -#define _XT_RATEEST_MATCH_H +#ifndef _XT_RATEEST_TARGET_H +#define _XT_RATEEST_TARGET_H #include #include -enum xt_rateest_match_flags { - XT_RATEEST_MATCH_INVERT = 1<<0, - XT_RATEEST_MATCH_ABS = 1<<1, - XT_RATEEST_MATCH_REL = 1<<2, - XT_RATEEST_MATCH_DELTA = 1<<3, - XT_RATEEST_MATCH_BPS = 1<<4, - XT_RATEEST_MATCH_PPS = 1<<5, -}; - -enum xt_rateest_match_mode { - XT_RATEEST_MATCH_NONE, - XT_RATEEST_MATCH_EQ, - XT_RATEEST_MATCH_LT, - XT_RATEEST_MATCH_GT, -}; - -struct xt_rateest_match_info { - char name1[IFNAMSIZ]; - char name2[IFNAMSIZ]; - __u16 flags; - __u16 mode; - __u32 bps1; - __u32 pps1; - __u32 bps2; - __u32 pps2; +struct xt_rateest_target_info { + char name[IFNAMSIZ]; + __s8 interval; + __u8 ewma_log; /* Used internally by the kernel */ - struct xt_rateest *est1 __attribute__((aligned(8))); - struct xt_rateest *est2 __attribute__((aligned(8))); + struct xt_rateest *est __attribute__((aligned(8))); }; -#endif /* _XT_RATEEST_MATCH_H */ +#endif /* _XT_RATEEST_TARGET_H */ diff --git a/include/uapi/linux/netfilter/xt_tcpmss.h b/include/uapi/linux/netfilter/xt_tcpmss.h index 2268f58b4dec79..65ea6c9dab4b64 100644 --- a/include/uapi/linux/netfilter/xt_tcpmss.h +++ b/include/uapi/linux/netfilter/xt_tcpmss.h @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _XT_TCPMSS_MATCH_H -#define _XT_TCPMSS_MATCH_H +#ifndef _XT_TCPMSS_H +#define _XT_TCPMSS_H #include -struct xt_tcpmss_match_info { - __u16 mss_min, mss_max; - __u8 invert; +struct xt_tcpmss_info { + __u16 mss; }; -#endif /*_XT_TCPMSS_MATCH_H*/ +#define XT_TCPMSS_CLAMP_PMTU 0xffff + +#endif /* _XT_TCPMSS_H */ diff --git a/include/uapi/linux/netfilter_ipv4/ipt_ecn.h b/include/uapi/linux/netfilter_ipv4/ipt_ecn.h index 8121bec470267a..e3630fd045b8dd 100644 --- a/include/uapi/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/uapi/linux/netfilter_ipv4/ipt_ecn.h @@ -1,16 +1,34 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IPT_ECN_H -#define _IPT_ECN_H +/* Header file for iptables ipt_ECN target + * + * (C) 2002 by Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * + * ipt_ECN.h,v 1.3 2002/05/29 12:17:40 laforge Exp +*/ +#ifndef _IPT_ECN_TARGET_H +#define _IPT_ECN_TARGET_H -#include -#define ipt_ecn_info xt_ecn_info +#include +#include -enum { - IPT_ECN_IP_MASK = XT_ECN_IP_MASK, - IPT_ECN_OP_MATCH_IP = XT_ECN_OP_MATCH_IP, - IPT_ECN_OP_MATCH_ECE = XT_ECN_OP_MATCH_ECE, - IPT_ECN_OP_MATCH_CWR = XT_ECN_OP_MATCH_CWR, - IPT_ECN_OP_MATCH_MASK = XT_ECN_OP_MATCH_MASK, +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) + +#define IPT_ECN_OP_SET_IP 0x01 /* set ECN bits of IPv4 header */ +#define IPT_ECN_OP_SET_ECE 0x10 /* set ECE bit of TCP header */ +#define IPT_ECN_OP_SET_CWR 0x20 /* set CWR bit of TCP header */ + +#define IPT_ECN_OP_MASK 0xce + +struct ipt_ECN_info { + __u8 operation; /* bitset of operations */ + __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ + union { + struct { + __u8 ece:1, cwr:1; /* TCP ECT bits */ + } tcp; + } proto; }; -#endif /* IPT_ECN_H */ +#endif /* _IPT_ECN_TARGET_H */ diff --git a/include/uapi/linux/netfilter_ipv4/ipt_ttl.h b/include/uapi/linux/netfilter_ipv4/ipt_ttl.h index ad0226a8629b21..57d2fc67a94371 100644 --- a/include/uapi/linux/netfilter_ipv4/ipt_ttl.h +++ b/include/uapi/linux/netfilter_ipv4/ipt_ttl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* IP tables module for matching the value of the TTL - * (C) 2000 by Harald Welte */ +/* TTL modification module for IP tables + * (C) 2000 by Harald Welte */ #ifndef _IPT_TTL_H #define _IPT_TTL_H @@ -8,14 +8,14 @@ #include enum { - IPT_TTL_EQ = 0, /* equals */ - IPT_TTL_NE, /* not equals */ - IPT_TTL_LT, /* less than */ - IPT_TTL_GT, /* greater than */ + IPT_TTL_SET = 0, + IPT_TTL_INC, + IPT_TTL_DEC }; +#define IPT_TTL_MAXMODE IPT_TTL_DEC -struct ipt_ttl_info { +struct ipt_TTL_info { __u8 mode; __u8 ttl; }; diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_hl.h b/include/uapi/linux/netfilter_ipv6/ip6t_hl.h index 6b62f9418eb213..eaed56a287b47c 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_hl.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_hl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* ip6tables module for matching the Hop Limit value +/* Hop Limit modification module for ip6tables * Maciej Soltysiak - * Based on HW's ttl module */ + * Based on HW's TTL module */ #ifndef _IP6T_HL_H #define _IP6T_HL_H @@ -9,14 +9,14 @@ #include enum { - IP6T_HL_EQ = 0, /* equals */ - IP6T_HL_NE, /* not equals */ - IP6T_HL_LT, /* less than */ - IP6T_HL_GT, /* greater than */ + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC }; +#define IP6T_HL_MAXMODE IP6T_HL_DEC -struct ip6t_hl_info { +struct ip6t_HL_info { __u8 mode; __u8 hop_limit; }; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index bc6bc8bb871d41..fb8123cfa5ecdf 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1056,7 +1056,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, for_each_possible_cpu(cpu) { if (cpu == current_cpu) - copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); + copy_map_value(&htab->map, per_cpu_ptr(pptr, cpu), value); else /* Since elem is preallocated, we cannot touch special fields */ zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); } diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index fb0169a8f9bbd3..cfa44515ab72d7 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only -/* IP tables module for matching the value of the IPv4/IPv6 DSCP field +/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 * * (C) 2002 by Harald Welte - */ + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * + * See RFC2474 for a description of the DSCP field within the IP Header. +*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -11,100 +14,148 @@ #include #include -#include +#include MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: DSCP/TOS field match"); +MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_dscp"); -MODULE_ALIAS("ip6t_dscp"); -MODULE_ALIAS("ipt_tos"); -MODULE_ALIAS("ip6t_tos"); +MODULE_ALIAS("ipt_DSCP"); +MODULE_ALIAS("ip6t_DSCP"); +MODULE_ALIAS("ipt_TOS"); +MODULE_ALIAS("ip6t_TOS"); + +#define XT_DSCP_ECN_MASK 3u -static bool -dscp_mt(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_dscp_info *info = par->matchinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - return (dscp == info->dscp) ^ !!info->invert; + if (dscp != dinfo->dscp) { + if (skb_ensure_writable(skb, sizeof(struct iphdr))) + return NF_DROP; + + ipv4_change_dsfield(ip_hdr(skb), XT_DSCP_ECN_MASK, + dinfo->dscp << XT_DSCP_SHIFT); + + } + return XT_CONTINUE; } -static bool -dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_dscp_info *info = par->matchinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; - return (dscp == info->dscp) ^ !!info->invert; + if (dscp != dinfo->dscp) { + if (skb_ensure_writable(skb, sizeof(struct ipv6hdr))) + return NF_DROP; + + ipv6_change_dsfield(ipv6_hdr(skb), XT_DSCP_ECN_MASK, + dinfo->dscp << XT_DSCP_SHIFT); + } + return XT_CONTINUE; } -static int dscp_mt_check(const struct xt_mtchk_param *par) +static int dscp_tg_check(const struct xt_tgchk_param *par) { - const struct xt_dscp_info *info = par->matchinfo; + const struct xt_DSCP_info *info = par->targinfo; if (info->dscp > XT_DSCP_MAX) return -EDOM; - return 0; } -static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +tos_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_tos_match_info *info = par->matchinfo; - - if (xt_family(par) == NFPROTO_IPV4) - return ((ip_hdr(skb)->tos & info->tos_mask) == - info->tos_value) ^ !!info->invert; - else - return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) == - info->tos_value) ^ !!info->invert; + const struct xt_tos_target_info *info = par->targinfo; + struct iphdr *iph = ip_hdr(skb); + u_int8_t orig, nv; + + orig = ipv4_get_dsfield(iph); + nv = (orig & ~info->tos_mask) ^ info->tos_value; + + if (orig != nv) { + if (skb_ensure_writable(skb, sizeof(struct iphdr))) + return NF_DROP; + iph = ip_hdr(skb); + ipv4_change_dsfield(iph, 0, nv); + } + + return XT_CONTINUE; +} + +static unsigned int +tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_tos_target_info *info = par->targinfo; + struct ipv6hdr *iph = ipv6_hdr(skb); + u_int8_t orig, nv; + + orig = ipv6_get_dsfield(iph); + nv = (orig & ~info->tos_mask) ^ info->tos_value; + + if (orig != nv) { + if (skb_ensure_writable(skb, sizeof(struct iphdr))) + return NF_DROP; + iph = ipv6_hdr(skb); + ipv6_change_dsfield(iph, 0, nv); + } + + return XT_CONTINUE; } -static struct xt_match dscp_mt_reg[] __read_mostly = { +static struct xt_target dscp_tg_reg[] __read_mostly = { { - .name = "dscp", + .name = "DSCP", .family = NFPROTO_IPV4, - .checkentry = dscp_mt_check, - .match = dscp_mt, - .matchsize = sizeof(struct xt_dscp_info), + .checkentry = dscp_tg_check, + .target = dscp_tg, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", .me = THIS_MODULE, }, { - .name = "dscp", + .name = "DSCP", .family = NFPROTO_IPV6, - .checkentry = dscp_mt_check, - .match = dscp_mt6, - .matchsize = sizeof(struct xt_dscp_info), + .checkentry = dscp_tg_check, + .target = dscp_tg6, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", .me = THIS_MODULE, }, { - .name = "tos", + .name = "TOS", .revision = 1, .family = NFPROTO_IPV4, - .match = tos_mt, - .matchsize = sizeof(struct xt_tos_match_info), + .table = "mangle", + .target = tos_tg, + .targetsize = sizeof(struct xt_tos_target_info), .me = THIS_MODULE, }, { - .name = "tos", + .name = "TOS", .revision = 1, .family = NFPROTO_IPV6, - .match = tos_mt, - .matchsize = sizeof(struct xt_tos_match_info), + .table = "mangle", + .target = tos_tg6, + .targetsize = sizeof(struct xt_tos_target_info), .me = THIS_MODULE, }, }; -static int __init dscp_mt_init(void) +static int __init dscp_tg_init(void) { - return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg)); + return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg)); } -static void __exit dscp_mt_exit(void) +static void __exit dscp_tg_exit(void) { - xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg)); + xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg)); } -module_init(dscp_mt_init); -module_exit(dscp_mt_exit); +module_init(dscp_tg_init); +module_exit(dscp_tg_exit); diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 0d32d4841cb325..116a885adb3cd6 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -1,107 +1,345 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Kernel module to match TCP MSS values. */ - -/* Copyright (C) 2000 Marc Boucher - * Portions (C) 2005 by Harald Welte +/* + * This is a module which is used for setting the MSS option in TCP packets. + * + * Copyright (C) 2000 Marc Boucher + * Copyright (C) 2007 Patrick McHardy */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include - #include #include +#include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: TCP MSS match"); -MODULE_ALIAS("ipt_tcpmss"); -MODULE_ALIAS("ip6t_tcpmss"); +MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment"); +MODULE_ALIAS("ipt_TCPMSS"); +MODULE_ALIAS("ip6t_TCPMSS"); -static bool -tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) +static inline unsigned int +optlen(const u_int8_t *opt, unsigned int offset) { - const struct xt_tcpmss_match_info *info = par->matchinfo; - const struct tcphdr *th; - struct tcphdr _tcph; - /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - const u_int8_t *op; - u8 _opt[15 * 4 - sizeof(_tcph)]; - unsigned int i, optlen; - - /* If we don't have the whole header, drop packet. */ - th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); - if (th == NULL) - goto dropit; - - /* Malformed. */ - if (th->doff*4 < sizeof(*th)) - goto dropit; - - optlen = th->doff*4 - sizeof(*th); - if (!optlen) - goto out; - - /* Truncated options. */ - op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt); - if (op == NULL) - goto dropit; - - for (i = 0; i < optlen; ) { - if (op[i] == TCPOPT_MSS - && (optlen - i) >= TCPOLEN_MSS - && op[i+1] == TCPOLEN_MSS) { - u_int16_t mssval; - - mssval = (op[i+2] << 8) | op[i+3]; - - return (mssval >= info->mss_min && - mssval <= info->mss_max) ^ info->invert; + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; +} + +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; + + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; + } + + nf_route(net, (struct dst_entry **)&rt, &fl, false, family); + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} + +static int +tcpmss_mangle_packet(struct sk_buff *skb, + const struct xt_action_param *par, + unsigned int family, + unsigned int tcphoff, + unsigned int minlen) +{ + const struct xt_tcpmss_info *info = par->targinfo; + struct tcphdr *tcph; + int len, tcp_hdrlen; + unsigned int i; + __be16 oldval; + u16 newmss; + u8 *opt; + + /* This is a fragment, no TCP header is available */ + if (par->fragoff != 0) + return 0; + + if (skb_ensure_writable(skb, skb->len)) + return -1; + + len = skb->len - tcphoff; + if (len < (int)sizeof(struct tcphdr)) + return -1; + + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + tcp_hdrlen = tcph->doff * 4; + + if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr)) + return -1; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + struct net *net = xt_net(par); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); + + if (min_mtu <= minlen) { + net_err_ratelimited("unknown or invalid path-MTU (%u)\n", + min_mtu); + return -1; + } + newmss = min_mtu - minlen; + } else + newmss = info->mss; + + opt = (u_int8_t *)tcph; + for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) { + u_int16_t oldmss; + + oldmss = (opt[i+2] << 8) | opt[i+3]; + + /* Never increase MSS, even when setting it, as + * doing so results in problems for hosts that rely + * on MSS being set correctly. + */ + if (oldmss <= newmss) + return 0; + + opt[i+2] = (newmss & 0xff00) >> 8; + opt[i+3] = newmss & 0x00ff; + + inet_proto_csum_replace2(&tcph->check, skb, + htons(oldmss), htons(newmss), + false); + return 0; } - if (op[i] < 2 || i == optlen - 1) - i++; - else - i += op[i+1] ? : 1; } -out: - return info->invert; -dropit: - par->hotdrop = true; + /* There is data after the header so the option can't be added + * without moving it, and doing so may make the SYN packet + * itself too large. Accept the packet unmodified instead. + */ + if (len > tcp_hdrlen) + return 0; + + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcp_hdrlen >= 15 * 4) + return 0; + + /* + * MSS Option not found ?! add it.. + */ + if (skb_tailroom(skb) < TCPOLEN_MSS) { + if (pskb_expand_head(skb, 0, + TCPOLEN_MSS - skb_tailroom(skb), + GFP_ATOMIC)) + return -1; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + } + + skb_put(skb, TCPOLEN_MSS); + + /* + * IPv4: RFC 1122 states "If an MSS option is not received at + * connection setup, TCP MUST assume a default send MSS of 536". + * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum + * length IPv6 header of 60, ergo the default MSS value is 1220 + * Since no MSS was provided, we must use the default values + */ + if (xt_family(par) == NFPROTO_IPV4) + newmss = min(newmss, (u16)536); + else + newmss = min(newmss, (u16)1220); + + opt = (u_int8_t *)tcph + sizeof(struct tcphdr); + memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); + + inet_proto_csum_replace2(&tcph->check, skb, + htons(len), htons(len + TCPOLEN_MSS), true); + opt[0] = TCPOPT_MSS; + opt[1] = TCPOLEN_MSS; + opt[2] = (newmss & 0xff00) >> 8; + opt[3] = newmss & 0x00ff; + + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); + + oldval = ((__be16 *)tcph)[6]; + tcph->doff += TCPOLEN_MSS/4; + inet_proto_csum_replace2(&tcph->check, skb, + oldval, ((__be16 *)tcph)[6], false); + return TCPOLEN_MSS; +} + +static unsigned int +tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct iphdr *iph = ip_hdr(skb); + __be16 newlen; + int ret; + + ret = tcpmss_mangle_packet(skb, par, + PF_INET, + iph->ihl * 4, + sizeof(*iph) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + iph = ip_hdr(skb); + newlen = htons(ntohs(iph->tot_len) + ret); + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + } + return XT_CONTINUE; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static unsigned int +tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 nexthdr; + __be16 frag_off, oldlen, newlen; + int tcphoff; + int ret; + + nexthdr = ipv6h->nexthdr; + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off); + if (tcphoff < 0) + return NF_DROP; + ret = tcpmss_mangle_packet(skb, par, + PF_INET6, + tcphoff, + sizeof(*ipv6h) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + ipv6h = ipv6_hdr(skb); + oldlen = ipv6h->payload_len; + newlen = htons(ntohs(oldlen) + ret); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)oldlen), + (__force __wsum)newlen); + ipv6h->payload_len = newlen; + } + return XT_CONTINUE; +} +#endif + +/* Must specify -p tcp --syn */ +static inline bool find_syn_match(const struct xt_entry_match *m) +{ + const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; + + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TCPHDR_SYN && + !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) + return true; + return false; } -static struct xt_match tcpmss_mt_reg[] __read_mostly = { +static int tcpmss_tg4_check(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + if (par->nft_compat) + return 0; + + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return 0; + pr_info_ratelimited("Only works on TCP SYN packets\n"); + return -EINVAL; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int tcpmss_tg6_check(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + if (par->nft_compat) + return 0; + + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return 0; + pr_info_ratelimited("Only works on TCP SYN packets\n"); + return -EINVAL; +} +#endif + +static struct xt_target tcpmss_tg_reg[] __read_mostly = { { - .name = "tcpmss", .family = NFPROTO_IPV4, - .match = tcpmss_mt, - .matchsize = sizeof(struct xt_tcpmss_match_info), + .name = "TCPMSS", + .checkentry = tcpmss_tg4_check, + .target = tcpmss_tg4, + .targetsize = sizeof(struct xt_tcpmss_info), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { - .name = "tcpmss", .family = NFPROTO_IPV6, - .match = tcpmss_mt, - .matchsize = sizeof(struct xt_tcpmss_match_info), + .name = "TCPMSS", + .checkentry = tcpmss_tg6_check, + .target = tcpmss_tg6, + .targetsize = sizeof(struct xt_tcpmss_info), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, +#endif }; -static int __init tcpmss_mt_init(void) +static int __init tcpmss_tg_init(void) { - return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg)); + return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg)); } -static void __exit tcpmss_mt_exit(void) +static void __exit tcpmss_tg_exit(void) { - xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg)); + xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg)); } -module_init(tcpmss_mt_init); -module_exit(tcpmss_mt_exit); +module_init(tcpmss_tg_init); +module_exit(tcpmss_tg_exit); diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus index 10a2aa04cd078d..415248fb669902 100644 --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus @@ -1,11 +1,12 @@ -C Z6.0+pooncelock+pooncelock+pombonce +C Z6.0+pooncelock+poonceLock+pombonce (* - * Result: Sometimes + * Result: Never * - * This example demonstrates that a pair of accesses made by different - * processes each while holding a given lock will not necessarily be - * seen as ordered by a third process not holding that lock. + * This litmus test demonstrates how smp_mb__after_spinlock() may be + * used to ensure that accesses in different critical sections for a + * given lock running on different CPUs are nevertheless seen in order + * by CPUs not holding that lock. *) {} @@ -23,6 +24,7 @@ P1(int *y, int *z, spinlock_t *mylock) int r0; spin_lock(mylock); + smp_mb__after_spinlock(); r0 = READ_ONCE(*y); WRITE_ONCE(*z, 1); spin_unlock(mylock);