@@ -270,6 +270,18 @@ static inline int inet_is_local_reserved_port(struct net *net, int port)
}
#endif
+#ifdef CONFIG_LOWPORT_SYSCTL
+static inline int inet_prot_sock(struct net *net)
+{
+ return net->ipv4.sysctl_ip_prot_sock;
+}
+#else
+static inline int inet_prot_sock(struct net *net)
+{
+ return PROT_SOCK;
+}
+#endif
+
__be32 inet_current_timestamp(void);
/* From inetpeer.c */
@@ -96,6 +96,9 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+#ifdef CONFIG_LOWPORT_SYSCTL
+ int sysctl_ip_prot_sock;
+#endif
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
@@ -109,6 +109,13 @@ config NETWORK_PHY_TIMESTAMPING
If you are unsure how to answer this question, answer N.
+config LOWPORT_SYSCTL
+ bool "Adjust reserved port range via sysctl"
+ depends on SYSCTL
+ help
+ This allows the administrator to adjust the reserved port range
+ using a sysctl.
+
menuconfig NETFILTER
bool "Network packet filtering framework (Netfilter)"
---help---
@@ -479,7 +479,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
snum = ntohs(addr->sin_port);
err = -EACCES;
- if (snum && snum < PROT_SOCK &&
+ if (snum && snum < inet_prot_sock(net) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out;
@@ -1700,6 +1700,9 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+#ifdef CONFIG_LOWPORT_SYSCTL
+ net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
+#endif
return 0;
}
@@ -35,6 +35,10 @@ static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+#ifdef CONFIG_LOWPORT_SYSCTL
+static int ip_protected_port_min;
+static int ip_protected_port_max = 65535;
+#endif
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
@@ -79,7 +83,17 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
+ /* Ensure that the upper limit is not smaller than the lower,
+ * and that the lower does not encroach upon the protected
+ * port limit.
+ */
+#ifdef CONFIG_LOWPORT_SYSCTL
+ if ((range[1] < range[0]) ||
+ (range[0] < net->ipv4.sysctl_ip_prot_sock))
+#else
+
if (range[1] < range[0])
+#endif
ret = -EINVAL;
else
set_local_port_range(net, range);
@@ -88,6 +102,42 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
return ret;
}
+#ifdef CONFIG_LOWPORT_SYSCTL
+/* Validate changes from /proc interface. */
+static int ipv4_protected_ports(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_ip_prot_sock);
+ int ret;
+ int pports;
+ int range[2];
+ struct ctl_table tmp = {
+ .data = &pports,
+ .maxlen = sizeof(pports),
+ .mode = table->mode,
+ .extra1 = &ip_protected_port_min,
+ .extra2 = &ip_protected_port_max,
+ };
+
+ pports = net->ipv4.sysctl_ip_prot_sock;
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ inet_get_local_port_range(net, &range[0], &range[1]);
+ /* Ensure that the local port range doesn't overlap with the
+ * protected port range.
+ */
+ if (range[0] < pports)
+ ret = -EINVAL;
+ else
+ net->ipv4.sysctl_ip_prot_sock = pports;
+ }
+
+ return ret;
+}
+#endif
static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
@@ -971,6 +1021,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = &one,
},
#endif
+#ifdef CONFIG_LOWPORT_SYSCTL
+ {
+ .procname = "ip_unprotected_port_start",
+ .maxlen = sizeof(int),
+ .data = &init_net.ipv4.sysctl_ip_prot_sock,
+ .mode = 0644,
+ .proc_handler = ipv4_protected_ports,
+ },
+#endif
{ }
};
@@ -302,7 +302,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return -EINVAL;
snum = ntohs(addr->sin6_port);
- if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+ if (snum && snum < inet_prot_sock(net) &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
lock_sock(sk);
@@ -426,10 +426,9 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol
*/
svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
- if (svc == NULL
- && protocol == IPPROTO_TCP
- && atomic_read(&ipvs->ftpsvc_counter)
- && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
+ if (!svc && protocol == IPPROTO_TCP &&
+ atomic_read(&ipvs->ftpsvc_counter) &&
+ (vport == FTPDATA || ntohs(vport) >= inet_prot_sock(ipvs->net))) {
/*
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
@@ -360,7 +360,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
}
}
- if (snum && snum < PROT_SOCK &&
+ if (snum && snum < inet_prot_sock(net) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -1152,8 +1152,10 @@ static int __sctp_connect(struct sock *sk,
* accept new associations, but it SHOULD NOT
* be permitted to open new associations.
*/
- if (ep->base.bind_addr.port < PROT_SOCK &&
- !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
+ if (ep->base.bind_addr.port <
+ inet_prot_sock(net) &&
+ !ns_capable(net->user_ns,
+ CAP_NET_BIND_SERVICE)) {
err = -EACCES;
goto out_free;
}
@@ -1818,7 +1820,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
* but it SHOULD NOT be permitted to open new
* associations.
*/
- if (ep->base.bind_addr.port < PROT_SOCK &&
+ if (ep->base.bind_addr.port < inet_prot_sock(net) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
err = -EACCES;
goto out_unlock;
@@ -4365,7 +4365,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
inet_get_local_port_range(sock_net(sk), &low, &high);
- if (snum < max(PROT_SOCK, low) || snum > high) {
+ if (snum < max(inet_prot_sock(sock_net(sk)), low) ||
+ snum > high) {
err = sel_netport_sid(sk->sk_protocol,
snum, &sid);
if (err)
Add net.ipv4.ip_unprotected_port_start, which is a per namespace sysctl that denotes the first unprotected inet port in the namespace. To disable all protected ports set this to zero. It also checks for overlap with the local port range. The protected and local range may not overlap. The use case for this change is to allow containerized processes to bind to priviliged ports, but prevent them from ever being allowed to modify their container's network configuration. The latter is accomplished by ensuring that the network namespace is not a child of the user namespace. This modification was needed to allow the container manager to disable a namespace's priviliged port restrictions without exposing control of the network namespace to processes in the user namespace. Signed-off-by: Krister Johansen <kjlx@templeofstupid.com> --- include/net/ip.h | 12 +++++++++ include/net/netns/ipv4.h | 3 +++ net/Kconfig | 7 +++++ net/ipv4/af_inet.c | 5 +++- net/ipv4/sysctl_net_ipv4.c | 59 ++++++++++++++++++++++++++++++++++++++++++ net/ipv6/af_inet6.c | 3 ++- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++-- net/sctp/socket.c | 10 ++++--- security/selinux/hooks.c | 3 ++- 9 files changed, 98 insertions(+), 11 deletions(-)