pkg/route,pkg/mesh: replace NAT with ip rules

This commit entirely replaces NAT in Kilo with a few iproute2 rules.
Previously, Kilo would source-NAT the majority of packets in order to
avoid problems with strict source checks in cloud providers causing
packets to be considered martians. This source-NAT-ing made it
difficult to correctly apply Kuberenetes NetworkPolicies based on source
IPs.

This rewrite instead relies on a handful of iproute2 rules to ensure
that packets get encapsulated in certain scenarios based on the source
network and/or source interface.

This has the benefit of avoiding extra iptables bloat as well as
enabling better compatibility with NetworkPolicies.

Signed-off-by: Lucas Servén Marín <lserven@gmail.com>
This commit is contained in:
Lucas Servén Marín
2020-02-20 21:27:50 +01:00
parent 4857d10da1
commit 134cbe90be
5 changed files with 605 additions and 151 deletions

View File

@@ -25,6 +25,8 @@ import (
"golang.org/x/sys/unix"
)
const kiloTableIndex = 1107
// Topology represents the logical structure of the overlay network.
type Topology struct {
// key is the private key of the node creating the topology.
@@ -40,8 +42,7 @@ type Topology struct {
// leader represents whether or not the local host
// is the segment leader.
leader bool
// subnet is the entire subnet from which IPs
// for the WireGuard interfaces will be allocated.
// subnet is the Pod subnet of the local node.
subnet *net.IPNet
// privateIP is the private IP address of the local node.
privateIP *net.IPNet
@@ -95,7 +96,7 @@ func NewTopology(nodes map[string]*Node, peers map[string]*Peer, granularity Gra
localLocation = hostname
}
t := Topology{key: key, port: port, hostname: hostname, location: localLocation, subnet: subnet, privateIP: nodes[hostname].InternalIP}
t := Topology{key: key, port: port, hostname: hostname, location: localLocation, subnet: nodes[hostname].Subnet, privateIP: nodes[hostname].InternalIP}
for location := range topoMap {
// Sort the location so the result is stable.
sort.Slice(topoMap[location], func(i, j int) bool {
@@ -156,7 +157,7 @@ func NewTopology(nodes map[string]*Node, peers map[string]*Peer, granularity Gra
segment.wireGuardIP = ipNet.IP
segment.allowedIPs = append(segment.allowedIPs, oneAddressCIDR(ipNet.IP))
if t.leader && segment.location == t.location {
t.wireGuardCIDR = &net.IPNet{IP: ipNet.IP, Mask: t.subnet.Mask}
t.wireGuardCIDR = &net.IPNet{IP: ipNet.IP, Mask: subnet.Mask}
}
}
@@ -164,8 +165,9 @@ func NewTopology(nodes map[string]*Node, peers map[string]*Peer, granularity Gra
}
// Routes generates a slice of routes for a given Topology.
func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc encapsulation.Encapsulator) []*netlink.Route {
func (t *Topology) Routes(kiloIfaceName string, kiloIface, privIface, tunlIface int, local bool, enc encapsulation.Encapsulator) ([]*netlink.Route, []*netlink.Rule) {
var routes []*netlink.Route
var rules []*netlink.Rule
if !t.leader {
// Find the GW for this segment.
// This will be the an IP of the leader.
@@ -201,6 +203,23 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
LinkIndex: privIface,
Protocol: unix.RTPROT_STATIC,
}, enc.Strategy(), t.privateIP, tunlIface))
// Encapsulate packets from the host's Pod subnet headed
// to private IPs.
if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
routes = append(routes, &netlink.Route{
Dst: oneAddressCIDR(segment.privateIPs[i]),
Flags: int(netlink.FLAG_ONLINK),
Gw: segment.privateIPs[i],
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
Table: kiloTableIndex,
})
rules = append(rules, defaultRule(&netlink.Rule{
Src: t.subnet,
Dst: oneAddressCIDR(segment.privateIPs[i]),
Table: kiloTableIndex,
}))
}
}
}
continue
@@ -238,7 +257,7 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
}, enc.Strategy(), t.privateIP, tunlIface))
}
}
return routes
return routes, rules
}
for _, segment := range t.segments {
// Add routes for the current segment if local is true.
@@ -256,6 +275,30 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
LinkIndex: privIface,
Protocol: unix.RTPROT_STATIC,
}, enc.Strategy(), t.privateIP, tunlIface))
// Encapsulate packets from the host's Pod subnet headed
// to private IPs.
if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
routes = append(routes, &netlink.Route{
Dst: oneAddressCIDR(segment.privateIPs[i]),
Flags: int(netlink.FLAG_ONLINK),
Gw: segment.privateIPs[i],
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
Table: kiloTableIndex,
})
rules = append(rules, defaultRule(&netlink.Rule{
Src: t.subnet,
Dst: oneAddressCIDR(segment.privateIPs[i]),
Table: kiloTableIndex,
}))
// Also encapsulate packets from the Kilo interface
// headed to private IPs.
rules = append(rules, defaultRule(&netlink.Rule{
Dst: oneAddressCIDR(segment.privateIPs[i]),
Table: kiloTableIndex,
IifName: kiloIfaceName,
}))
}
}
}
continue
@@ -298,7 +341,7 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
})
}
}
return routes
return routes, rules
}
func encapsulateRoute(route *netlink.Route, encapsulate encapsulation.Strategy, subnet *net.IPNet, tunlIface int) *netlink.Route {
@@ -448,3 +491,12 @@ func deduplicatePeerIPs(peers []*Peer) []*Peer {
}
return ps
}
func defaultRule(rule *netlink.Rule) *netlink.Rule {
base := netlink.NewRule()
base.Src = rule.Src
base.Dst = rule.Dst
base.IifName = rule.IifName
base.Table = rule.Table
return base
}