pkg/route,pkg/mesh: replace NAT with ip rules

This commit entirely replaces NAT in Kilo with a few iproute2 rules.
Previously, Kilo would source-NAT the majority of packets in order to
avoid problems with strict source checks in cloud providers causing
packets to be considered martians. This source-NAT-ing made it
difficult to correctly apply Kuberenetes NetworkPolicies based on source
IPs.

This rewrite instead relies on a handful of iproute2 rules to ensure
that packets get encapsulated in certain scenarios based on the source
network and/or source interface.

This has the benefit of avoiding extra iptables bloat as well as
enabling better compatibility with NetworkPolicies.

Signed-off-by: Lucas Servén Marín <lserven@gmail.com>
This commit is contained in:
Lucas Servén Marín
2020-02-20 21:27:50 +01:00
parent 4857d10da1
commit 134cbe90be
5 changed files with 605 additions and 151 deletions

View File

@@ -604,14 +604,7 @@ func (m *Mesh) applyTopology() {
m.errorCounter.WithLabelValues("apply").Inc()
return
}
rules := iptables.ForwardRules(m.subnet)
// Finx the Kilo interface name.
link, err := linkByIndex(m.kiloIface)
if err != nil {
level.Error(m.logger).Log("error", err)
m.errorCounter.WithLabelValues("apply").Inc()
return
}
ipRules := iptables.ForwardRules(m.subnet)
// If we are handling local routes, ensure the local
// tunnel has an IP address and IPIP traffic is allowed.
if m.enc.Strategy() != encapsulation.Never && m.local {
@@ -624,7 +617,7 @@ func (m *Mesh) applyTopology() {
break
}
}
rules = append(rules, m.enc.Rules(cidrs)...)
ipRules = append(ipRules, m.enc.Rules(cidrs)...)
// If we are handling local routes, ensure the local
// tunnel has an IP address.
@@ -634,7 +627,14 @@ func (m *Mesh) applyTopology() {
return
}
}
if err := m.ipTables.Set(rules); err != nil {
if err := m.ipTables.Set(ipRules); err != nil {
level.Error(m.logger).Log("error", err)
m.errorCounter.WithLabelValues("apply").Inc()
return
}
// Find the Kilo interface name.
link, err := linkByIndex(m.kiloIface)
if err != nil {
level.Error(m.logger).Log("error", err)
m.errorCounter.WithLabelValues("apply").Inc()
return
@@ -677,8 +677,8 @@ func (m *Mesh) applyTopology() {
}
// We need to add routes last since they may depend
// on the WireGuard interface.
routes := t.Routes(m.kiloIface, m.privIface, m.enc.Index(), m.local, m.enc)
if err := m.table.Set(routes); err != nil {
routes, rules := t.Routes(link.Attrs().Name, m.kiloIface, m.privIface, m.enc.Index(), m.local, m.enc)
if err := m.table.Set(routes, rules); err != nil {
level.Error(m.logger).Log("error", err)
m.errorCounter.WithLabelValues("apply").Inc()
}

View File

@@ -25,6 +25,8 @@ import (
"golang.org/x/sys/unix"
)
const kiloTableIndex = 1107
// Topology represents the logical structure of the overlay network.
type Topology struct {
// key is the private key of the node creating the topology.
@@ -40,8 +42,7 @@ type Topology struct {
// leader represents whether or not the local host
// is the segment leader.
leader bool
// subnet is the entire subnet from which IPs
// for the WireGuard interfaces will be allocated.
// subnet is the Pod subnet of the local node.
subnet *net.IPNet
// privateIP is the private IP address of the local node.
privateIP *net.IPNet
@@ -95,7 +96,7 @@ func NewTopology(nodes map[string]*Node, peers map[string]*Peer, granularity Gra
localLocation = hostname
}
t := Topology{key: key, port: port, hostname: hostname, location: localLocation, subnet: subnet, privateIP: nodes[hostname].InternalIP}
t := Topology{key: key, port: port, hostname: hostname, location: localLocation, subnet: nodes[hostname].Subnet, privateIP: nodes[hostname].InternalIP}
for location := range topoMap {
// Sort the location so the result is stable.
sort.Slice(topoMap[location], func(i, j int) bool {
@@ -156,7 +157,7 @@ func NewTopology(nodes map[string]*Node, peers map[string]*Peer, granularity Gra
segment.wireGuardIP = ipNet.IP
segment.allowedIPs = append(segment.allowedIPs, oneAddressCIDR(ipNet.IP))
if t.leader && segment.location == t.location {
t.wireGuardCIDR = &net.IPNet{IP: ipNet.IP, Mask: t.subnet.Mask}
t.wireGuardCIDR = &net.IPNet{IP: ipNet.IP, Mask: subnet.Mask}
}
}
@@ -164,8 +165,9 @@ func NewTopology(nodes map[string]*Node, peers map[string]*Peer, granularity Gra
}
// Routes generates a slice of routes for a given Topology.
func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc encapsulation.Encapsulator) []*netlink.Route {
func (t *Topology) Routes(kiloIfaceName string, kiloIface, privIface, tunlIface int, local bool, enc encapsulation.Encapsulator) ([]*netlink.Route, []*netlink.Rule) {
var routes []*netlink.Route
var rules []*netlink.Rule
if !t.leader {
// Find the GW for this segment.
// This will be the an IP of the leader.
@@ -201,6 +203,23 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
LinkIndex: privIface,
Protocol: unix.RTPROT_STATIC,
}, enc.Strategy(), t.privateIP, tunlIface))
// Encapsulate packets from the host's Pod subnet headed
// to private IPs.
if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
routes = append(routes, &netlink.Route{
Dst: oneAddressCIDR(segment.privateIPs[i]),
Flags: int(netlink.FLAG_ONLINK),
Gw: segment.privateIPs[i],
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
Table: kiloTableIndex,
})
rules = append(rules, defaultRule(&netlink.Rule{
Src: t.subnet,
Dst: oneAddressCIDR(segment.privateIPs[i]),
Table: kiloTableIndex,
}))
}
}
}
continue
@@ -238,7 +257,7 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
}, enc.Strategy(), t.privateIP, tunlIface))
}
}
return routes
return routes, rules
}
for _, segment := range t.segments {
// Add routes for the current segment if local is true.
@@ -256,6 +275,30 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
LinkIndex: privIface,
Protocol: unix.RTPROT_STATIC,
}, enc.Strategy(), t.privateIP, tunlIface))
// Encapsulate packets from the host's Pod subnet headed
// to private IPs.
if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
routes = append(routes, &netlink.Route{
Dst: oneAddressCIDR(segment.privateIPs[i]),
Flags: int(netlink.FLAG_ONLINK),
Gw: segment.privateIPs[i],
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
Table: kiloTableIndex,
})
rules = append(rules, defaultRule(&netlink.Rule{
Src: t.subnet,
Dst: oneAddressCIDR(segment.privateIPs[i]),
Table: kiloTableIndex,
}))
// Also encapsulate packets from the Kilo interface
// headed to private IPs.
rules = append(rules, defaultRule(&netlink.Rule{
Dst: oneAddressCIDR(segment.privateIPs[i]),
Table: kiloTableIndex,
IifName: kiloIfaceName,
}))
}
}
}
continue
@@ -298,7 +341,7 @@ func (t *Topology) Routes(kiloIface, privIface, tunlIface int, local bool, enc e
})
}
}
return routes
return routes, rules
}
func encapsulateRoute(route *netlink.Route, encapsulate encapsulation.Strategy, subnet *net.IPNet, tunlIface int) *netlink.Route {
@@ -448,3 +491,12 @@ func deduplicatePeerIPs(peers []*Peer) []*Peer {
}
return ps
}
func defaultRule(rule *netlink.Rule) *netlink.Rule {
base := netlink.NewRule()
base.Src = rule.Src
base.Dst = rule.Dst
base.IifName = rule.IifName
base.Table = rule.Table
return base
}

View File

@@ -113,7 +113,7 @@ func TestNewTopology(t *testing.T) {
hostname: nodes["a"].Name,
leader: true,
location: nodes["a"].Location,
subnet: DefaultKiloSubnet,
subnet: nodes["a"].Subnet,
privateIP: nodes["a"].InternalIP,
wireGuardCIDR: &net.IPNet{IP: w1, Mask: net.CIDRMask(16, 32)},
segments: []*segment{
@@ -150,7 +150,7 @@ func TestNewTopology(t *testing.T) {
hostname: nodes["b"].Name,
leader: true,
location: nodes["b"].Location,
subnet: DefaultKiloSubnet,
subnet: nodes["b"].Subnet,
privateIP: nodes["b"].InternalIP,
wireGuardCIDR: &net.IPNet{IP: w2, Mask: net.CIDRMask(16, 32)},
segments: []*segment{
@@ -187,7 +187,7 @@ func TestNewTopology(t *testing.T) {
hostname: nodes["c"].Name,
leader: false,
location: nodes["b"].Location,
subnet: DefaultKiloSubnet,
subnet: nodes["c"].Subnet,
privateIP: nodes["c"].InternalIP,
wireGuardCIDR: nil,
segments: []*segment{
@@ -224,7 +224,7 @@ func TestNewTopology(t *testing.T) {
hostname: nodes["a"].Name,
leader: true,
location: nodes["a"].Name,
subnet: DefaultKiloSubnet,
subnet: nodes["a"].Subnet,
privateIP: nodes["a"].InternalIP,
wireGuardCIDR: &net.IPNet{IP: w1, Mask: net.CIDRMask(16, 32)},
segments: []*segment{
@@ -271,7 +271,7 @@ func TestNewTopology(t *testing.T) {
hostname: nodes["b"].Name,
leader: true,
location: nodes["b"].Name,
subnet: DefaultKiloSubnet,
subnet: nodes["b"].Subnet,
privateIP: nodes["b"].InternalIP,
wireGuardCIDR: &net.IPNet{IP: w2, Mask: net.CIDRMask(16, 32)},
segments: []*segment{
@@ -318,7 +318,7 @@ func TestNewTopology(t *testing.T) {
hostname: nodes["c"].Name,
leader: true,
location: nodes["c"].Name,
subnet: DefaultKiloSubnet,
subnet: nodes["c"].Subnet,
privateIP: nodes["c"].InternalIP,
wireGuardCIDR: &net.IPNet{IP: w3, Mask: net.CIDRMask(16, 32)},
segments: []*segment{
@@ -382,7 +382,7 @@ func TestRoutes(t *testing.T) {
nodes, peers, key, port := setup(t)
kiloIface := 0
privIface := 1
pubIface := 2
tunlIface := 2
mustTopoForGranularityAndHost := func(granularity Granularity, hostname string) *Topology {
return mustTopo(t, nodes, peers, granularity, hostname, port, key, DefaultKiloSubnet)
}
@@ -391,12 +391,15 @@ func TestRoutes(t *testing.T) {
name string
local bool
topology *Topology
result []*netlink.Route
strategy encapsulation.Strategy
routes []*netlink.Route
rules []*netlink.Rule
}{
{
name: "logical from a",
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name).segments[1].cidrs[0],
Flags: int(netlink.FLAG_ONLINK),
@@ -445,7 +448,8 @@ func TestRoutes(t *testing.T) {
{
name: "logical from b",
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["b"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: mustTopoForGranularityAndHost(LogicalGranularity, nodes["b"].Name).segments[0].cidrs[0],
Flags: int(netlink.FLAG_ONLINK),
@@ -480,7 +484,8 @@ func TestRoutes(t *testing.T) {
{
name: "logical from c",
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: oneAddressCIDR(mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name).segments[0].wireGuardIP),
Flags: int(netlink.FLAG_ONLINK),
@@ -535,7 +540,8 @@ func TestRoutes(t *testing.T) {
{
name: "full from a",
topology: mustTopoForGranularityAndHost(FullGranularity, nodes["a"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: mustTopoForGranularityAndHost(FullGranularity, nodes["a"].Name).segments[1].cidrs[0],
Flags: int(netlink.FLAG_ONLINK),
@@ -584,7 +590,8 @@ func TestRoutes(t *testing.T) {
{
name: "full from b",
topology: mustTopoForGranularityAndHost(FullGranularity, nodes["b"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: mustTopoForGranularityAndHost(FullGranularity, nodes["b"].Name).segments[0].cidrs[0],
Flags: int(netlink.FLAG_ONLINK),
@@ -633,7 +640,8 @@ func TestRoutes(t *testing.T) {
{
name: "full from c",
topology: mustTopoForGranularityAndHost(FullGranularity, nodes["c"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: mustTopoForGranularityAndHost(FullGranularity, nodes["c"].Name).segments[0].cidrs[0],
Flags: int(netlink.FLAG_ONLINK),
@@ -683,7 +691,59 @@ func TestRoutes(t *testing.T) {
name: "logical from a local",
local: true,
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: nodes["b"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
Gw: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name).segments[1].wireGuardIP,
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: oneAddressCIDR(nodes["b"].InternalIP.IP),
Flags: int(netlink.FLAG_ONLINK),
Gw: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name).segments[1].wireGuardIP,
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: nodes["c"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
Gw: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name).segments[1].wireGuardIP,
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: oneAddressCIDR(nodes["c"].InternalIP.IP),
Flags: int(netlink.FLAG_ONLINK),
Gw: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name).segments[1].wireGuardIP,
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["a"].AllowedIPs[0],
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["a"].AllowedIPs[1],
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["b"].AllowedIPs[0],
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
},
},
{
name: "logical from a local always",
local: true,
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["a"].Name),
strategy: encapsulation.Always,
routes: []*netlink.Route{
{
Dst: nodes["b"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
@@ -733,7 +793,8 @@ func TestRoutes(t *testing.T) {
name: "logical from b local",
local: true,
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["b"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: nodes["a"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
@@ -772,11 +833,76 @@ func TestRoutes(t *testing.T) {
},
},
},
{
name: "logical from b local always",
local: true,
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["b"].Name),
strategy: encapsulation.Always,
routes: []*netlink.Route{
{
Dst: nodes["a"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
Gw: mustTopoForGranularityAndHost(LogicalGranularity, nodes["b"].Name).segments[0].wireGuardIP,
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: oneAddressCIDR(nodes["a"].InternalIP.IP),
Flags: int(netlink.FLAG_ONLINK),
Gw: mustTopoForGranularityAndHost(LogicalGranularity, nodes["b"].Name).segments[0].wireGuardIP,
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: nodes["c"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["c"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: oneAddressCIDR(nodes["c"].InternalIP.IP),
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["c"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
Table: kiloTableIndex,
},
{
Dst: peers["a"].AllowedIPs[0],
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["a"].AllowedIPs[1],
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["b"].AllowedIPs[0],
LinkIndex: kiloIface,
Protocol: unix.RTPROT_STATIC,
},
},
rules: []*netlink.Rule{
defaultRule(&netlink.Rule{
Src: nodes["b"].Subnet,
Dst: nodes["c"].InternalIP,
Table: kiloTableIndex,
}),
defaultRule(&netlink.Rule{
Dst: nodes["c"].InternalIP,
IifName: DefaultKiloInterface,
Table: kiloTableIndex,
}),
},
},
{
name: "logical from c local",
local: true,
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: oneAddressCIDR(mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name).segments[0].wireGuardIP),
Flags: int(netlink.FLAG_ONLINK),
@@ -835,11 +961,91 @@ func TestRoutes(t *testing.T) {
},
},
},
{
name: "logical from c local always",
local: true,
topology: mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name),
strategy: encapsulation.Always,
routes: []*netlink.Route{
{
Dst: oneAddressCIDR(mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name).segments[0].wireGuardIP),
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: nodes["a"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: oneAddressCIDR(nodes["a"].InternalIP.IP),
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: oneAddressCIDR(mustTopoForGranularityAndHost(LogicalGranularity, nodes["c"].Name).segments[1].wireGuardIP),
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: nodes["b"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: nodes["b"].InternalIP,
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
Table: kiloTableIndex,
},
{
Dst: peers["a"].AllowedIPs[0],
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["a"].AllowedIPs[1],
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
{
Dst: peers["b"].AllowedIPs[0],
Flags: int(netlink.FLAG_ONLINK),
Gw: nodes["b"].InternalIP.IP,
LinkIndex: tunlIface,
Protocol: unix.RTPROT_STATIC,
},
},
rules: []*netlink.Rule{
defaultRule(&netlink.Rule{
Src: nodes["c"].Subnet,
Dst: nodes["b"].InternalIP,
Table: kiloTableIndex,
}),
},
},
{
name: "full from a local",
local: true,
topology: mustTopoForGranularityAndHost(FullGranularity, nodes["a"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: nodes["b"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
@@ -889,7 +1095,8 @@ func TestRoutes(t *testing.T) {
name: "full from b local",
local: true,
topology: mustTopoForGranularityAndHost(FullGranularity, nodes["b"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: nodes["a"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
@@ -939,7 +1146,8 @@ func TestRoutes(t *testing.T) {
name: "full from c local",
local: true,
topology: mustTopoForGranularityAndHost(FullGranularity, nodes["c"].Name),
result: []*netlink.Route{
strategy: encapsulation.Never,
routes: []*netlink.Route{
{
Dst: nodes["a"].Subnet,
Flags: int(netlink.FLAG_ONLINK),
@@ -986,8 +1194,11 @@ func TestRoutes(t *testing.T) {
},
},
} {
routes := tc.topology.Routes(kiloIface, privIface, pubIface, tc.local, encapsulation.NewIPIP(encapsulation.Never))
if diff := pretty.Compare(routes, tc.result); diff != "" {
routes, rules := tc.topology.Routes(DefaultKiloInterface, kiloIface, privIface, tunlIface, tc.local, encapsulation.NewIPIP(tc.strategy))
if diff := pretty.Compare(routes, tc.routes); diff != "" {
t.Errorf("test case %q: got diff: %v", tc.name, diff)
}
if diff := pretty.Compare(rules, tc.rules); diff != "" {
t.Errorf("test case %q: got diff: %v", tc.name, diff)
}
}