From f248f45792bfd7e04dee78ed3440725c9f9210d3 Mon Sep 17 00:00:00 2001 From: Erik Sipsma Date: Thu, 19 Sep 2019 01:39:03 +0000 Subject: [PATCH] Add support for CNI-configured VM network interfaces. Signed-off-by: Erik Sipsma --- Makefile | 28 ++ docs/getting-started.md | 84 +++- docs/networking.md | 81 ++-- docs/quickstart.md | 21 +- examples/Makefile | 16 +- examples/taskworkflow.go | 77 ++-- go.mod | 12 +- go.sum | 77 +++- internal/network_test_utils.go | 150 +++++++ proto/events.pb.go | 2 +- proto/firecracker.pb.go | 2 +- proto/firecracker.proto | 2 +- proto/service/fccontrol/ttrpc/fccontrol.pb.go | 2 +- proto/types.pb.go | 413 +++++++++++++++--- proto/types.proto | 80 +++- runtime/Makefile | 2 +- runtime/cni_integ_test.go | 216 +++++++++ runtime/config.go | 6 + runtime/firecrackeroci/network.go | 43 ++ runtime/helpers.go | 53 ++- runtime/helpers_test.go | 87 +++- runtime/service.go | 17 +- runtime/service_integ_test.go | 75 ++-- tools/demo/fcnet.conflist | 19 + tools/docker/Dockerfile | 43 +- tools/docker/firecracker-runtime.json | 10 +- .../files_debootstrap/etc/resolv.conf | 1 + 27 files changed, 1378 insertions(+), 241 deletions(-) create mode 100644 internal/network_test_utils.go create mode 100644 runtime/cni_integ_test.go create mode 100644 runtime/firecrackeroci/network.go create mode 100644 tools/demo/fcnet.conflist create mode 120000 tools/image-builder/files_debootstrap/etc/resolv.conf diff --git a/Makefile b/Makefile index d4b6a0cea..dca30d252 100644 --- a/Makefile +++ b/Makefile @@ -131,6 +131,34 @@ firecracker-containerd-naive-integ-test-image: $(RUNC_BIN) $(FIRECRACKER_BIN) $( .PHONY: all $(SUBDIRS) clean proto deps lint install image test-images firecracker-container-test-image firecracker-containerd-naive-integ-test-image test test-in-docker $(TEST_SUBDIRS) integ-test $(INTEG_TEST_SUBDIRS) +########################## +# CNI Network +########################## + +CNI_BIN_ROOT?=/opt/cni/bin +$(CNI_BIN_ROOT): + mkdir -p $(CNI_BIN_ROOT) + 
+PTP_BIN?=$(CNI_BIN_ROOT)/ptp +$(PTP_BIN): $(CNI_BIN_ROOT) + GOBIN=$(CNI_BIN_ROOT) GO111MODULE=off go get -u github.com/containernetworking/plugins/plugins/main/ptp + +HOSTLOCAL_BIN?=$(CNI_BIN_ROOT)/host-local +$(HOSTLOCAL_BIN): $(CNI_BIN_ROOT) + GOBIN=$(CNI_BIN_ROOT) GO111MODULE=off go get -u github.com/containernetworking/plugins/plugins/ipam/host-local + +TC_REDIRECT_TAP_BIN?=$(CNI_BIN_ROOT)/tc-redirect-tap +$(TC_REDIRECT_TAP_BIN): $(CNI_BIN_ROOT) + GOBIN=$(CNI_BIN_ROOT) go install github.com/firecracker-microvm/firecracker-go-sdk/cni/cmd/tc-redirect-tap + +FCNET_CONFIG?=/etc/cni/conf.d/fcnet.conflist +$(FCNET_CONFIG): + mkdir -p $(dir $(FCNET_CONFIG)) + cp tools/demo/fcnet.conflist $(FCNET_CONFIG) + +.PHONY: demo-network +demo-network: $(PTP_BIN) $(HOSTLOCAL_BIN) $(TC_REDIRECT_TAP_BIN) $(FCNET_CONFIG) + ########################## # Firecracker submodule ########################## diff --git a/docs/getting-started.md b/docs/getting-started.md index 5cda2aa09..406daff67 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -193,6 +193,10 @@ configuration file has the following fields: delivered. * `ht_enabled` (unused) - Reserved for future use. * `debug` (optional) - Enable debug-level logging from the runtime. +* `default_network_interfaces` (optional) - a list of network interfaces to configure + a VM with if no list of network interfaces is provided with a CreateVM call. Defaults + to an empty list. The structure of the items in the list is the same as the Go API + FirecrackerNetworkInterface defined [in protobuf here](../proto/types.proto).
A reasonable example configuration @@ -206,8 +210,7 @@ configuration file has the following fields: "cpu_template": "T2", "log_fifo": "fc-logs.fifo", "log_level": "Debug", - "metrics_fifo": "fc-metrics.fifo", - + "metrics_fifo": "fc-metrics.fifo" } ```
@@ -241,11 +244,13 @@ And start a container! ```bash $ sudo firecracker-ctr --address /run/firecracker-containerd/containerd.sock \ - run --snapshotter firecracker-naive --runtime aws.firecracker --tty \ + run --snapshotter firecracker-naive --runtime aws.firecracker \ + --rm --tty --net-host \ docker.io/library/busybox:latest busybox-test ``` -Alternatively you can specify `--runtime` and `--snapshotter` just once when creating a new namespace using containerd's default labels: +Alternatively you can specify `--runtime` and `--snapshotter` just once when +creating a new namespace using containerd's default labels: ```bash $ sudo firecracker-ctr --address /run/firecracker-containerd/containerd.sock \ @@ -258,6 +263,75 @@ $ sudo firecracker-ctr --address /run/firecracker-containerd/containerd.sock \ $ sudo firecracker-ctr --address /run/firecracker-containerd/containerd.sock \ -n fc \ - run --tty \ + run --rm --tty --net-host \ docker.io/library/busybox:latest busybox-test ``` + +## Networking support +Firecracker-containerd supports the same networking options as provided by the +Firecracker Go SDK, [documented here](https://github.com/firecracker-microvm/firecracker-go-sdk#network-configuration). +This includes support for configuring VM network interfaces both with +pre-created tap devices and with tap devices created automatically by +[CNI](https://github.com/containernetworking/cni) plugins. + +### CNI Setup +CNI-configured networks offer the quickest way to get VMs up and running with +connectivity to external networks. Setting one up requires a few extra steps in +addition to the above Setup steps. 
+ +To install the required CNI dependencies, run the following make target from the +previously cloned firecracker-containerd repository: +```bash +$ sudo make demo-network +``` + +You can check the Makefile to see exactly what is installed and where, but for a +quick summary: +* [`ptp` CNI plugin](https://github.com/containernetworking/plugins/tree/master/plugins/main/ptp) + - Creates a [veth](http://man7.org/linux/man-pages/man4/veth.4.html) pair with + one end in a private network namespace and the other end in the host's network namespace. +* [`host-local` CNI + plugin](https://github.com/containernetworking/plugins/tree/master/plugins/ipam/host-local) + - Manages IP allocations of network devices present on the local machine by + vending them from a statically defined subnet. +* [`tc-redirect-tap` CNI + plugin](https://github.com/firecracker-microvm/firecracker-go-sdk/tree/master/cni) + - A CNI plugin that adapts other CNI plugins to be usable by Firecracker VMs. + [See this doc for more details](networking.md). It is used here to adapt veth + devices created by the `ptp` plugin to tap devices provided to VMs. +* [`fcnet.conflist`](../tools/demo/fcnet.conflist) - A sample CNI configuration + file that defines a `fcnet` network created via the `ptp`, `host-local` and + `tc-redirect-tap` plugins. + +After those dependencies are installed, an update to the firecracker-containerd +configuration file is required for VMs to use the `fcnet` CNI-configuration as +their default way of generating network interfaces. Just include the following +`default_network_interfaces` key in your runtime configuration file (by default +at `/etc/containerd/firecracker-runtime.json`): +```json +"default_network_interfaces": [ + { + "CNIConfig": { + "NetworkName": "fcnet", + "InterfaceName": "veth0" + } + } +] +``` + +After that, start up a container (as described in the above Usage section) and +try pinging your host IP. 
+ +At the time of this writing, there is a bug in the ptp plugin that prevents the +DNS settings from the IPAM plugin being propagated. This is being addressed, but +until that time DNS resolution will require users to manually tweak the installed +CNI configuration to specify static DNS nameservers appropriate to their local +network in [the `dns` section of the PTP plugin](https://github.com/containernetworking/plugins/tree/master/plugins/main/ptp#network-configuration-reference). + +While your host's IP should always be reachable from the VM given the above +networking setup, your VM may or may not have outbound internet access depending +on the details of your host's network. The ptp plugin attempts to set up iptables +rules to allow the VM's traffic to be forwarded on your host's network but may +not be able to if there are pre-existing iptables rules that overlap. In those +cases, granting your VM outbound internet access may require customization of +the CNI configuration past what's installed above. diff --git a/docs/networking.md b/docs/networking.md index cbb75255c..f931c5796 100644 --- a/docs/networking.md +++ b/docs/networking.md @@ -70,6 +70,7 @@ The Linux Kernel’s [Traffic Control (TC)](http://tldp.org/HOWTO/Traffic-Contro Most relevant to our interests, the [U32 filter](http://man7.org/linux/man-pages/man8/tc-u32.8.html) provided as part of TC allows you to create a rule that essentially says “take all the packets entering the ingress queue of this device and move them to the egress queue of this other device”. For example, if you have DeviceA and DeviceB you can setup that rule on each of them such that the end effect is every packet sent into DeviceA goes out of DeviceB and every packet sent to DeviceB goes out of DeviceA. The host kernel just moves the ethernet packets from one device’s queue to the other’s, so the redirection is entirely transparent to any userspace application or VM guest kernel all the way down to and including the link layer. 
* We first learned about this approach from [Kata Containers](https://github.com/kata-containers/runtime), who are using it for similar purposes in their framework. They have [some more background information documented here](https://gist.github.com/mcastelino/7d85f4164ffdaf48242f9281bb1d0f9b). +* Another use of TC redirect filters in the context of CNI plugins can be found in the [bandwidth CNI plugin](https://github.com/containernetworking/plugins/tree/master/plugins/meta/bandwidth). This technique can be used to redirect between a Firecracker VM’s tap device and another device in the network namespace Firecracker is running in. If, for example, the VM tap is redirecting with a veth device in a network namespace, the VM guest internally gets a network device with the same mac address as the veth and needs to assign to it the same IP and routes the veth uses. After that, the VM guest essentially operates as though its nic is the same as the veth device outside on the host. @@ -117,51 +118,45 @@ VMs will execute in. In this option, Firecracker-containerd just asks for CNI configuration during a CreateVM call, which it will use to configure a network namespace for the Firecracker VM to execute in. The API updates may look something like: ``` -message FirecrackerNetworkConfiguration { - // CNI Configuration to use to create the network namespace in which the - // VM will execute. It's an error to specify both this and any NetworkInterfaces - // below. 
- FirecrackerCNIConfiguration CNIConfiguration; +message FirecrackerNetworkInterface { + // + + // CNI Configuration that will be used to configure the network interface + CNIConfiguration CNIConfig; - // The existing FirecrackerNetworkInterface configuration - // which specifies the name of the tap device on the host and rate limiters - repeated FirecrackerNetworkInterface NetworkInterfaces; + // Static configuration that will be used to configure the network interface + StaticNetworkConfiguration StaticConfig; } message FirecrackerCNIConfiguration { // Name of the CNI network that will be used to configure the VM string NetworkName; - // Path to CNI bin directory and CNI conf directory, respectively, that will - // be used to configure the VM. - string BinDirectory; + // IF_NAME CNI parameter provided to plugins for the name of devices to create + string InterfaceName; + + // Paths to CNI bin directories, CNI conf directory and CNI cache directory, + // respectively, that will be used to configure the VM. + repeated string BinPath; string ConfDirectory; + string CacheDirectory; + + // CNI Args passed to plugins + repeated CNIArg Args; } -message FirecrackerNetworkInterface { - // - - // (Optional) Static configuration that will be applied internally in the - // Guest VM. At first, it will be an error to specify this for multiple - // NetworkInterfacesin the same CreateVM call (due to the limitations of - // using "ip=..."). In time, we may be able to lift that restriction with - // updates to the implementation. - StaticIPConfiguration StaticIP; +message StaticNetworkConfiguration { + string MacAddress; + string HostDevName; + IPConfiguration IPConfig; } -message StaticIPConfiguration { +message IPConfiguration { // Network configuration that will be applied to a network interface in a // Guest VM on boot. 
- string IP; - string SubnetMask; - string DefaultGateway; + string PrimaryAddress; + string GatewayAddress; repeated string Nameservers; - string Hostname; -} - -message CreateVMRequest { - // - FirecrackerNetworkConfiguration NetworkConfiguration; } ``` @@ -258,7 +253,7 @@ In order for networking to work as expected inside the VM, it needs to have IP a The IP configuration is just pre-configured in the kernel when the system starts (the same end effect of having run the corresponding netlink commands to configure IP and routes). The DNS configuration is applied by writing the nameserver and search domain configuration to /proc/net/pnp in a format that is compatible with /etc/resolv.conf. The typical approach is to then have /etc/resolv.conf be a symlink to /proc/net/pnp. -Users of Firecracker-containerd are also free to provide their own kernel boot options, which could include their own static IP/DNS configuration. In those cases, if they have enabled CNI configuration, Firecracker-containerd will return an error. +Users of Firecracker-containerd are also free to provide their own kernel boot options, which could include their own static IP/DNS configuration. In those cases, if they have enabled CNI configuration, Firecracker-containerd will return an error. **Pros** @@ -335,21 +330,15 @@ The biggest immediate downside of Option A is the requirement that /etc/resolv.c Firecracker-containerd will build the current binaries it does today plus a new CNI-plugin compatible binary, `tc-redirect-tap`, that takes an existing network namespace and creates within it a tap device that is redirected via a TC filter to an already networked device in the netns. This CNI plugin is only useful when chained with other CNI-plugins (which will setup the device that the tap will redirect with). -When setting up Firecracker-containerd, users can optionally include CNI configuration in Firecracker-containerd’s runtime config file. 
If CNI configuration is not passed during CreateVM (such as the single-container VM use case), the runtime will fall back to configuration in the runtime config. If there’s no CNI configuration present in either the CreateVM call or the runtime config, the behavior will remain the same as it is today. +When setting up Firecracker-containerd, users can optionally include a set of default network interfaces to provide to a VM if none are specified by the user. This allows users to optionally set their VMs to use CNI-configured network interfaces by default. The user is free to provide an explicit NetworkInterfaces list during the CreateVM call (including an empty list), in which case that will be used instead of any defaults present in the runtime config file. -On a high-level, the implementation of CreateVM relevant to the new networking configuration will look something like this: - -1. Parse what, if any, CNI configuration is provided via either the CreateVM call or the defaults in the runtime config file. -2. If CNI Configuration is not present, just continue the VM creation process as it is today -3. If CNI Configuration is present, check to see if the Jailer configuration specifies a network namespace - 1. If it does, that will be the network namespace provided to the CNI plugins - 2. If it does not, a new empty network namespace will be created by the runtime and provided to the CNI plugins -4. Use the provided CNI configuration to configure the network namespace -5. Start the Firecracker VM in the network namespace via the Jailer and with the corresponding `ip=...` kernel boot parameters +The Firecracker Go SDK will take care of checking whether any Jailer config specifies a pre-existing network namespace to use and, if not, creating a new network namespace for the VM on behalf of the user. The Go SDK will also take care of invoking CNI on that network namespace, starting the VMM inside of it, and handling CNI network deletion after the VM stops. 
If CreateVM succeeds, any containers running inside the VM with a “host” network namespace will have access to the network configured via CNI outside the VM. -The CNI configuration Firecracker-containerd asks for are just references to a CNI network name, a CNI bin directory (i.e. `/opt/cni/bin`) and a CNI configuration directory (i.e. `/etc/cni/net.d`). A hypothetical example CNI configuration file that uses the standard [ptp CNI plugin](https://github.com/containernetworking/plugins/tree/master/plugins/main/ptp) to create a veth device whose traffic is redirected with a tap device: +The CNI configuration Firecracker-containerd requires from users is a CNI network name and an IfName parameter to provide to CNI plugins. Other values such as the CNI bin directories and CNI configuration directories can be provided but will have sensible defaults if not provided. + +A hypothetical example CNI configuration file that uses the standard [ptp CNI plugin](https://github.com/containernetworking/plugins/tree/master/plugins/main/ptp) to create a veth device whose traffic is redirected with a tap device: ``` { @@ -361,10 +350,8 @@ The CNI configuration Firecracker-containerd asks for are just references to a C "ipMasq": true, "ipam": { "type": "host-local", - "subnet": "192.168.1.0/24" - }, - "dns": { - "nameservers": [ "1.1.1.1" ] + "subnet": "192.168.1.0/24", + "resolvConf": "/etc/resolv.conf" } }, { @@ -376,7 +363,7 @@ The CNI configuration Firecracker-containerd asks for are just references to a C Given the above configuration, the containers inside the VM will have access to the 192.168.1.0/24 network. Thanks to setting `ipMasq: true`, the containers should also have internet access (assuming the host itself has internet access). 
-Firecracker-containerd will also provide an example CNI configuration that, if used, will result in Firecracker VMs being spun up with the same access to the network the host has on its default interface (something comparable to Docker’s default networking configuration). This can be setup via a Makefile target (i.e. `install-default-network`), which allows users trying out Firecracker-containerd to get networking, including outbound internet access, working in their Firecracker VMs by default if they so choose. +Firecracker-containerd will also provide an example CNI configuration that, if used, will result in Firecracker VMs being spun up with the same access to the network the host has on its default interface (something comparable to Docker’s default networking configuration). This can be setup via a Makefile target (i.e. `demo-network`), which allows users trying out Firecracker-containerd to get networking, including outbound internet access, working in their Firecracker VMs by default if they so choose. ## Hypothetical CRI interactions diff --git a/docs/quickstart.md b/docs/quickstart.md index fde155baf..d183e5d29 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -4,7 +4,10 @@ This quickstart guide provides simple steps to get a working firecracker-containerd environment, with each of the major components built from source. Once you have completed this quickstart, you should be able to run and develop firecracker-containerd (the components in this repository), the -Firecracker VMM, and containerd. +Firecracker VMM, and containerd. Note that the guide below should result in VMs +by default having network access to IPs assigned on the host and may, depending +on the configuration of your host's network, also have outbound access to the +internet. This quickstart will clone repositories under your `$HOME` directory and install files into `/usr/local/bin`. 
@@ -67,7 +70,6 @@ sudo DEBIAN_FRONTEND=noninteractive apt-get \ install --yes \ docker-ce aufs-tools- sudo usermod -aG docker $(whoami) -exec newgrp docker cd ~ @@ -80,11 +82,14 @@ cd ~ # overlay # * firecracker-containerd, an alternative containerd binary that includes the # firecracker VM lifecycle plugin and API +# * tc-redirect-tap and other CNI dependencies that enable VMs to start with +# access to networks available on the host git clone https://github.com/firecracker-microvm/firecracker-containerd.git cd firecracker-containerd sudo DEBIAN_FRONTEND=noninteractive apt-get install -y dmsetup -make all image +sg docker -c 'make all image' sudo make install +sudo make demo-network cd ~ @@ -128,7 +133,13 @@ sudo tee /etc/containerd/firecracker-runtime.json </dev/null & \ + ctr content fetch docker.io/library/alpine:3.10.1 >/dev/null + RUN mkdir -p /var/lib/firecracker-containerd/naive +RUN make -C /firecracker-containerd demo-network +COPY tools/docker/firecracker-runtime.json /etc/containerd/firecracker-runtime.json +COPY tools/docker/naive-snapshotter/entrypoint.sh /entrypoint ENTRYPOINT ["/entrypoint"] CMD ["exec /bin/bash"] diff --git a/tools/docker/firecracker-runtime.json b/tools/docker/firecracker-runtime.json index 3edd6acfc..110169ae3 100644 --- a/tools/docker/firecracker-runtime.json +++ b/tools/docker/firecracker-runtime.json @@ -7,5 +7,13 @@ "cpu_template": "T2", "log_fifo": "/tmp/fc-logs.fifo", "log_level": "Debug", - "metrics_fifo": "/tmp/fc-metrics.fifo" + "metrics_fifo": "/tmp/fc-metrics.fifo", + "default_network_interfaces": [ + { + "CNIConfig": { + "NetworkName": "fcnet", + "InterfaceName": "veth0" + } + } + ] } diff --git a/tools/image-builder/files_debootstrap/etc/resolv.conf b/tools/image-builder/files_debootstrap/etc/resolv.conf new file mode 120000 index 000000000..cfdae740b --- /dev/null +++ b/tools/image-builder/files_debootstrap/etc/resolv.conf @@ -0,0 +1 @@ +/proc/net/pnp \ No newline at end of file