@@ -111,7 +111,8 @@ the guest. There are two ways to do this: using QEMU directly, or using
libvirt.
.. note::
- IOMMU is not supported with vhost-user ports.
+
+ IOMMU and Post-copy Live Migration are not supported with vhost-user ports.
Adding vhost-user ports to the guest (QEMU)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -301,6 +302,52 @@ The default value is false.
QEMU). Starting with QEMU v2.9.1, vhost-iommu-support can safely be
enabled, even without having an IOMMU device, with no performance penalty.
+vhost-user-client Post-copy Live Migration Support (experimental)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``Post-copy`` migration is the migration mode where the destination CPUs are
+started before all the memory has been transferred. The main advantage is the
+predictable migration time. It is mostly used as a second phase after the
+normal 'pre-copy' migration in case it takes too long to converge.
+
+More information can be found in QEMU `docs`_.
+
+.. _`docs`: https://git.qemu.org/?p=qemu.git;a=blob;f=docs/devel/migration.rst
+
+Post-copy support may be enabled via a global config value
+``vhost-postcopy-support``. Setting this to ``true`` enables Post-copy support
+for all vhost-user-client ports::
+
+ $ ovs-vsctl set Open_vSwitch . other_config:vhost-postcopy-support=true
+
+The default value is ``false``.
+
+.. important::
+
+ Changing this value requires restarting the daemon.
+
+.. important::
+
+ DPDK Post-copy migration mode uses the userfaultfd syscall to communicate
+ with the kernel about page fault handling and uses shared memory based on
+ huge pages. So the destination host Linux kernel should support userfaultfd
+ over shared hugetlbfs. This feature was only introduced in upstream kernel
+ version 4.11.
+
+ The Post-copy feature is supported in DPDK since version 18.11.0 and in
+ QEMU since version 2.12.0. But it's suggested to use QEMU >= 3.0.1 because
+ migration recovery was fixed for post-copy in 3.0 and a few additional bug
+ fixes (like a userfaultfd leak) were released in 3.0.1.
+
+ The DPDK Post-copy feature requires that the guest memory is not populated
+ (the application must not call mlock* syscalls). So the mlockall and
+ dequeue zero-copy features are incompatible with the post-copy feature.
+
+ Note that during migration of a vhost-user device, PMD threads hang while
+ faulted pages are downloaded from the source host. Transferring a 1GB
+ hugepage across a 10Gbps link is possibly unacceptably slow. So the
+ recommended hugepage size is 2MB.
+
.. _dpdk-testpmd:
DPDK in the Guest
@@ -500,6 +547,10 @@ QEMU versions v2.10 and greater). This value can be set like so::
Because of this limitation, this feature is considered 'experimental'.
+.. note::
+
+ Post-copy Live Migration is not compatible with dequeue zero copy.
+
Further information can be found in the
`DPDK documentation
<https://doc.dpdk.org/guides-18.11/prog_guide/vhost_lib.html>`__
@@ -3,6 +3,7 @@ Post-v2.11.0
- DPDK:
* New option 'other_config:dpdk-socket-limit' to limit amount of
hugepage memory that can be used by DPDK.
+ * Add support for vHost Post-copy Live Migration (experimental).
* OVS validated with DPDK 18.11.1 which is recommended to be used.
- OpenFlow:
* Removed support for OpenFlow 1.6 (draft), which ONF abandoned.
@@ -56,6 +56,12 @@ dpdk_vhost_iommu_enabled(void)
return false;
}
+bool
+dpdk_vhost_postcopy_enabled(void)
+{
+ return false; /* Post-copy support is always reported as off here. */
+}
+
bool
dpdk_per_port_memory(void)
{
@@ -39,6 +39,7 @@
#include "ovs-numa.h"
#include "smap.h"
#include "svec.h"
+#include "util.h"
#include "vswitch-idl.h"
VLOG_DEFINE_THIS_MODULE(dpdk);
@@ -47,6 +48,8 @@ static FILE *log_stream = NULL; /* Stream for DPDK log redirection */
static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */
static bool vhost_iommu_enabled = false; /* Status of vHost IOMMU support */
+static bool vhost_postcopy_enabled = false; /* Status of vHost POSTCOPY
+ * support. */
static bool dpdk_initialized = false; /* Indicates successful initialization
* of DPDK. */
static bool per_port_memory = false; /* Status of per port memory support */
@@ -311,6 +314,15 @@ dpdk_init__(const struct smap *ovs_other_config)
VLOG_INFO("IOMMU support for vhost-user-client %s.",
vhost_iommu_enabled ? "enabled" : "disabled");
+ vhost_postcopy_enabled = smap_get_bool(ovs_other_config,
+ "vhost-postcopy-support", false);
+ if (vhost_postcopy_enabled && memory_locked()) {
+ VLOG_WARN("vhost-postcopy-support and mlockall are not compatible.");
+ vhost_postcopy_enabled = false;
+ }
+ VLOG_INFO("POSTCOPY support for vhost-user-client %s.",
+ vhost_postcopy_enabled ? "enabled" : "disabled");
+
per_port_memory = smap_get_bool(ovs_other_config,
"per-port-memory", false);
VLOG_INFO("Per port memory for DPDK devices %s.",
@@ -492,6 +504,12 @@ dpdk_vhost_iommu_enabled(void)
return vhost_iommu_enabled;
}
+bool
+dpdk_vhost_postcopy_enabled(void)
+{
+ return vhost_postcopy_enabled; /* Flag is assigned in dpdk_init__(). */
+}
+
bool
dpdk_per_port_memory(void)
{
@@ -39,6 +39,7 @@ void dpdk_init(const struct smap *ovs_other_config);
void dpdk_set_lcore_id(unsigned cpu);
const char *dpdk_get_vhost_sock_dir(void);
bool dpdk_vhost_iommu_enabled(void);
+bool dpdk_vhost_postcopy_enabled(void);
bool dpdk_per_port_memory(void);
void print_dpdk_version(void);
void dpdk_status(const struct ovsrec_open_vswitch *);
@@ -4147,6 +4147,11 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
vhost_flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
}
+ /* Enable POSTCOPY support, if explicitly requested. */
+ if (dpdk_vhost_postcopy_enabled()) {
+ vhost_flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
+ }
+
zc_enabled = dev->vhost_driver_flags
& RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
/* Enable zero copy flag, if requested */
@@ -406,6 +406,22 @@
</p>
</column>
+ <column name="other_config" key="vhost-postcopy-support"
+ type='{"type": "boolean"}'>
+ <p>
+ vHost post-copy is a feature which allows switching live migration
+ of a VM attached to a dpdkvhostuserclient port to post-copy mode if
+ the default pre-copy migration cannot converge or takes too long to
+ converge.
+ Setting this value to <code>true</code> enables vHost post-copy
+ support for all dpdkvhostuserclient ports. Available starting from
+ DPDK v18.11 and QEMU 2.12.
+ </p>
+ <p>
+ Changing this value requires restarting the daemon.
+ </p>
+ </column>
+
<column name="other_config" key="per-port-memory"
type='{"type": "boolean"}'>
<p>