libvirt: support memory failure event

Introduce a memory failure event. Libvirt should monitor the domain for
this event and then post it to the upper layer. Based on the hardware
memory corruption message, a cloud scheduler could migrate the domain to
another healthy physical server.

Several changes in this patch:
public API:
    include/*
    src/conf/*
    src/remote/*
    src/remote_protocol-structs

client:
    examples/c/misc/event-test.c
    tools/virsh-domain.c

With this patch, each driver can implement its own method to emit
this new event.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
This commit is contained in:
zhenwei pi 2020-10-14 18:37:50 +08:00 committed by Michal Privoznik
parent c383dc948e
commit b866adf8d9
10 changed files with 321 additions and 1 deletions

View File

@ -963,6 +963,21 @@ myDomainEventBlockThresholdCallback(virConnectPtr conn G_GNUC_UNUSED,
}
/*
 * myDomainEventMemoryFailureCallback:
 * @conn: connection the event was delivered on (unused)
 * @dom: domain that reported the memory failure
 * @recipient: recipient of the failure (virDomainMemoryFailureRecipientType)
 * @action: action taken for the failure (virDomainMemoryFailureActionType)
 * @flags: bitwise-OR of virDomainMemoryFailureFlags
 * @opaque: user data (unused)
 *
 * Print the raw numeric details of a VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE
 * event to stdout.
 *
 * Returns 0 always.
 */
static int
myDomainEventMemoryFailureCallback(virConnectPtr conn G_GNUC_UNUSED,
                                   virDomainPtr dom,
                                   int recipient,
                                   int action,
                                   unsigned int flags,
                                   void *opaque G_GNUC_UNUSED)
{
    /* @flags is unsigned, hence %u; the trailing newline keeps
     * consecutive events on separate lines. */
    printf("%s EVENT: Domain %s(%d) memory failure: recipient '%d', "
           "action '%d', flags '%u'\n", __func__, virDomainGetName(dom),
           virDomainGetID(dom), recipient, action, flags);
    return 0;
}
static int
myDomainEventMigrationIterationCallback(virConnectPtr conn G_GNUC_UNUSED,
virDomainPtr dom,
@ -1093,6 +1108,7 @@ struct domainEventData domainEvents[] = {
DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED, myDomainEventDeviceRemovalFailedCallback),
DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_METADATA_CHANGE, myDomainEventMetadataChangeCallback),
DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD, myDomainEventBlockThresholdCallback),
DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE, myDomainEventMemoryFailureCallback),
};
struct storagePoolEventData {

View File

@ -3195,6 +3195,64 @@ typedef enum {
# endif
} virDomainEventCrashedDetailType;
/**
* virDomainMemoryFailureRecipientType:
*
* Recipient of a memory failure event, i.e. the address space in which
* the memory failure was detected.
*/
typedef enum {
/* memory failure at hypervisor memory address space */
VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR = 0,
/* memory failure at guest memory address space */
VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST = 1,
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST
# endif
} virDomainMemoryFailureRecipientType;
/**
* virDomainMemoryFailureActionType:
*
* Action taken in response to a memory failure event.
*/
typedef enum {
/* the memory failure could be ignored. This will only be the case for
* action-optional failures. */
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE = 0,
/* the memory failure occurred in guest memory, the guest has enabled its
* MCE handling mechanism, and the hypervisor could inject the MCE into the
* guest successfully. */
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT = 1,
/* the failure is unrecoverable. This occurs for action-required failures
* if the recipient is the hypervisor; the hypervisor will exit. */
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL = 2,
/* the failure is unrecoverable but confined to the guest. This occurs if
* the recipient is a guest which is not ready to handle memory failures. */
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET = 3,
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST
# endif
} virDomainMemoryFailureActionType;
/**
* virDomainMemoryFailureFlags:
*
* Bit flags qualifying a memory failure event. Each value occupies its own
* bit, so they can be OR'ed together in the @flags argument of
* virConnectDomainEventMemoryFailureCallback.
*/
typedef enum {
/* whether a memory failure event is action-required or action-optional
* (e.g. a failure during memory scrub). */
VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED = (1 << 0),
/* whether the failure occurred while the previous failure was still in
* progress. */
VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE = (1 << 1),
} virDomainMemoryFailureFlags;
/**
* virConnectDomainEventCallback:
* @conn: virConnect connection
@ -4564,6 +4622,31 @@ typedef void (*virConnectDomainEventBlockThresholdCallback)(virConnectPtr conn,
unsigned long long excess,
void *opaque);
/**
* virConnectDomainEventMemoryFailureCallback:
* @conn: connection object
* @dom: domain on which the event occurred
* @recipient: the recipient of the hardware memory failure
* (virDomainMemoryFailureRecipientType)
* @action: the action taken for the hardware memory failure
* (virDomainMemoryFailureActionType)
* @flags: the flags of the hardware memory failure
* (bitwise-OR of virDomainMemoryFailureFlags)
* @opaque: application specified data
*
* The callback occurs when the hypervisor handles a hardware memory
* corruption event.
*
* The callback signature to use when registering for an event of type
* VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE with virConnectDomainEventRegisterAny()
*/
typedef void (*virConnectDomainEventMemoryFailureCallback)(virConnectPtr conn,
virDomainPtr dom,
int recipient,
int action,
unsigned int flags,
void *opaque);
/**
* VIR_DOMAIN_EVENT_CALLBACK:
*
@ -4606,6 +4689,7 @@ typedef enum {
VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED = 22, /* virConnectDomainEventDeviceRemovalFailedCallback */
VIR_DOMAIN_EVENT_ID_METADATA_CHANGE = 23, /* virConnectDomainEventMetadataChangeCallback */
VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD = 24, /* virConnectDomainEventBlockThresholdCallback */
VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE = 25, /* virConnectDomainEventMemoryFailureCallback */
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_EVENT_ID_LAST

View File

@ -57,6 +57,7 @@ static virClassPtr virDomainEventJobCompletedClass;
static virClassPtr virDomainEventDeviceRemovalFailedClass;
static virClassPtr virDomainEventMetadataChangeClass;
static virClassPtr virDomainEventBlockThresholdClass;
static virClassPtr virDomainEventMemoryFailureClass;
static void virDomainEventDispose(void *obj);
static void virDomainEventLifecycleDispose(void *obj);
@ -79,6 +80,7 @@ static void virDomainEventJobCompletedDispose(void *obj);
static void virDomainEventDeviceRemovalFailedDispose(void *obj);
static void virDomainEventMetadataChangeDispose(void *obj);
static void virDomainEventBlockThresholdDispose(void *obj);
static void virDomainEventMemoryFailureDispose(void *obj);
static void
virDomainEventDispatchDefaultFunc(virConnectPtr conn,
@ -287,6 +289,15 @@ struct _virDomainEventBlockThreshold {
typedef struct _virDomainEventBlockThreshold virDomainEventBlockThreshold;
typedef virDomainEventBlockThreshold *virDomainEventBlockThresholdPtr;
/* Event object carrying the payload of VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE.
* The three payload fields are handed unchanged to the registered
* virConnectDomainEventMemoryFailureCallback when the event is dispatched. */
struct _virDomainEventMemoryFailure {
virDomainEvent parent;
int recipient; /* virDomainMemoryFailureRecipientType */
int action; /* virDomainMemoryFailureActionType */
unsigned int flags; /* bitwise-OR of virDomainMemoryFailureFlags */
};
typedef struct _virDomainEventMemoryFailure virDomainEventMemoryFailure;
typedef virDomainEventMemoryFailure *virDomainEventMemoryFailurePtr;
static int
virDomainEventsOnceInit(void)
@ -333,6 +344,8 @@ virDomainEventsOnceInit(void)
return -1;
if (!VIR_CLASS_NEW(virDomainEventBlockThreshold, virDomainEventClass))
return -1;
if (!VIR_CLASS_NEW(virDomainEventMemoryFailure, virDomainEventClass))
return -1;
return 0;
}
@ -542,6 +555,14 @@ virDomainEventBlockThresholdDispose(void *obj)
}
/*
 * virDomainEventMemoryFailureDispose:
 * @obj: the virDomainEventMemoryFailure instance being disposed
 *
 * The event holds only plain integer fields, so there is nothing to
 * release here; the call is merely traced.
 */
static void
virDomainEventMemoryFailureDispose(void *obj)
{
    virDomainEventMemoryFailurePtr memFailure = obj;

    VIR_DEBUG("obj=%p", memFailure);
}
static void *
virDomainEventNew(virClassPtr klass,
int eventID,
@ -1619,6 +1640,52 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom,
}
static virObjectEventPtr
virDomainEventMemoryFailureNew(int id,
const char *name,
unsigned char *uuid,
int recipient,
int action,
unsigned int flags)
{
virDomainEventMemoryFailurePtr ev;
if (virDomainEventsInitialize() < 0)
return NULL;
if (!(ev = virDomainEventNew(virDomainEventMemoryFailureClass,
VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE,
id, name, uuid)))
return NULL;
ev->recipient = recipient;
ev->action = action;
ev->flags = flags;
return (virObjectEventPtr)ev;
}
/*
 * virDomainEventMemoryFailureNewFromObj:
 * @obj: domain object whose definition supplies the ID, name and UUID
 * @recipient: recipient of the memory failure
 * @action: action taken for the memory failure
 * @flags: memory failure flags
 *
 * Returns whatever virDomainEventMemoryFailureNew() returns for the
 * identity taken from @obj->def.
 */
virObjectEventPtr
virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj,
                                      int recipient,
                                      int action,
                                      unsigned int flags)
{
    virObjectEventPtr memFailure;

    memFailure = virDomainEventMemoryFailureNew(obj->def->id,
                                                obj->def->name,
                                                obj->def->uuid,
                                                recipient, action, flags);
    return memFailure;
}
/*
 * virDomainEventMemoryFailureNewFromDom:
 * @dom: public domain handle supplying the ID, name and UUID
 * @recipient: recipient of the memory failure
 * @action: action taken for the memory failure
 * @flags: memory failure flags
 *
 * Returns whatever virDomainEventMemoryFailureNew() returns for the
 * identity taken from @dom.
 */
virObjectEventPtr
virDomainEventMemoryFailureNewFromDom(virDomainPtr dom,
                                      int recipient,
                                      int action,
                                      unsigned int flags)
{
    virObjectEventPtr memFailure;

    memFailure = virDomainEventMemoryFailureNew(dom->id,
                                                dom->name,
                                                dom->uuid,
                                                recipient, action, flags);
    return memFailure;
}
static void
virDomainEventDispatchDefaultFunc(virConnectPtr conn,
virObjectEventPtr event,
@ -1902,6 +1969,19 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
cbopaque);
goto cleanup;
}
case VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE:
{
virDomainEventMemoryFailurePtr memoryFailureEvent;
memoryFailureEvent = (virDomainEventMemoryFailurePtr)event;
((virConnectDomainEventMemoryFailureCallback)cb)(conn, dom,
memoryFailureEvent->recipient,
memoryFailureEvent->action,
memoryFailureEvent->flags,
cbopaque);
goto cleanup;
}
case VIR_DOMAIN_EVENT_ID_LAST:
break;
}

View File

@ -255,6 +255,18 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom,
unsigned long long threshold,
unsigned long long excess);
/* Create a VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE event for the domain object
* @obj, carrying @recipient, @action and @flags. Returns NULL on failure. */
virObjectEventPtr
virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj,
int recipient,
int action,
unsigned int flags);
/* Create a VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE event for the public domain
* handle @dom, carrying @recipient, @action and @flags. Returns NULL on
* failure. */
virObjectEventPtr
virDomainEventMemoryFailureNewFromDom(virDomainPtr dom,
int recipient,
int action,
unsigned int flags);
int
virDomainEventStateRegister(virConnectPtr conn,
virObjectEventStatePtr state,

View File

@ -705,6 +705,8 @@ virDomainEventLifecycleNew;
virDomainEventLifecycleNewFromDef;
virDomainEventLifecycleNewFromDom;
virDomainEventLifecycleNewFromObj;
virDomainEventMemoryFailureNewFromDom;
virDomainEventMemoryFailureNewFromObj;
virDomainEventMetadataChangeNewFromDom;
virDomainEventMetadataChangeNewFromObj;
virDomainEventMigrationIterationNewFromDom;

View File

@ -1302,6 +1302,37 @@ remoteRelayDomainEventBlockThreshold(virConnectPtr conn,
}
/*
 * remoteRelayDomainEventMemoryFailure:
 * @conn: connection the event originated from
 * @dom: domain the event refers to
 * @recipient: recipient of the memory failure
 * @action: action taken for the memory failure
 * @flags: memory failure flags
 * @opaque: the daemonClientEventCallbackPtr registered for this client
 *
 * Packs the event into a remote_domain_event_memory_failure_msg and sends
 * it to the client as REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE.
 *
 * Returns -1 without sending anything if the callback ID is negative or
 * the ACL check rejects the domain, 0 otherwise.
 */
static int
remoteRelayDomainEventMemoryFailure(virConnectPtr conn,
                                    virDomainPtr dom,
                                    int recipient,
                                    int action,
                                    unsigned int flags,
                                    void *opaque)
{
    daemonClientEventCallbackPtr cb = opaque;
    remote_domain_event_memory_failure_msg msg;

    if (cb->callbackID < 0)
        return -1;

    if (!remoteRelayDomainEventCheckACL(cb->client, conn, dom))
        return -1;

    /* build return data */
    memset(&msg, 0, sizeof(msg));
    make_nonnull_domain(&msg.dom, dom);
    msg.callbackID = cb->callbackID;
    msg.recipient = recipient;
    msg.action = action;
    msg.flags = flags;

    remoteDispatchObjectEventSend(cb->client, remoteProgram,
                                  REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE,
                                  (xdrproc_t)xdr_remote_domain_event_memory_failure_msg,
                                  &msg);

    return 0;
}
static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventLifecycle),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventReboot),
@ -1328,6 +1359,7 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceRemovalFailed),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMetadataChange),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventBlockThreshold),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMemoryFailure),
};
G_STATIC_ASSERT(G_N_ELEMENTS(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST);

View File

@ -404,6 +404,11 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog,
virNetClientPtr client,
void *evdata, void *opaque);
/* Forward declaration: handler listed in remoteEvents[] for
* REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE messages. */
static void
remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog,
virNetClientPtr client,
void *evdata, void *opaque);
static void
remoteConnectNotifyEventConnectionClosed(virNetClientProgramPtr prog G_GNUC_UNUSED,
virNetClientPtr client G_GNUC_UNUSED,
@ -615,6 +620,10 @@ static virNetClientProgramEvent remoteEvents[] = {
remoteDomainBuildEventBlockThreshold,
sizeof(remote_domain_event_block_threshold_msg),
(xdrproc_t)xdr_remote_domain_event_block_threshold_msg },
{ REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE,
remoteDomainBuildEventMemoryFailure,
sizeof(remote_domain_event_memory_failure_msg),
(xdrproc_t)xdr_remote_domain_event_memory_failure_msg },
};
static void
@ -5440,6 +5449,29 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog G_GNUC_UNUSED,
}
/*
 * remoteDomainBuildEventMemoryFailure:
 * @prog: RPC program (unused)
 * @client: RPC client (unused)
 * @evdata: decoded remote_domain_event_memory_failure_msg
 * @opaque: the virConnectPtr the message belongs to
 *
 * Turns a received memory failure message back into a domain event and
 * queues it on the connection's event state under the callback ID carried
 * in the message. Nothing is queued if the domain handle cannot be
 * obtained.
 */
static void
remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog G_GNUC_UNUSED,
                                    virNetClientPtr client G_GNUC_UNUSED,
                                    void *evdata, void *opaque)
{
    remote_domain_event_memory_failure_msg *payload = evdata;
    virConnectPtr conn = opaque;
    struct private_data *priv = conn->privateData;
    virObjectEventPtr memFailure = NULL;
    virDomainPtr dom = get_nonnull_domain(conn, payload->dom);

    if (!dom)
        return;

    memFailure = virDomainEventMemoryFailureNewFromDom(dom,
                                                       payload->recipient,
                                                       payload->action,
                                                       payload->flags);

    /* The domain handle is not used past this point. */
    virObjectUnref(dom);

    virObjectEventStateQueueRemote(priv->eventState, memFailure,
                                   payload->callbackID);
}
static int
remoteStreamSend(virStreamPtr st,
const char *data,

View File

@ -3469,6 +3469,14 @@ struct remote_domain_event_callback_metadata_change_msg {
remote_string nsuri;
};
/* Message body of REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE: the client-side
* callback ID, the affected domain, and the recipient/action/flags of the
* memory failure. */
struct remote_domain_event_memory_failure_msg {
int callbackID;
remote_nonnull_domain dom;
int recipient;
int action;
unsigned int flags;
};
struct remote_connect_secret_event_register_any_args {
int eventID;
remote_secret secret;
@ -6668,5 +6676,11 @@ enum remote_procedure {
* @priority: high
* @acl: domain:read
*/
REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422
REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
/**
* @generate: both
* @acl: none
*/
REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423
};

View File

@ -2862,6 +2862,13 @@ struct remote_domain_event_callback_metadata_change_msg {
int type;
remote_string nsuri;
};
struct remote_domain_event_memory_failure_msg {
int callbackID;
remote_nonnull_domain dom;
int recipient;
int action;
u_int flags;
};
struct remote_connect_secret_event_register_any_args {
int eventID;
remote_secret secret;
@ -3558,4 +3565,5 @@ enum remote_procedure {
REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420,
REMOTE_PROC_DOMAIN_BACKUP_BEGIN = 421,
REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423,
};

View File

@ -13591,6 +13591,44 @@ virshEventBlockThresholdPrint(virConnectPtr conn G_GNUC_UNUSED,
}
/* Printable names for virDomainMemoryFailureRecipientType values, listed in
* the same order as the enum (hypervisor = 0, guest = 1). */
VIR_ENUM_DECL(virshEventMemoryFailureRecipientType);
VIR_ENUM_IMPL(virshEventMemoryFailureRecipientType,
VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST,
N_("hypervisor"),
N_("guest"));
/* Printable names for virDomainMemoryFailureActionType values, listed in
* the same order as the enum (ignore = 0, inject = 1, fatal = 2,
* reset = 3). */
VIR_ENUM_DECL(virshEventMemoryFailureActionType);
VIR_ENUM_IMPL(virshEventMemoryFailureActionType,
VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST,
N_("ignore"),
N_("inject"),
N_("fatal"),
N_("reset"));
/*
 * virshEventMemoryFailurePrint:
 * @conn: connection the event arrived on (unused)
 * @dom: domain the event refers to
 * @recipient: recipient of the memory failure
 * @action: action taken for the memory failure
 * @flags: memory failure flags
 * @opaque: passed through to virshEventPrint()
 *
 * Formats a 'memory-failure' event: the recipient and action are printed
 * by name (wrapped in UNKNOWNSTR()), and each known flag is printed as
 * 0 or 1. The resulting buffer is handed to virshEventPrint().
 */
static void
virshEventMemoryFailurePrint(virConnectPtr conn G_GNUC_UNUSED,
                             virDomainPtr dom,
                             int recipient,
                             int action,
                             unsigned int flags,
                             void *opaque)
{
    g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
    const char *recipientStr =
        UNKNOWNSTR(virshEventMemoryFailureRecipientTypeTypeToString(recipient));
    const char *actionStr =
        UNKNOWNSTR(virshEventMemoryFailureActionTypeTypeToString(action));
    /* Collapse each flag bit to exactly 0 or 1 for display. */
    int actionRequired = !!(flags & VIR_DOMAIN_MEMORY_FAILURE_ACTION_REQUIRED);
    int recursive = !!(flags & VIR_DOMAIN_MEMORY_FAILURE_RECURSIVE);

    virBufferAsprintf(&buf, _("event 'memory-failure' for domain %s:\n"
                              "recipient: %s\naction: %s\n"),
                      virDomainGetName(dom), recipientStr, actionStr);

    virBufferAsprintf(&buf, _("flags:\n"
                              "\taction required: %d\n\trecursive: %d\n"),
                      actionRequired, recursive);

    virshEventPrint(opaque, &buf);
}
virshDomainEventCallback virshDomainEventCallbacks[] = {
{ "lifecycle",
VIR_DOMAIN_EVENT_CALLBACK(virshEventLifecyclePrint), },
@ -13640,6 +13678,8 @@ virshDomainEventCallback virshDomainEventCallbacks[] = {
VIR_DOMAIN_EVENT_CALLBACK(virshEventMetadataChangePrint), },
{ "block-threshold",
VIR_DOMAIN_EVENT_CALLBACK(virshEventBlockThresholdPrint), },
{ "memory-failure",
VIR_DOMAIN_EVENT_CALLBACK(virshEventMemoryFailurePrint), },
};
G_STATIC_ASSERT(VIR_DOMAIN_EVENT_ID_LAST == G_N_ELEMENTS(virshDomainEventCallbacks));