#!/usr/bin/env python3
"""Generate libvirt x86 CPU model files from QEMU's built-in CPU table.

The script reads the ``builtin_x86_defs`` array out of QEMU's
``target/i386/cpu.c``, parses the C initializer with a small Lark grammar,
translates QEMU identifiers to libvirt names via the table ``T`` and writes
one ``x86_<name>.xml`` file per CPU model (and per model version) into the
given output directory.
"""

import argparse
import copy
import os
import re

try:
    import lark
except ImportError:
    # Only main() and transform() need the parser library.  Deferring the
    # failure keeps the pure helpers in this module importable without it;
    # main() reports the missing dependency explicitly.
    lark = None


T = {
    # translating qemu -> libvirt cpu vendor names
    "CPUID_VENDOR_AMD": "AMD",
    "CPUID_VENDOR_INTEL": "Intel",
    "CPUID_VENDOR_HYGON": "Hygon",

    # translating qemu -> libvirt cpu feature names
    "CPUID_6_EAX_ARAT": "arat",
    "CPUID_7_0_EBX_ADX": "adx",
    "CPUID_7_0_EBX_AVX2": "avx2",
    "CPUID_7_0_EBX_AVX512BW": "avx512bw",
    "CPUID_7_0_EBX_AVX512CD": "avx512cd",
    "CPUID_7_0_EBX_AVX512DQ": "avx512dq",
    "CPUID_7_0_EBX_AVX512ER": "avx512er",
    "CPUID_7_0_EBX_AVX512F": "avx512f",
    "CPUID_7_0_EBX_AVX512PF": "avx512pf",
    "CPUID_7_0_EBX_AVX512VL": "avx512vl",
    "CPUID_7_0_EBX_BMI1": "bmi1",
    "CPUID_7_0_EBX_BMI2": "bmi2",
    "CPUID_7_0_EBX_CLFLUSHOPT": "clflushopt",
    "CPUID_7_0_EBX_CLWB": "clwb",
    "CPUID_7_0_EBX_ERMS": "erms",
    "CPUID_7_0_EBX_FSGSBASE": "fsgsbase",
    "CPUID_7_0_EBX_HLE": "hle",
    "CPUID_7_0_EBX_INVPCID": "invpcid",
    "CPUID_7_0_EBX_MPX": "mpx",
    "CPUID_7_0_EBX_RDSEED": "rdseed",
    "CPUID_7_0_EBX_RTM": "rtm",
    "CPUID_7_0_EBX_SHA_NI": "sha-ni",
    "CPUID_7_0_EBX_SMAP": "smap",
    "CPUID_7_0_EBX_SMEP": "smep",
    "CPUID_7_0_ECX_AVX512BITALG": "avx512bitalg",
    "CPUID_7_0_ECX_AVX512_VBMI2": "avx512vbmi2",
    "CPUID_7_0_ECX_AVX512_VBMI": "avx512vbmi",
    "CPUID_7_0_ECX_AVX512VNNI": "avx512vnni",
    "CPUID_7_0_ECX_AVX512_VPOPCNTDQ": "avx512-vpopcntdq",
    "CPUID_7_0_ECX_CLDEMOTE": "cldemote",
    "CPUID_7_0_ECX_GFNI": "gfni",
    "CPUID_7_0_ECX_LA57": "la57",
    "CPUID_7_0_ECX_MOVDIR64B": "movdir64b",
    "CPUID_7_0_ECX_MOVDIRI": "movdiri",
    "CPUID_7_0_ECX_PKU": "pku",
    "CPUID_7_0_ECX_RDPID": "rdpid",
    "CPUID_7_0_ECX_UMIP": "umip",
    "CPUID_7_0_ECX_VAES": "vaes",
    "CPUID_7_0_ECX_VPCLMULQDQ": "vpclmulqdq",
    "CPUID_7_0_EDX_ARCH_CAPABILITIES": "arch-capabilities",
    "CPUID_7_0_EDX_AVX512_4FMAPS": "avx512-4fmaps",
    "CPUID_7_0_EDX_AVX512_4VNNIW": "avx512-4vnniw",
    "CPUID_7_0_EDX_CORE_CAPABILITY": "core-capability",
    "CPUID_7_0_EDX_SPEC_CTRL": "spec-ctrl",
    "CPUID_7_0_EDX_SPEC_CTRL_SSBD": "ssbd",
    "CPUID_7_0_EDX_STIBP": "stibp",
    "CPUID_7_1_EAX_AVX512_BF16": "avx512-bf16",
    "CPUID_8000_0008_EBX_CLZERO": "clzero",
    "CPUID_8000_0008_EBX_IBPB": "ibpb",
    "CPUID_8000_0008_EBX_STIBP": "amd-stibp",
    "CPUID_8000_0008_EBX_WBNOINVD": "wbnoinvd",
    "CPUID_8000_0008_EBX_XSAVEERPTR": "xsaveerptr",
    "CPUID_ACPI": "acpi",
    "CPUID_APIC": "apic",
    "CPUID_CLFLUSH": "clflush",
    "CPUID_CMOV": "cmov",
    "CPUID_CX8": "cx8",
    "CPUID_DE": "de",
    "CPUID_EXT2_3DNOW": "3dnow",
    "CPUID_EXT2_3DNOWEXT": "3dnowext",
    "CPUID_EXT2_FFXSR": "fxsr_opt",
    "CPUID_EXT2_LM": "lm",
    "CPUID_EXT2_MMXEXT": "mmxext",
    "CPUID_EXT2_NX": "nx",
    "CPUID_EXT2_PDPE1GB": "pdpe1gb",
    "CPUID_EXT2_RDTSCP": "rdtscp",
    "CPUID_EXT2_SYSCALL": "syscall",
    "CPUID_EXT3_3DNOWPREFETCH": "3dnowprefetch",
    "CPUID_EXT3_ABM": "abm",
    "CPUID_EXT3_CR8LEG": "cr8legacy",
    "CPUID_EXT3_FMA4": "fma4",
    "CPUID_EXT3_LAHF_LM": "lahf_lm",
    "CPUID_EXT3_MISALIGNSSE": "misalignsse",
    "CPUID_EXT3_OSVW": "osvw",
    "CPUID_EXT3_PERFCORE": "perfctr_core",
    "CPUID_EXT3_SSE4A": "sse4a",
    "CPUID_EXT3_SVM": "svm",
    "CPUID_EXT3_TBM": "tbm",
    "CPUID_EXT3_XOP": "xop",
    "CPUID_EXT_AES": "aes",
    "CPUID_EXT_AVX": "avx",
    "CPUID_EXT_CX16": "cx16",
    "CPUID_EXT_F16C": "f16c",
    "CPUID_EXT_FMA": "fma",
    "CPUID_EXT_MOVBE": "movbe",
    "CPUID_EXT_PCID": "pcid",
    "CPUID_EXT_PCLMULQDQ": "pclmuldq",
    "CPUID_EXT_POPCNT": "popcnt",
    "CPUID_EXT_RDRAND": "rdrand",
    "CPUID_EXT_SSE3": "pni",
    "CPUID_EXT_SSE41": "sse4.1",
    "CPUID_EXT_SSE42": "sse4.2",
    "CPUID_EXT_SSSE3": "ssse3",
    "CPUID_EXT_TSC_DEADLINE_TIMER": "tsc-deadline",
    "CPUID_EXT_X2APIC": "x2apic",
    "CPUID_EXT_XSAVE": "xsave",
    "CPUID_FP87": "fpu",
    "CPUID_FXSR": "fxsr",
    "CPUID_MCA": "mca",
    "CPUID_MCE": "mce",
    "CPUID_MMX": "mmx",
    "CPUID_MSR": "msr",
    "CPUID_MTRR": "mtrr",
    "CPUID_PAE": "pae",
    "CPUID_PAT": "pat",
    "CPUID_PGE": "pge",
    "CPUID_PSE36": "pse36",
    "CPUID_PSE": "pse",
    "CPUID_SEP": "sep",
    "CPUID_SSE2": "sse2",
    "CPUID_SSE": "sse",
    "CPUID_SS": "ss",
    "CPUID_SVM_NPT": "npt",
    "CPUID_SVM_NRIPSAVE": "nrip-save",
    "CPUID_TSC": "tsc",
    "CPUID_VME": "vme",
    "CPUID_XSAVE_XGETBV1": "xgetbv1",
    "CPUID_XSAVE_XSAVEC": "xsavec",
    "CPUID_XSAVE_XSAVEOPT": "xsaveopt",
    "CPUID_XSAVE_XSAVES": "xsaves",
    "MSR_ARCH_CAP_IBRS_ALL": "ibrs-all",
    "MSR_ARCH_CAP_MDS_NO": "mds-no",
    "MSR_ARCH_CAP_PSCHANGE_MC_NO": "pschange-mc-no",
    "MSR_ARCH_CAP_RDCL_NO": "rdctl-no",
    "MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY": "skip-l1dfl-vmentry",
    "MSR_ARCH_CAP_TAA_NO": "taa-no",
    "MSR_CORE_CAP_SPLIT_LOCK_DETECT": "split-lock-detect",

    # always disabled features
    "CPUID_EXT_MONITOR": None,
    "0": None,

    # set to "no auto enable" by qemu
    "CPUID_EXT3_TOPOEXT": None,
    "MSR_VMX_BASIC_DUAL_MONITOR": None,
}


def readline_cont(f):
    """Read one logical line from a file `f` i.e. continues lines that end in
    a backslash.

    The backslash-newline pair is replaced by a single space before the next
    physical line is appended.  Returns "" at end of file, like
    `f.readline()`.
    """

    line = f.readline()
    while line.endswith("\\\n"):
        line = line[:-2] + " " + f.readline()
    return line


def _expand_shorthands(text, shorthands):
    """Return `text` with every known shorthand macro name expanded.

    Names are matched as whole words so that a longer identifier which merely
    contains a macro name is left alone.  Macros are applied one after the
    other in definition order, so an expansion that still mentions a macro
    defined later is expanded by that later pass.
    """

    for name, expansion in shorthands.items():
        # A callable replacement keeps `expansion` literal (no backslash or
        # group-reference processing by re.sub).
        text = re.sub(
            r"\b{}\b".format(re.escape(name)),
            lambda _match, repl=expansion: repl,
            text)
    return text


def read_builtin_x86_defs(filename):
    """Extract content between begin_mark and end_mark from file `filename` as
    string, while expanding shorthand macros like "I486_FEATURES".

    Everything before the begin mark is scanned for
    ``#define <NAME>_FEATURES <value>`` lines; macros whose name starts with
    ``TCG_`` are ignored.  Raises RuntimeError if either mark is missing.
    """

    begin_mark = "static X86CPUDefinition builtin_x86_defs[] = {\n"
    end_mark = "};\n"
    shorthand = re.compile(r"^#define ([A-Z0-9_]+_FEATURES) (.*)$")
    lines = []
    shorthands = {}

    with open(filename, "rt", encoding="utf-8") as f:
        # Phase 1: collect shorthand macros up to the start of the table.
        while True:
            line = readline_cont(f)
            if line == begin_mark:
                break
            if not line:
                raise RuntimeError("begin mark not found")
            match = shorthand.match(line)
            if not match:
                continue
            # TCG definitions are irrelevant for cpu models
            newk = match.group(1)
            if newk.startswith("TCG_"):
                continue
            # remove comments, whitespace and bit operators, effectively
            # turning the bitfield into a list
            newv = re.sub(r"([()|\t\n])|(/\*.*?\*/)", " ", match.group(2))
            # resolve recursive shorthands
            shorthands[newk] = _expand_shorthands(newv, shorthands)

        # Phase 2: copy the table body, expanding shorthands line by line.
        while True:
            line = readline_cont(f)
            if line == end_mark:
                break
            if not line:
                raise RuntimeError("end marker not found")
            lines.append(_expand_shorthands(line, shorthands))

    return "".join(lines)


def transform(item):
    """Recursively transform a Lark syntax tree into python native objects.

    Tokens become `str`, "list" nodes become lists, "map" nodes become dicts
    and "text" nodes become their tokens joined by a single space.  A "value"
    node without children (an empty ``{ }`` initializer) becomes None, and
    None values are dropped from the surrounding list or map.
    """

    if isinstance(item, lark.lexer.Token):
        return str(item)

    if item.data == "list":
        values = (transform(child) for child in item.children)
        return [value for value in values if value is not None]

    if item.data == "map":
        retval = {}
        for child in item.children:
            if len(child.children) != 2:
                raise RuntimeError("map entry with more than 2 elements")
            key = transform(child.children[0])
            value = transform(child.children[1])
            if key is None:
                raise RuntimeError("map entry with 'None' key")
            if value is None:
                continue
            retval[key] = value
        return retval

    if item.data == "text":
        values = (transform(child) for child in item.children)
        return " ".join(value for value in values if value is not None)

    if item.data == "value":
        # With the "?value" rule only the childless alternative survives as
        # its own node; anything else here is unexpected.
        if item.children:
            raise RuntimeError("empty list is not empty")
        return None

    raise RuntimeError("unexpected item type")


def expand_model(model):
    """Expand a qemu cpu model description that has its feature split up into
    different fields and may have differing versions into several libvirt-
    friendly cpu models.

    `model` is a dict as produced by `transform`; it is consumed (keys are
    popped).  Yields one dict for the base model and then one per entry in
    ".versions", each derived from the previously yielded one.  Every result
    has the keys "name", "vendor", "features" (a set) and "extra", plus
    "family" and "model" when the input provides both.
    """

    result = {
        "name": model.pop(".name"),
        "vendor": T[model.pop(".vendor")],
        "features": set(),
        "extra": {}}

    if ".family" in model and ".model" in model:
        result["family"] = model.pop(".family")
        result["model"] = model.pop(".model")

    for k in [k for k in model if k.startswith(".features")]:
        v = model.pop(k)
        for feature in v.split():
            if feature.startswith(("VMX_", "MSR_VMX_")):
                continue
            # Unknown names pass through unchanged; names mapped to None in
            # T are dropped.
            translated = T.get(feature, feature)
            if translated:
                result["features"].add(translated)

    versions = model.pop(".versions", [])
    # Whatever is left over is kept for reference only.
    for k, v in model.items():
        result["extra"]["model" + k] = v
    yield result

    for version in versions:
        # Each version builds on the state of the one before it.
        result = copy.deepcopy(result)
        result["name"] = version.pop(".alias", result["name"])

        props = version.pop(".props", [])
        for k, v in props:
            if v == "on":
                result["features"].add(k)
            elif v == "off" and k in result["features"]:
                result["features"].remove(k)
            else:
                result["extra"]["property." + k] = v

        for k, v in version.items():
            result["extra"]["version" + k] = v

        yield result


def output_model(f, model):
    """Write one cpu model, as produced by `expand_model`, to the open text
    file `f` in libvirt's cpu_map XML layout.

    Entries of model["extra"] are emitted as a leading XML comment.  The
    signature element is written only when both "family" and "model" are
    present, matching `expand_model`, which treats them as optional.
    """

    # NOTE(review): the markup below was reconstructed; the literals found
    # here contained no tags or format placeholders, so only blank lines were
    # written.  Confirm element names against existing src/cpu_map files.
    if model["extra"]:
        f.write("<!-- extra info from qemu:\n")
        for k, v in model["extra"].items():
            f.write("  '{}': '{}'\n".format(k, v))
        f.write("-->\n")

    f.write("<cpus>\n")
    f.write("  <model name='{}'>\n".format(model["name"]))
    f.write("    <decode host='on' guest='on'/>\n")
    if "family" in model and "model" in model:
        f.write("    <signature family='{}' model='{}'/>\n".format(
            model["family"], model["model"]))
    f.write("    <vendor name='{}'/>\n".format(model["vendor"]))
    for feature in sorted(model["features"]):
        f.write("    <feature name='{}'/>\n".format(feature))
    f.write("  </model>\n")
    f.write("</cpus>\n")


def main():
    """Parse the command line, read QEMU's cpu table and write one
    ``x86_<name>.xml`` file per expanded model into the output directory."""

    parser = argparse.ArgumentParser(
        description="Synchronize x86 cpu models from QEMU i386 target.")
    parser.add_argument(
        "cpufile",
        help="Path to 'target/i386/cpu.c' file in the QEMU repository",
        type=os.path.realpath)
    parser.add_argument(
        "outdir",
        help="Path to 'src/cpu_map' directory in the libvirt repository",
        type=os.path.realpath)

    args = parser.parse_args()

    if lark is None:
        raise SystemExit("error: the python module 'lark' is required")

    builtin_x86_defs = read_builtin_x86_defs(args.cpufile)

    # Minimal grammar for the C initializer list: nested "{ ... }" lists and
    # ".key = value" maps; casts, "|" operators and comments are ignored.
    ast = lark.Lark(r"""
        list: value ( "," value )* ","?
        map: keyvalue ( "," keyvalue )* ","?
        keyvalue: IDENTIFIER "=" value
        ?value: text | "{" "}" | "{" list "}" | "{" map "}"
        text: (IDENTIFIER | "\"" (/[^"]+/)? "\"")+
        IDENTIFIER: /[\[\]\._&a-zA-Z0-9]/+
        %ignore (" " | "\r" | "\n" | "\t" | "|" )+
        %ignore "(" ( "X86CPUVersionDefinition" | "PropValue" ) "[])"
        %ignore "//" /.*?/ "\n"
        %ignore "/*" /(.|\n)*?/ "*/"
        """, start="list").parse(builtin_x86_defs)

    models_json = transform(ast)

    models = []
    for model in models_json:
        models.extend(expand_model(model))

    for model in models:
        name = os.path.join(args.outdir, "x86_{}.xml".format(model["name"]))
        with open(name, "wt", encoding="utf-8") as f:
            output_model(f, model)


if __name__ == "__main__":
    main()