#!/usr/bin/env python3
#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#

import argparse
import glob
import os
import re
import sys

# When true, the informational prints guarded by "not quiet" are suppressed.
quiet = True
# Running count of warnings; incremented by the warning() methods below.
warnings = 0
# NOTE(review): not read anywhere in this section of the file.
debug = False
# Name of a symbol to trace: identifier and index print a message whenever
# they define or update a symbol with this name (only when quiet is off).
debugsym = None

#
# C parser analysis code
#
# File name -> short description of the file.
# NOTE(review): presumably the headers/sources scanned for the main libvirt
# API; the code consuming these tables is outside this section -- confirm.
included_files = {
    "libvirt-common.h": "header with general libvirt API definitions",
    "libvirt-domain.h": "header with general libvirt API definitions",
    "libvirt-domain-checkpoint.h": "header with general libvirt API definitions",
    "libvirt-domain-snapshot.h": "header with general libvirt API definitions",
    "libvirt-event.h": "header with general libvirt API definitions",
    "libvirt-host.h": "header with general libvirt API definitions",
    "libvirt-interface.h": "header with general libvirt API definitions",
    "libvirt-network.h": "header with general libvirt API definitions",
    "libvirt-nodedev.h": "header with general libvirt API definitions",
    "libvirt-nwfilter.h": "header with general libvirt API definitions",
    "libvirt-secret.h": "header with general libvirt API definitions",
    "libvirt-storage.h": "header with general libvirt API definitions",
    "libvirt-stream.h": "header with general libvirt API definitions",
    "virterror.h": "header with error specific API definitions",
    "libvirt.c": "Main interfaces for the libvirt library",
    "libvirt-domain.c": "Domain interfaces for the libvirt library",
    "libvirt-domain-checkpoint.c": "Domain checkpoint interfaces for the libvirt library",
    "libvirt-domain-snapshot.c": "Domain snapshot interfaces for the libvirt library",
    "libvirt-host.c": "Host interfaces for the libvirt library",
    "libvirt-interface.c": "Interface interfaces for the libvirt library",
    "libvirt-network.c": "Network interfaces for the libvirt library",
    "libvirt-nodedev.c": "Node device interfaces for the libvirt library",
    "libvirt-nwfilter.c": "NWFilter interfaces for the libvirt library",
    "libvirt-secret.c": "Secret interfaces for the libvirt library",
    "libvirt-storage.c": "Storage interfaces for the libvirt library",
    "libvirt-stream.c": "Stream interfaces for the libvirt library",
    "virerror.c": "implements error handling and reporting code for libvirt",
    "virevent.c": "event loop for monitoring file handles",
    "virtypedparam-public.c": "virTypedParameters APIs",
}

# Same layout as included_files, for the QEMU specific API files.
qemu_included_files = {
    "libvirt-qemu.h": "header with QEMU specific API definitions",
    "libvirt-qemu.c": "Implementations for the QEMU specific APIs",
}

# Same layout as included_files, for the LXC specific API files.
lxc_included_files = {
    "libvirt-lxc.h": "header with LXC specific API definitions",
    "libvirt-lxc.c": "Implementations for the LXC specific APIs",
}

# Same layout as included_files, for the admin API files. Note that the
# .c entry is keyed by a relative path ("admin/...") rather than a bare name.
admin_included_files = {
    "libvirt-admin.h": "header with admin specific API definitions",
    "admin/libvirt-admin.c": "Implementations for the admin specific APIs",
}

# Word -> (count, description).
# NOTE(review): presumably words the parser drops from the token stream,
# with count being the number of following tokens dropped along with the
# word (e.g. 3 for "__declspec(...)"); the code using this table is outside
# this section -- confirm.
ignored_words = {
    "G_GNUC_UNUSED": (0, "macro keyword"),
    "G_GNUC_NULL_TERMINATED": (0, "macro keyword"),
    "VIR_DEPRECATED": (0, "macro keyword"),
    "VIR_EXPORT_VAR": (0, "macro keyword"),
    "WINAPI": (0, "Windows keyword"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
}

# Function name -> reason it is ignored.
# CParser.mergeFunctionComment() switches to quiet mode for these names, so
# problems with their comments do not produce warnings.
ignored_functions = {
    "virConnectSupportsFeature": "private function for remote access",
    "virDomainMigrateCheckNotLocal": "private function for migration",
    "virDomainMigrateFinish": "private function for migration",
    "virDomainMigrateFinish2": "private function for migration",
    "virDomainMigratePerform": "private function for migration",
    "virDomainMigratePrepare": "private function for migration",
    "virDomainMigratePrepare2": "private function for migration",
    "virDomainMigratePrepareTunnel": "private function for tunnelled migration",
    "virDomainMigrateBegin3": "private function for migration",
    "virDomainMigrateFinish3": "private function for migration",
    "virDomainMigratePerform3": "private function for migration",
    "virDomainMigratePrepare3": "private function for migration",
    "virDomainMigrateConfirm3": "private function for migration",
    "virDomainMigratePrepareTunnel3": "private function for tunnelled migration",
    "DllMain": "specific function for Win32",
    "virTypedParamsValidate": "internal function in virtypedparam.c",
    "virTypedParameterValidateSet": "internal function in virtypedparam.c",
    "virTypedParameterAssign": "internal function in virtypedparam.c",
    "virTypedParameterAssignFromStr": "internal function in virtypedparam.c",
    "virTypedParameterToString": "internal function in virtypedparam.c",
    "virTypedParamsCheck": "internal function in virtypedparam.c",
    "virTypedParamsCopy": "internal function in virtypedparam.c",
    "virDomainMigrateBegin3Params": "private function for migration",
    "virDomainMigrateFinish3Params": "private function for migration",
    "virDomainMigratePerform3Params": "private function for migration",
    "virDomainMigratePrepare3Params": "private function for migration",
    "virDomainMigrateConfirm3Params": "private function for migration",
    "virDomainMigratePrepareTunnel3Params": "private function for tunnelled migration",
    "virErrorCopyNew": "private",
}

# The version recorded in the .sym file might differ from the version
# in which the function was really introduced.
# The values of this dict are the correct versions, as they should
# appear in the docstrings.
ignored_function_versions = {
    'virDomainSetBlockThreshold': '3.2.0',
    'virAdmServerUpdateTlsFiles': '6.2.0',
    'virDomainBlockPeek': '0.4.3',
    'virDomainMemoryPeek': '0.4.3',
}

# Macro name -> reason it is ignored.
# CParser.parseMacroComment() switches to quiet mode for these names, so
# problems with their comments do not produce warnings.
ignored_macros = {
    "_virSchedParameter": "backward compatibility macro for virTypedParameter",
    "_virBlkioParameter": "backward compatibility macro for virTypedParameter",
    "_virMemoryParameter": "backward compatibility macro for virTypedParameter",
}

# macros that should be completely skipped
# (macro name -> reason; the code consulting this table is outside this
# section of the file)
hidden_macros = {
    "VIR_DEPRECATED": "internal macro to mark deprecated apis",
    "VIR_EXPORT_VAR": "internal macro to mark exported vars",
}


def escape(raw):
    """Return *raw* with the five XML special characters escaped.

    '&', '<', '>', "'" and '"' are replaced by their predefined entity
    references so the text can be embedded in XML content or attribute
    values.
    """
    # The ampersand must be handled first: the other replacements introduce
    # '&' characters that must not be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw


def uniq(items):
    """Return the distinct elements of *items* as a sorted list."""
    distinct = list(set(items))
    distinct.sort()
    return distinct


class identifier:
    """Description of one symbol collected while parsing.

    Attributes:
      name         -- symbol name
      header       -- header value recorded for the symbol (may be None)
      module       -- module value recorded for the symbol (may be None)
      type         -- kind of symbol as a string (e.g. "function", "macro")
      info, extra  -- kind-specific payloads, stored as given
      lineno       -- line number recorded at creation
      static       -- 0 by default, changed through set_static()
      conditionals -- private copy of the list of conditionals, or None when
                      the list is missing or empty
    """

    def __init__(self, name, header=None, module=None, type=None, lineno=0,
                 info=None, extra=None, conditionals=None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym and not quiet:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % self.module
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_lineno(self, lineno):
        self.lineno = lineno

    def set_static(self, static):
        self.static = static

    def set_conditionals(self, conditionals):
        """Store a copy of *conditionals*; None or an empty list become None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # Copy so that later changes to the caller's list are not
            # reflected in this identifier.
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name

    def get_header(self):
        # This used to return self.module, which made the getter disagree
        # with set_header() and the header attribute.
        return self.header

    def get_module(self):
        return self.module

    def get_type(self):
        return self.type

    def get_info(self):
        return self.info

    def get_lineno(self):
        return self.lineno

    def get_extra(self):
        return self.extra

    def get_static(self):
        return self.static

    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type=None, info=None, extra=None,
               conditionals=None):
        """Merge newly found information into this identifier.

        header and type are only filled in when still unset; module is set
        when unset or when it currently equals the header; info, extra and
        conditionals are overwritten whenever a non-None value is given.
        The line number is never changed by an update.
        """
        if self.name == debugsym and not quiet:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Record the header itself (the module value used to be stored
            # here by mistake).
            self.set_header(header)
        if module is not None and (self.module is None or self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)


class index:
    """Symbol tables collected from one or more parsed files.

    identifiers maps every known name to its identifier object; the
    per-kind dicts (functions, variables, includes, structs, unions, enums,
    typedefs, macros) and references map names to the same kind of object.
    """

    def __init__(self, name="noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.unions = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def warning(self, msg):
        """Print *msg* and bump the module-wide warning counter."""
        global warnings
        warnings = warnings + 1
        print(msg)

    def _lookup_or_create(self, name, header, module, static, type, lineno,
                          info, extra, conditionals):
        """Return the identifier for *name*, updating or creating it."""
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d
        else:
            # identifier.update() has no lineno parameter. Passing it used
            # to raise a TypeError that a blanket "except Exception"
            # swallowed, so known symbols were silently replaced by a fresh
            # identifier instead of being updated.
            d.update(header, module, type, info, extra, conditionals)

        if static == 1:
            d.set_static(1)
        return d

    def _table_for(self, type):
        """Return the per-kind dict for *type*, or None if it is unknown."""
        return {
            "function": self.functions,
            "functype": self.functions,
            "variable": self.variables,
            "include": self.includes,
            "struct": self.structs,
            "union": self.unions,
            "enum": self.enums,
            "typedef": self.typedefs,
            "macro": self.macros
        }.get(type)

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals=None):
        """Record a reference to *name* and return its identifier.

        Names starting with '__' are ignored (None is returned). The
        identifier is stored in self.references when type is not None.
        """
        if name[0:2] == '__':
            return None
        d = self._lookup_or_create(name, header, module, static, type,
                                   lineno, info, extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym and not quiet:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None,
            extra=None, conditionals=None):
        """Record a definition of *name* and return its identifier.

        Names starting with '__' are ignored (None is returned). When type
        is not None the identifier is stored in the matching per-kind dict;
        an unknown type only produces a warning.
        """
        if name[0:2] == '__':
            return None
        d = self._lookup_or_create(name, header, module, static, type,
                                   lineno, info, extra, conditionals)

        if type is not None:
            table = self._table_for(type)
            if table is not None:
                table[name] = d
            else:
                self.warning("Unable to register type %s" % type)

        if name == debugsym and not quiet:
            print("New symbol: %s" % (d))

        return d

    def _merge_table(self, kind, ours, theirs, overrides_macro=False):
        """Copy the entries of *theirs* into *ours*, warning on duplicates.

        With overrides_macro set, a macro of the same name already known to
        this index is dropped first (macros might be used to override
        function or variable definitions).
        """
        for id in theirs.keys():
            if overrides_macro and id in self.macros:
                del self.macros[id]
            if id in ours:
                self.warning("%s %s from %s redeclared in %s" % (
                    kind, id, ours[id].header, theirs[id].header))
            else:
                ours[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Merge the symbols of *idx* into this index.

        Existing entries win: a redeclared symbol only produces a warning.
        The processing order matters because the macro pass consults the
        function, variable and enum tables as they stand at that point
        (enums from *idx* are merged after the macros).
        """
        self._merge_table("function", self.functions, idx.functions,
                          overrides_macro=True)
        self._merge_table("variable", self.variables, idx.variables,
                          overrides_macro=True)
        self._merge_table("struct", self.structs, idx.structs)
        # Union redeclarations are reported through warning() like every
        # other kind (they used to be printed without being counted).
        self._merge_table("union", self.unions, idx.unions)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for id in idx.macros.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.variables or id in self.functions or id in self.enums:
                continue
            if id in self.macros:
                self.warning("macro %s from %s redeclared in %s" % (
                    id, self.macros[id].header, idx.macros[id].header))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here with the data from *idx*.

        Functions of *idx* that are unknown to this index are skipped. A
        warning is emitted when the conditionals of both sides differ.
        """
        for id in idx.functions.keys():
            if id in self.functions:
                up = idx.functions[id]
                # check that function condition agrees with header
                if up.conditionals != self.functions[id].conditionals:
                    self.warning("Header condition differs from Function"
                                 " for %s:" % id)
                    self.warning("  H: %s" % self.functions[id].conditionals)
                    self.warning("  C: %s" % up.conditionals)
                self.functions[id].update(None, up.module, up.type, up.info,
                                          up.extra)
        #     else:
        #         print("Function %s from %s is not declared in headers" % (
        #               id, idx.functions[id].module))
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries of *dict* exist and how many are public.

        An entry is public when its static attribute is 0. Nothing is
        printed for an empty dict.
        """
        count = len(dict)
        public = sum(1 for id in dict.values() if id.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print per-kind statistics unless running in quiet mode."""
        if not quiet:
            self.analyze_dict("functions", self.functions)
            self.analyze_dict("variables", self.variables)
            self.analyze_dict("structs", self.structs)
            self.analyze_dict("unions", self.unions)
            self.analyze_dict("typedefs", self.typedefs)
            self.analyze_dict("macros", self.macros)


class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) tuples where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'. *input* only needs a
       readline() method returning '' at end of input."""
    def __init__(self, input):
        self.input = input
        self.tokens = []   # tokens already lexed but not yet returned
        self.line = ""     # unprocessed remainder of the current line
        self.lineno = 0
        # Last token handed out; initialised here so that debug() can be
        # called before the first token has been produced.
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at end of input.

        Lines ending with a backslash are joined with the following line(s).
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno += 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline().strip()
                self.lineno += 1
                if not n:
                    break
                line += n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put *token* back so that it is the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % self.lineno, self.line)

    def token(self):
        """Return the next token, or None when the input is exhausted."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # A preprocessor line is split on whitespace, each word
                # becoming one 'preproc' token.
                self.tokens = [('preproc', word) for word in line.split()]

                # We might have whitespace between the '#' and preproc
                # macro name, so instead of having a single token element
                # of '#define' we might end up with '#' and 'define'. This
                # merges them back together
                # (a lone '#' has nothing to merge with and is kept as is)
                if self.tokens[0][1] == "#" and len(self.tokens) > 1:
                    self.tokens[0] = ('preproc', "#" + self.tokens[1][1])
                    del self.tokens[1]

                # For a function-like macro, glue the macro name and its
                # argument list back into a single token. The length check
                # covers a '#define' with nothing after it.
                if len(self.tokens) > 1 and \
                   self.tokens[0][1] == "#define" and "(" in self.tokens[1][1]:
                    newtokens = [self.tokens[0]]

                    endArg = self.tokens[1][1].find(")")
                    if endArg != -1:
                        extra = self.tokens[1][1][endArg + 1:]
                        name = self.tokens[1][1][0:endArg + 1]
                        newtokens.append(('preproc', name))
                        if extra != "":
                            newtokens.append(('preproc', extra))
                    else:
                        name = self.tokens[1][1]
                        for token in self.tokens[2:]:
                            if name is not None:
                                name = name + token[1]
                                if ")" in token[1]:
                                    newtokens.append(('preproc', name))
                                    name = None
                            else:
                                newtokens.append(token)
                    self.tokens = newtokens
                break
            nline = len(line)
            if line[0] == '"' or line[0] == "'":
                # String/char literal: keep appending lines until the
                # closing quote shows up. Escaped quotes are not handled.
                quote = line[0]
                i = 1
                while quote not in line[i:]:
                    i = len(line)
                    nextline = self.getline()
                    if nextline is None:
                        return None
                    line += nextline

                tok, self.line = line[1:].split(quote, 1)
                self.last = ('string', tok)
                return self.last

            if line.startswith("/*"):
                # Block comment, possibly spanning several lines; the lines
                # are joined with '\n' in the token text.
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    nline = len(line)
                    while i < nline:
                        if line[i] == '*' and i + 1 < nline and line[i + 1] == '/':
                            self.line = line[i + 2:]
                            # NOTE: this also drops the character in front
                            # of the terminator, and a line consisting of
                            # just the terminator yields "*". Kept as is:
                            # the comment parsers are written against this
                            # shape.
                            line = line[:i - 1]
                            nline = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if line.startswith("//"):
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            # Plain code: cut the line at the first comment or quote and
            # keep the remainder for the next round.
            i = 0
            while i < nline:
                if line[i] == '/' and i + 1 < nline and line[i + 1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i + 1 < nline and line[i + 1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            nline = len(line)
            i = 0
            while i < nline:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                if line[i].isalnum():
                    s = i
                    while i < nline:
                        if line[i] not in " \t(){}:;,+-*/%&!|[]=><":
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                if line[i] in "(){}:;,[]":
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                if line[i] in "+-*><=/%&!|.":
                    if line[i] == '.' and i + 2 < nline and \
                       line[i + 1] == '.' and line[i + 2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    # Consecutive operator characters form a single token.
                    j = i
                    while (j + 1) < nline and line[j + 1] in "+-*><=/%&!|":
                        j = j + 1

                    self.tokens.append(('op', line[i:j + 1]))
                    i = j + 1
                    continue
                # Anything else (e.g. a leading underscore) starts a name.
                s = i
                while i < nline:
                    if line[i] not in " \t(){}:;,+-*/%&!|[]=><":
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok


class CParser:
    """The C module parser"""
    def __init__(self, filename, idx=None):
        """Open *filename* and set up a lexer over it.

        Symbols are recorded in *idx* when given, otherwise in a new index.
        A file is flagged as a header when its name ends in '.h' and has
        more than two characters.
        """
        self.filename = filename
        looks_like_header = len(filename) > 2 and filename.endswith('.h')
        self.is_header = 1 if looks_like_header else 0
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        self.index = index() if idx is None else idx
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None
        self.collect_ref = 0
        self.no_error = 0
        self.conditionals = []
        self.defines = []

    def collect_references(self):
        """Set the collect_ref flag to 1."""
        self.collect_ref = 1

    def stop_error(self):
        """Set no_error, which silences warning() output and error()."""
        self.no_error = 1

    def start_error(self):
        """Clear no_error so warning() and error() report again."""
        self.no_error = 0

    def lineno(self):
        """Return the lexer's current line number."""
        return self.lexer.getlineno()

    def index_add(self, name, module, static, type, info=None, extra=None):
        if self.is_header == 1:
            self.index.add(name, module, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
        else:
            self.index.add(name, None, module, static, type, self.lineno(),
                           info, extra, self.conditionals)

    def index_add_ref(self, name, module, static, type, info=None,
                      extra=None):
        if self.is_header == 1:
            self.index.add_ref(name, module, module, static, type,
                               self.lineno(), info, extra, self.conditionals)
        else:
            self.index.add_ref(name, None, module, static, type, self.lineno(),
                               info, extra, self.conditionals)

    def warning(self, msg):
        """Count a warning and print *msg* unless errors are silenced.

        The module-wide counter is incremented even when no_error is set.
        """
        global warnings
        warnings += 1
        if not self.no_error:
            print(msg)

    def error(self, msg, token=-1):
        if self.no_error:
            return

        print("Parse Error: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
        sys.exit(1)

    def debug(self, msg, token=-1):
        print("Debug: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()

    def parseTopComment(self, comment):
        res = {}
        lines = comment.split("\n")
        item = None
        for line in lines:
            line = line.lstrip().lstrip('*').lstrip()

            m = re.match(r'([_.a-zA-Z0-9]+):(.*)', line)
            if m:
                item = m.group(1)
                line = m.group(2).lstrip()

            # don't include the Copyright in the last 'item'
            if line.startswith("Copyright (C)"):
                # truncate any whitespace originating from newlines
                # before the Copyright
                if item:
                    res[item] = res[item].rstrip()
                break

            if item:
                if item in res:
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
        self.index.info = res

    def strip_lead_star(self, line):
        """Remove the first '*' of *line* when only whitespace precedes it.

        The whitespace in front of the star is kept; other lines are
        returned unchanged.
        """
        body = line.lstrip()
        if not body.startswith('*'):
            return line
        indent = line[:len(line) - len(body)]
        return indent + body[1:]

    def cleanup_code_comment(self, comment: str, type_name="") -> str:
        if not isinstance(comment, str) or comment == "":
            return ""

        lines = comment.splitlines(True)

        # If type_name is provided, check and remove header of
        # the comment block.
        if type_name != "" and f"{type_name}:" in lines[0]:
            del lines[0]

        com = ""
        for line in lines:
            com = com + self.strip_lead_star(line)
        return com.strip()

    def cleanupComment(self):
        """Replace self.comment with its cleanup_code_comment() form."""
        self.comment = self.cleanup_code_comment(self.comment)

    def parseComment(self, token):
        com = token[1]
        if self.top_comment == "":
            self.top_comment = com
        if self.comment is None or com[0] == '*':
            self.comment = com
        else:
            self.comment = self.comment + com
        token = self.lexer.token()

        if self.comment.find("DOC_DISABLE") != -1:
            self.stop_error()

        if self.comment.find("DOC_ENABLE") != -1:
            self.start_error()

        return token

    #
    # Parse the comment block associated with a typedef
    #
    def parseTypeComment(self, name, quiet=False):
        if name[0:2] == '__':
            quiet = True

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for type %s" % name)
            return None
        if not self.comment.startswith('*'):
            if not quiet:
                self.warning("Missing * in type comment for %s" % name)
            return None

        lines = self.comment.split('\n')
        # Remove lines that contain only single asterisk
        lines[:] = [line for line in lines if line.strip() != '*']

        if lines[0] != "* %s:" % name:
            if not quiet:
                self.warning("Misformatted type comment for %s" % name)
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
            return None
        del lines[0]

        # Concatenate all remaining lines by striping leading asterisks
        desc = " ".join([line.lstrip("*").strip() for line in lines]).strip()

        if not (quiet or desc):
            self.warning("Type comment for %s lack description of the macro"
                         % name)

        return desc

    #
    # Parse the comment block associated with a macro
    #
    def parseMacroComment(self, name, quiet=0):
        """Parse the pending comment as the documentation of macro *name*.

        Expected layout: an optional '*' line, the header "* <name>:",
        optional "* @arg: text" entries (each may continue over the lines
        that follow it) and finally the free-form description.

        Returns (args, desc) where args is a list of (argument, description)
        tuples and desc the description joined into one line. Both are empty
        when the comment is missing or malformed. Warnings are emitted
        unless *quiet* is set, *name* starts with '__' or *name* is listed
        in ignored_macros.
        """
        if name[0:2] == '__':
            quiet = 1
        if name in ignored_macros:
            quiet = 1

        args = []
        desc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for macro %s" % name)
            return args, desc
        # startswith() also copes with an empty comment text.
        if not self.comment.startswith('*'):
            if not quiet:
                self.warning("Missing * in macro comment for %s" % name)
            return args, desc
        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        # The comment may have consisted of that single '*' line only.
        header = lines[0] if lines else ""
        if header != "* %s:" % name:
            if not quiet:
                self.warning("Misformatted macro comment for %s" % name)
                self.warning("  Expecting '* %s:' got '%s'" % (name, header))
            return args, desc
        del lines[0]
        # Nothing at all may follow the header, hence the emptiness checks
        # in every loop below.
        while lines and lines[0] == '*':
            del lines[0]
        while lines and lines[0][0:3] == '* @':
            prefix = lines[0][3:]
            try:
                arg, desc = prefix.split(':', 1)
            except ValueError:
                # no ':' separating the argument name from its description
                if not quiet:
                    self.warning("Misformatted macro comment for %s" % name)
                    self.warning("  problem with '%s'" % lines[0])
                del lines[0]
                continue
            desc = desc.strip()
            arg = arg.strip()
            del lines[0]
            # Lines longer than two characters that do not start a new
            # '* @' entry continue the current argument description.
            line = lines[0].strip() if lines else ""
            while len(line) > 2 and line[0:3] != '* @':
                line = line.lstrip('*')
                desc = desc + ' ' + line.strip()
                del lines[0]
                if not lines:
                    break
                line = lines[0]
            args.append((arg, desc))
        while lines and lines[0] == '*':
            del lines[0]

        # Whatever is left is the description of the macro itself.
        desc = " ".join(line.lstrip('*').strip() for line in lines).strip()

        if quiet == 0:
            if desc == "":
                self.warning("Macro comment for %s lack description of the macro" % name)

        return args, desc

    #
    # Parse a comment block and merge the information found in the
    # parameters descriptions, finally returns a block as complete
    # as possible
    #
    def mergeFunctionComment(self, name, description, quiet=0):
        """Merge the pending comment block into a function description.

        ``description`` is a ``(ret, args)`` pair.  ``ret[0]`` is used as the
        return type and every entry of ``args`` is indexed as
        ``(type, name, description)``.  ``self.comment`` is expected to look
        like::

            * name:
            * @arg: description of arg
            *
            * free form description
            *
            * Returns description of the return value

        Entries of ``args`` whose name matches an ``@arg`` line are replaced
        in place with the description filled in.

        Returns ``((ret[0], retdesc), args, desc)``.  When the comment is
        missing or malformed, ``retdesc`` and ``desc`` are empty strings.
        Warnings are only emitted when ``quiet`` is false; they are always
        suppressed for ``main``, for names starting with ``__`` and for names
        listed in ``ignored_functions``.
        """
        global ignored_functions

        if name == 'main' or name[0:2] == '__' or name in ignored_functions:
            quiet = 1

        ret, args = description
        desc = ""
        retdesc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for function %s" % name)
            return (ret[0], retdesc), args, desc
        # startswith() also copes with an empty comment string, which
        # indexing [0] would not.
        if not self.comment.startswith('*'):
            if not quiet:
                self.warning("Missing * in function comment for %s" % name)
            return (ret[0], retdesc), args, desc
        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        # The comment may have consisted of a lone '*': nothing is left then.
        if len(lines) == 0 or lines[0] != "* %s:" % name:
            if not quiet:
                self.warning("Misformatted function comment for %s" % name)
                self.warning("  Expecting '* %s:' got '%s'" %
                             (name, lines[0] if lines else ""))
            return (ret[0], retdesc), args, desc
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]

        #
        # '* @arg: description' lines, each possibly followed by
        # continuation lines
        #
        nbargs = len(args)
        while len(lines) > 0 and lines[0][0:3] == '* @':
            prefix = lines[0][3:]
            try:
                arg, argdesc = prefix.split(':', 1)
            except ValueError:
                # no ':' separator on the line
                if not quiet:
                    self.warning("Misformatted function comment for %s" % name)
                    self.warning("  problem with '%s'" % lines[0])
                del lines[0]
                continue
            argdesc = argdesc.strip()
            arg = arg.strip()
            del lines[0]
            # The @arg line may have been the very last line of the comment.
            line = lines[0].strip() if len(lines) > 0 else ""
            while len(line) > 2 and line[0:3] != '* @':
                # lstrip() is safe even when the line is made only of '*'
                line = line.lstrip('*')
                argdesc = argdesc + ' ' + line.strip()
                del lines[0]
                if len(lines) == 0:
                    break
                line = lines[0]
            for i in range(nbargs):
                if args[i][1] == arg:
                    args[i] = (args[i][0], arg, argdesc)
                    break
            else:
                if not quiet:
                    self.warning("Unable to find arg %s from function comment for %s" %
                                 (arg, name))
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]

        #
        # Free form description, up to an optional 'Returns ...' paragraph
        # which extends to the end of the comment
        #
        desc = None
        while len(lines) > 0:
            line = lines[0]
            # Remove all leading '*', followed by at most one ' ' character
            # since we need to preserve correct indentation of code examples
            stripped = line.lstrip('*')
            if len(stripped) != len(line):
                if stripped.startswith(' '):
                    stripped = stripped[1:]
                line = stripped
            if line.startswith("Returns"):
                try:
                    line = line.split(' ', 1)[1]
                except IndexError:
                    line = ""
                retdesc = line.strip()
                del lines[0]
                while len(lines) > 0:
                    line = lines[0].lstrip('*').strip()
                    retdesc = retdesc + " " + line
                    del lines[0]
            else:
                if desc is not None:
                    desc = desc + "\n" + line
                else:
                    desc = line
                del lines[0]

        if desc is None:
            desc = ""
        retdesc = retdesc.strip()
        desc = desc.strip()

        if quiet == 0:
            #
            # report missing comments
            #
            for i in range(nbargs):
                if args[i][2] is None and args[i][0] != "void" and args[i][1] is not None:
                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
            if retdesc == "" and ret[0] != "void":
                self.warning("Function comment for %s lacks description of return value" % name)
            if desc == "":
                self.warning("Function comment for %s lacks description of the function" % name)

        return (ret[0], retdesc), args, desc

    def parsePreproc(self, token):
        """Process one preprocessor directive and return the token after it.

        ``token[1]`` holds the directive (``#include``, ``#define``,
        ``#ifdef``, ...).  Includes and macros are recorded through
        ``self.index_add``; conditionals update ``self.defines`` and
        ``self.conditionals``.  Returns the next token to be processed, or
        None when the lexer has no more tokens.
        """
        if debug:
            print("=> preproc ", token, self.lexer.tokens)
        directive = token[1]

        if directive == "#include":
            token = self.lexer.token()
            if token is None:
                return None
            if token[0] != 'preproc':
                return token
            self.index_add(token[1], self.filename, not self.is_header,
                           "include")
            return self.lexer.token()

        if directive == "#define":
            token = self.lexer.token()
            if token is None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                macro = token[1]

                # Collect the replacement text, up to the next directive
                body = []
                token = self.lexer.token()
                while (token is not None and token[0] == 'preproc' and
                       token[1][0] != '#'):
                    body.append(token[1])
                    token = self.lexer.token()

                # Split 'NAME(params)' into the name and the parameter text
                params = None
                paren = macro.find("(")
                if paren != -1:
                    params = macro[paren + 1:-1]
                    macro = macro[0:paren]

                # skip hidden macros and header guards
                if macro in hidden_macros:
                    return token
                if macro.endswith("_H") or macro.endswith("_H_ALLOW"):
                    return token

                # A single double-quoted item is recorded as a string value,
                # anything else as the raw replacement text
                strValue = None
                rawValue = None
                if len(body) == 1 and body[0][0] == '"' and body[0][-1] == '"':
                    strValue = body[0][1:-1]
                else:
                    rawValue = " ".join(body)
                (args, desc) = self.parseMacroComment(macro, not self.is_header)
                self.index_add(macro, self.filename, not self.is_header,
                               "macro", (args, desc, params, strValue, rawValue))
                return token

        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        # Conditionals (i.e. tokens from #ifdef, #ifndef, #if, #else and
        # #endif) are processed for headers and mainline code alike.  The
        # inherited comment here says the ones from the headers are stored
        # in libxml2-api.xml and later checked (in merge_public) against
        # the mainline code.
        #
        # Some of the variables are not concerned with enabling / disabling
        # of library functions (e.g. '__XML_PARSER_H__') and must not take
        # part in that check, so any conditional which doesn't include the
        # string 'ENABLED' is ignored.
        #
        if directive == "#ifdef" or directive == "#ifndef":
            symbol = self.lexer.tokens[0][1]
            try:
                self.defines.append(symbol)
                if 'ENABLED' in symbol:
                    if directive == "#ifdef":
                        self.conditionals.append("defined(%s)" % symbol)
                    else:
                        self.conditionals.append("!defined(%s)" % symbol)
            except Exception:
                pass
        elif directive == "#if":
            # Rebuild the expression from the remaining tokens of the line
            expr = ""
            for tok in self.lexer.tokens:
                separator = " " if expr != "" else ""
                expr = expr + separator + tok[1]
            try:
                self.defines.append(expr)
                if 'ENABLED' in expr:
                    self.conditionals.append(expr)
            except Exception:
                pass
        elif directive == "#else":
            if self.conditionals != [] and 'ENABLED' in self.defines[-1]:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif directive == "#endif":
            if self.conditionals != [] and 'ENABLED' in self.defines[-1]:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]

        # Drop the rest of the directive, up to the next directive or the
        # first token which is not a preprocessor one
        token = self.lexer.token()
        while (token is not None and token[0] == 'preproc' and
               token[1][0] != '#'):
            token = self.lexer.token()
        return token

    #
    # Token acquisition on top of the lexer: it handles preprocessor
    # directives and comments internally, since they are logically not
    # part of the program structure.
    #
    def push(self, tok):
        """Hand ``tok`` back to the underlying lexer via its ``push`` method."""
        lexer = self.lexer
        lexer.push(tok)

    def token(self):
        """Return the next significant token, or None when input is exhausted.

        Comment and preprocessor tokens are passed to ``self.parseComment``
        and ``self.parsePreproc``, whose result becomes the next candidate.
        ``__const`` is reported as ``const``, an ``__attribute`` is skipped
        up to the following ``;`` (that ``;`` token is what gets returned),
        and names found in ``ignored_words`` are dropped together with the
        number of following tokens registered for them.
        """
        global ignored_words

        tok = self.lexer.token()
        while tok is not None:
            kind = tok[0]
            if kind == 'comment':
                tok = self.parseComment(tok)
                continue
            if kind == 'preproc':
                tok = self.parsePreproc(tok)
                continue

            if kind == "name":
                word = tok[1]
                if word == "__const":
                    return ("name", "const")
                if word == "__attribute":
                    tok = self.lexer.token()
                    while tok is not None and tok[1] != ";":
                        tok = self.lexer.token()
                    return tok
                if word in ignored_words:
                    # skip the n tokens attached to the ignored word,
                    # then fetch the next candidate
                    (n, info) = ignored_words[word]
                    for _ in range(n):
                        tok = self.lexer.token()
                    tok = self.lexer.token()
                    continue

            if debug:
                print("=> ", tok)
            return tok
        return None

    #
    # Parse a typedef, it records the type and its name.
    #
    def parseTypedef(self, token):
        """Parse the declarators of a typedef and index each declared name.

        ``token`` is the first token of the type being aliased.  Each name is
        indexed as "functype" (when ``self.parseType`` left a signature),
        "struct" (when the base type is a bare ``struct``) or "typedef".
        Returns the token following the closing ';', the offending token
        after a parse error, or None when there is no input.
        """
        if token is None:
            return None

        # With typedef enum types, we can have comments parsed before the
        # enum themselves. The parsing of enum values does clear the
        # self.comment variable. So we store it here for later.
        saved_comment = self.comment

        token = self.parseType(token)
        if token is None:
            self.error("parsing typedef")
            return None

        base_type = self.type
        cur_type = base_type
        while token is not None:
            if token[0] != "name":
                self.error("parsing typedef: expecting a name")
                return token

            name = token[1]
            signature = self.signature
            if signature is not None:
                # function pointer typedef: keep only the return type part
                cur_type = cur_type.split('(')[0]
                d = self.mergeFunctionComment(name,
                                              ((cur_type, None), signature), 1)
                self.index_add(name, self.filename, not self.is_header,
                               "functype", d)
            elif base_type == "struct":
                self.index_add(name, self.filename, not self.is_header,
                               "struct", cur_type)
                # later declarators of this typedef refer to the named struct
                base_type = "struct " + name
            else:
                self.comment = saved_comment
                info = self.parseTypeComment(name, 1)
                self.index_add(name, self.filename, not self.is_header,
                               "typedef", cur_type, info)
            token = self.token()

            if token is None:
                kind = text = None
            else:
                kind, text = token[0], token[1]

            if kind == 'sep' and text == ',':
                # another declarator, possibly prefixed by operators
                cur_type = base_type
                token = self.token()
                while token is not None and token[0] == "op":
                    cur_type = cur_type + token[1]
                    token = self.token()
            elif kind == 'sep' and text == ';':
                break
            elif kind == 'name':
                cur_type = base_type
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        return self.token()

    #
    # Parse a C code block (used for function bodies): it parses up to
    # and including the balancing }
    #
    def parseBlock(self, token):
        """Skip a C code block up to and including its balancing '}'.

        ``token`` is the first token inside the block.  Nested blocks are
        handled recursively.  When ``self.collect_ref`` is 1, identifiers
        met on the way are recorded through ``self.index_add_ref``:

        * ``virXXX (``            as a "function" reference
        * ``virXXX name ; , =``   as a "type" reference
        * ``XEN_*`` / ``LIBXEN_*`` as "typedef" references

        Returns the token following the closing '}', or None when the input
        ends before the block is closed.
        """
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.comment = None
                token = self.token()
                return token
            elif self.collect_ref == 1:
                oldtok = token
                token = self.token()
                if oldtok[0] != "name":
                    continue
                ident = oldtok[1]
                if ident[0:3] == "vir":
                    # The look-ahead token is None when the input is
                    # truncated: never dereference it without checking.
                    if token is None:
                        pass
                    elif token[0] == "sep" and token[1] == "(":
                        self.index_add_ref(ident, self.filename,
                                           0, "function")
                        token = self.token()
                    elif token[0] == "name":
                        token = self.token()
                        if (token is not None and token[0] == "sep" and
                                token[1] in (";", ",", "=")):
                            self.index_add_ref(ident, self.filename,
                                               0, "type")
                elif ident[0:4] == "XEN_":
                    self.index_add_ref(ident, self.filename,
                                       0, "typedef")
                elif ident[0:7] == "LIBXEN_":
                    self.index_add_ref(ident, self.filename,
                                       0, "typedef")
            else:
                token = self.token()
        return token

    #
    # Parse a C struct definition till the balancing }
    #
    def parseStruct(self, token):
        """Parse the fields of a C struct definition up to the balancing '}'.

        ``token`` is the first token after the opening '{'.  The collected
        fields are stored in ``self.struct_fields`` as ``(type, name,
        comment)`` tuples; fields whose type is ``union`` carry
        ``self.union_fields`` as a fourth element.  ``self.type`` is restored
        after each field.

        Returns the token following the closing '}', or None when the input
        ends first.
        """
        fields = []
        # self.debug("start parseStruct", token)
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                # self.debug("end parseStruct", token)
                # print(fields)
                token = self.token()
                return token
            else:
                base_type = self.type
                # self.debug("before parseType", token)
                token = self.parseType(token)
                # self.debug("after parseType", token)
                if token is not None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    # token is None when the input stops right after the
                    # field name: report it instead of crashing on token[0]
                    if token is not None and token[0] == "sep" and token[1] == ";":
                        # Reset the comment before reading the next token;
                        # presumably a comment following the field is picked
                        # up by self.token() and then tidied by
                        # cleanupComment() - verify against parseComment.
                        self.comment = None
                        token = self.token()
                        self.cleanupComment()
                        if self.type == "union":
                            fields.append((self.type, fname, self.comment,
                                           self.union_fields))
                            self.union_fields = []
                        else:
                            fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token is not None and token[0] == "sep" and token[1] == "{":
                    # nested anonymous block: skip it and its optional name
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token is not None and token[0] == "name":
                        token = self.token()
                    if token is not None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type
        self.struct_fields = fields
        # self.debug("end parseStruct", token)
        # print(fields)
        return token

    #
    # Parse a C union definition till the balancing }
    #
    def parseUnion(self, token):
        """Parse the fields of a C union definition up to the balancing '}'.

        ``token`` is the first token after the opening '{'.  The collected
        fields are stored in ``self.union_fields`` as ``(type, name,
        comment)`` tuples and ``self.type`` is restored after each field.

        Returns the token following the closing '}', or None when the input
        ends first.
        """
        fields = []
        # self.debug("start parseUnion", token)
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.union_fields = fields
                # self.debug("end parseUnion", token)
                # print(fields)
                token = self.token()
                return token
            else:
                base_type = self.type
                # self.debug("before parseType", token)
                token = self.parseType(token)
                # self.debug("after parseType", token)
                if token is not None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    # token is None when the input stops right after the
                    # field name: report it instead of crashing on token[0]
                    if token is not None and token[0] == "sep" and token[1] == ";":
                        # Reset the comment before reading the next token;
                        # presumably a comment following the field is picked
                        # up by self.token() and then tidied by
                        # cleanupComment() - verify against parseComment.
                        self.comment = None
                        token = self.token()
                        self.cleanupComment()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseUnion: expecting ;", token)
                elif token is not None and token[0] == "sep" and token[1] == "{":
                    # nested anonymous block: skip it and its optional name
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token is not None and token[0] == "name":
                        token = self.token()
                    if token is not None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseUnion: expecting ;", token)
                else:
                    self.error("parseUnion: name", token)
                    token = self.token()
                self.type = base_type
        self.union_fields = fields
        # self.debug("end parseUnion", token)
        # print(fields)
        return token

    #
    # Parse a C enum block, parse till the balancing }
    #
    def parseEnumBlock(self, token):
        """Parse the body of a C enum up to the balancing '}'.

        ``token`` is the first token after the opening '{'.  The values are
        collected in ``self.enums`` as ``(name, value, comment)`` tuples.
        A value without an explicit ``= expr`` is the previous value plus
        one (starting from 0); a trailing ``U`` on a purely numeric token of
        an explicit expression is dropped.

        Returns the token following the closing '}', or None when the input
        ends first.
        """
        self.enums = []
        name = None
        comment = ""
        value = "-1"
        # A comment pending on entry means the comments come before the
        # values they describe rather than after them.
        commentsBeforeVal = self.comment is not None

        while token is not None:
            kind = token[0]
            if kind == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                continue

            if kind == "sep" and token[1] == "}":
                # flush the last pending value
                if name is not None:
                    self.cleanupComment()
                    if self.comment is not None:
                        comment = self.comment
                        self.comment = None
                    self.enums.append((name, value, comment))
                return self.token()

            if kind != "name":
                token = self.token()
                continue

            # A new name: the previous value, if any, is now complete
            self.cleanupComment()
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()

            if token[0] == "op" and token[1][0] == "=":
                # explicit value: whatever follows the '=' inside the
                # operator token, then everything up to ',' or '}'
                value = token[1][1:]
                token = self.token()
                while not (token[0] == "sep" and token[1] in (',', '}')):
                    # We might be dealing with '1U << 12' here
                    value += re.sub(r"^(\d+)U$", "\\1", token[1])
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except Exception:
                    self.warning("Failed to compute value of enum %s" % name)
                    value = ""

            if token[0] == "sep" and token[1] == ",":
                if commentsBeforeVal:
                    self.cleanupComment()
                    self.enums.append((name, value, self.comment))
                    name = comment = self.comment = None
                token = self.token()
        return token

    def parseVirEnumDecl(self, token):
        """Parse the remainder of ``VIR_ENUM_DECL(name)`` after the '('.

        ``token`` is expected to be the name; the closing ')' and an optional
        ';' are then consumed.  Mismatches are reported through
        ``self.error``.  Returns the first token after the declaration.
        """
        if token[0] != "name":
            self.error("parsing VIR_ENUM_DECL: expecting name", token)

        closing = self.token()
        # The token kind and its text are checked, and reported, separately
        for index, wanted in ((0, "sep"), (1, ')')):
            if closing[index] != wanted:
                self.error("parsing VIR_ENUM_DECL: expecting ')'", closing)

        following = self.token()
        if not (following[0] == "sep" and following[1] == ';'):
            return following
        return self.token()

    def parseVirEnumImpl(self, token):
        """Parse the remainder of ``VIR_ENUM_IMPL(type, sentinel, ...)``.

        ``token`` is the first token after the opening '(' and is expected
        to be the type name.  It must be followed by ',', the sentinel name,
        ',' and then a list of strings, each optionally wrapped in
        ``N_(...)``, up to the closing ')' and an optional ';'.  Mismatches
        are reported through ``self.error``.

        Returns the first token after the statement, or None when the input
        ends inside the list of strings.
        """
        # First the type name
        if token[0] != "name":
            self.error("parsing VIR_ENUM_IMPL: expecting name", token)

        token = self.token()

        if token[0] != "sep":
            self.error("parsing VIR_ENUM_IMPL: expecting ','", token)

        if token[1] != ',':
            self.error("parsing VIR_ENUM_IMPL: expecting ','", token)
        token = self.token()

        # Now the sentinel name
        if token[0] != "name":
            self.error("parsing VIR_ENUM_IMPL: expecting name", token)

        token = self.token()

        if token[0] != "sep":
            self.error("parsing VIR_ENUM_IMPL: expecting ','", token)

        if token[1] != ',':
            self.error("parsing VIR_ENUM_IMPL: expecting ','", token)

        token = self.token()

        # Now a list of strings (optional comments).  Any token read inside
        # the loop may be None when the input is truncated, so it is always
        # checked before being dereferenced.
        while token is not None:
            isGettext = False
            # First a string, optionally with N_(...)
            if token[0] == 'name':
                if token[1] != 'N_':
                    self.error("parsing VIR_ENUM_IMPL: expecting 'N_'", token)
                token = self.token()
                if token is None or token[0] != "sep" or token[1] != '(':
                    self.error("parsing VIR_ENUM_IMPL: expecting '('", token)
                token = self.token()
                isGettext = True

                if token is None or token[0] != "string":
                    self.error("parsing VIR_ENUM_IMPL: expecting a string", token)
                token = self.token()
            elif token[0] == "string":
                token = self.token()
            else:
                self.error("parsing VIR_ENUM_IMPL: expecting a string", token)

            if token is None:
                break

            # Then a separator
            if token[0] == "sep":
                if isGettext and token[1] == ')':
                    token = self.token()

                if token is not None and token[1] == ',':
                    token = self.token()

                if token is not None and token[1] == ')':
                    token = self.token()
                    break

            # Then an optional comment
            if token is not None and token[0] == "comment":
                token = self.token()

        if token is not None and token[0] == "sep" and token[1] == ';':
            token = self.token()

        return token

    def parseVirLogInit(self, token):
        """Parse the remainder of ``VIR_LOG_INIT("...")`` after the '('.

        ``token`` is expected to be the string argument; the closing ')' and
        an optional ';' are then consumed.  Mismatches are reported through
        ``self.error``.  Returns the first token after the statement.
        """
        if token[0] != "string":
            self.error("parsing VIR_LOG_INIT: expecting string", token)

        paren = self.token()
        # kind and text of the closing parenthesis are reported separately
        if paren[0] != "sep":
            self.error("parsing VIR_LOG_INIT: expecting ')'", paren)
        if paren[1] != ')':
            self.error("parsing VIR_LOG_INIT: expecting ')'", paren)

        tail = self.token()
        is_semicolon = tail[0] == "sep" and tail[1] == ';'
        return self.token() if is_semicolon else tail

    #
    # Parse a C definition block (used for structs or unions): it parses
    # up to and including the balancing }
    #
    def parseTypeBlock(self, token):
        """Skip tokens up to and including the '}' closing the current block.

        ``token`` is the first token inside the block; nested '{ ... }'
        blocks are skipped recursively.  Returns the token following the
        closing '}', or None when the input ends first.
        """
        while token is not None:
            is_sep = token[0] == "sep"
            if is_sep and token[1] == "}":
                return self.token()
            opens_block = is_sep and token[1] == "{"
            token = self.token()
            if opens_block:
                # resume after the nested block
                token = self.parseTypeBlock(token)
        return token

    #
    # Parse a type: the fact that the type name can either occur after
    #    the definition or within the definition makes it a little harder
    #    if inside, the name token is pushed back before returning
    #
    def parseType(self, token):
        self.type = ""
        self.struct_fields = []
        self.union_fields = []
        self.signature = None
        if token is None:
            return token

        while (token[0] == "name" and
               token[1] in ["const", "unsigned", "signed"]):
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            token = self.token()

        if token[0] == "name" and token[1] == "long":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]

            # some read ahead for long long
            oldtmp = token
            token = self.token()
            if token[0] == "name" and token[1] == "long":
                self.type = self.type + " " + token[1]
            else:
                self.push(token)
                token = oldtmp

            oldtmp = token
            token = self.token()
            if token[0] == "name" and token[1] == "int":
                self.type = self.type + " " + token[1]
            else:
                self.push(token)
                token = oldtmp

        elif token[0] == "name" and token[1] == "short":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]

        elif token[0] == "name" and token[1] == "struct":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            token = self.token()
            nametok = None
            if token[0] == "name":
                nametok = token
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseStruct(token)
            elif token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " " + nametok[1] + " *"
                token = self.token()
                while token is not None and token[0] == "op" and token[1] == "*":
                    self.type = self.type + " *"
                    token = self.token()
                if token[0] == "name":
                    nametok = token
                    token = self.token()
                else:
                    self.error("struct : expecting name", token)
                    return token
            elif token is not None and token[0] == "name" and nametok is not None:
                self.type = self.type + " " + nametok[1]
                return token

            if nametok is not None:
                self.lexer.push(token)
                token = nametok
            return token

        elif token[0] == "name" and token[1] == "union":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            token = self.token()
            nametok = None
            if token[0] == "name":
                nametok = token
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseUnion(token)
            elif token is not None and token[0] == "name" and nametok is not None:
                self.type = self.type + " " + nametok[1]
                return token

            if nametok is not None:
                self.lexer.push(token)
                token = nametok
            return token

        elif token[0] == "name" and token[1] == "enum":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            self.enums = []
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "{":
                # drop comments before the enum block
                self.comment = None
                token = self.token()
                token = self.parseEnumBlock(token)
            else:
                self.error("parsing enum: expecting '{'", token)
            enum_type = None
            if token is not None and token[0] != "name":
                self.lexer.push(token)
                token = ("name", "enum")
            else:
                enum_type = token[1]
            for enum in self.enums:
                self.index_add(enum[0], self.filename,
                               not self.is_header, "enum",
                               (enum[1], enum[2], enum_type))
            return token
        elif token[0] == "name" and token[1] == "VIR_ENUM_DECL":
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "(":
                token = self.token()
                token = self.parseVirEnumDecl(token)
            else:
                self.error("parsing VIR_ENUM_DECL: expecting '('", token)
            if token is not None:
                self.lexer.push(token)
                token = ("name", "virenumdecl")
            return token

        elif token[0] == "name" and token[1] == "VIR_ENUM_IMPL":
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "(":
                token = self.token()
                token = self.parseVirEnumImpl(token)
            else:
                self.error("parsing VIR_ENUM_IMPL: expecting '('", token)
            if token is not None:
                self.lexer.push(token)
                token = ("name", "virenumimpl")
            return token

        elif token[0] == "name" and token[1] == "VIR_LOG_INIT":
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "(":
                token = self.token()
                token = self.parseVirLogInit(token)
            else:
                self.error("parsing VIR_LOG_INIT: expecting '('", token)
            if token is not None:
                self.lexer.push(token)
                token = ("name", "virloginit")
            return token

        elif token[0] == "name" and token[1] == "G_STATIC_ASSERT":
            # skip whole line
            while token is not None and not (token[0] == "sep" and
                                             token[1] == ";"):
                token = self.token()
            return self.token()

        elif token[0] == "name":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
        else:
            self.error("parsing type %s: expecting a name" % (self.type),
                       token)
            return token
        token = self.token()
        while token is not None and (token[0] == "op" or
                                     token[0] == "name" and
                                     token[1] == "const"):
            self.type = self.type + " " + token[1]
            token = self.token()

        #
        # if there is a parenthesis here, this means a function type
        #
        if token is not None and token[0] == "sep" and token[1] == '(':
            self.type = self.type + token[1]
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == '*':
                self.type = self.type + token[1]
                token = self.token()
            if token is None or token[0] != "name":
                self.error("parsing function type, name expected", token)
                return token
            self.type = self.type + token[1]
            nametok = token
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == ')':
                self.type = self.type + token[1]
                token = self.token()
                if token is not None and token[0] == "sep" and token[1] == '(':
                    token = self.token()
                    type = self.type
                    token = self.parseSignature(token)
                    self.type = type
                else:
                    self.error("parsing function type, '(' expected", token)
                    return token
            else:
                self.error("parsing function type, ')' expected", token)
                return token
            self.lexer.push(token)
            token = nametok
            return token

        #
        # do some lookahead for arrays
        #
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + " " + nametok[1]
                while token is not None and token[0] == "sep" and token[1] == '[':
                    self.type = self.type + token[1]
                    token = self.token()
                    while (token is not None and token[0] != 'sep' and
                           token[1] != ']' and token[1] != ';'):
                        self.type = self.type + token[1]
                        token = self.token()
                if token is not None and token[0] == 'sep' and token[1] == ']':
                    self.type = self.type + token[1]
                    token = self.token()
                else:
                    self.error("parsing array type, ']' expected", token)
                    return token
            elif token is not None and token[0] == "sep" and token[1] == ':':
                # remove :12 in case it's a limited int size
                token = self.token()
                token = self.token()
            self.lexer.push(token)
            token = nametok

        return token

    #
    # Parse a signature: '(' has been parsed and we scan the type definition
    #    up to the ')' included
    def parseSignature(self, token):
        signature = []
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.signature = []
            token = self.token()
            return token
        while token is not None:
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token is not None and token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token is not None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            if token is not None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token

    # this dict contains the functions that are allowed to use [unsigned]
    # long for legacy reasons in their signature and return type. this list is
    # fixed. new procedures and public APIs have to use [unsigned] long long
    # Maps a function name to (return_may_be_long, allowed_long_params).
    # The second item is always a tuple of parameter names; single names
    # need the trailing comma, otherwise the value is a plain string and
    # membership tests degrade to substring matching.
    long_legacy_functions = {
        "virGetVersion": (False, ("libVer", "typeVer")),
        "virConnectGetLibVersion": (False, ("libVer",)),
        "virConnectGetVersion": (False, ("hvVer",)),
        "virDomainGetMaxMemory": (True, ()),
        "virDomainMigrate": (False, ("flags", "bandwidth")),
        "virDomainMigrate2": (False, ("flags", "bandwidth")),
        "virDomainMigrateBegin3": (False, ("flags", "bandwidth")),
        "virDomainMigrateConfirm3": (False, ("flags", "bandwidth")),
        "virDomainMigrateDirect": (False, ("flags", "bandwidth")),
        "virDomainMigrateFinish": (False, ("flags",)),
        "virDomainMigrateFinish2": (False, ("flags",)),
        "virDomainMigrateFinish3": (False, ("flags",)),
        "virDomainMigratePeer2Peer": (False, ("flags", "bandwidth")),
        "virDomainMigratePerform": (False, ("flags", "bandwidth")),
        "virDomainMigratePerform3": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepare": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepare2": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepare3": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepareTunnel": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepareTunnel3": (False, ("flags", "bandwidth")),
        "virDomainMigrateToURI": (False, ("flags", "bandwidth")),
        "virDomainMigrateToURI2": (False, ("flags", "bandwidth")),
        "virDomainMigrateVersion1": (False, ("flags", "bandwidth")),
        "virDomainMigrateVersion2": (False, ("flags", "bandwidth")),
        "virDomainMigrateVersion3": (False, ("flags", "bandwidth")),
        "virDomainMigrateSetMaxSpeed": (False, ("bandwidth",)),
        "virDomainSetMaxMemory": (False, ("memory",)),
        "virDomainSetMemory": (False, ("memory",)),
        "virDomainSetMemoryFlags": (False, ("memory",)),
        "virDomainBlockCommit": (False, ("bandwidth",)),
        "virDomainBlockJobSetSpeed": (False, ("bandwidth",)),
        "virDomainBlockPull": (False, ("bandwidth",)),
        "virDomainBlockRebase": (False, ("bandwidth",)),
        "virDomainMigrateGetMaxSpeed": (False, ("bandwidth",)),
    }

    def checkLongLegacyFunction(self, name, return_type, signature):
        if "long" in return_type and "long long" not in return_type:
            try:
                if not CParser.long_legacy_functions[name][0]:
                    raise Exception()
            except Exception:
                self.error(("function '%s' is not allowed to return long, "
                            "use long long instead") % name)

        for param in signature:
            if "long" in param[0] and "long long" not in param[0]:
                try:
                    if param[1] not in CParser.long_legacy_functions[name][1]:
                        raise Exception()
                except Exception:
                    self.error(("function '%s' is not allowed to take long "
                                "parameter '%s', use long long instead")
                               % (name, param[1]))

    # this dict contains the structs that are allowed to use [unsigned]
    # long for legacy reasons. this list is fixed. new structs have to use
    # [unsigned] long long
    # Maps a struct name to the tuple of field names allowed to be plain
    # long.  Single names need the trailing comma to really be tuples.
    long_legacy_struct_fields = {
        "_virDomainInfo": ("maxMem", "memory"),
        "_virNodeInfo": ("memory",),
        "_virDomainBlockJobInfo": ("bandwidth",),
    }

    def checkLongLegacyStruct(self, name, fields):
        for field in fields:
            if "long" in field[0] and "long long" not in field[0]:
                try:
                    if field[1] not in CParser.long_legacy_struct_fields[name]:
                        raise Exception()
                except Exception:
                    self.error(("struct '%s' is not allowed to contain long "
                                "field '%s', use long long instead")
                               % (name, field[1]))

    #
    # Parse a global definition, be it a type, variable or function
    # the extern "C" blocks are a bit nasty and require it to recurse.
    #
    def parseGlobal(self, token):
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token is None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token is None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
#                        print('Entering extern "C line ', self.lineno())
                        while token is not None and (token[0] != 'sep' or
                                                     token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(("token %s %s unexpected at the "
                                            "top level") %
                                           (token[0], token[1]))
                                token = self.parseGlobal(token)
#                        print('Exiting extern "C" line', self.lineno())
                        token = self.token()
                        return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token is None or token[0] != 'name':
                return token

        variable_comment = None
        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            # Store block of comment that might be from variable as
            # the code uses self.comment a lot and it would lose it.
            variable_comment = self.comment
            token = self.parseType(token)
            type_orig = self.type
        if token is None or token[0] != "name":
            return token
        type = type_orig
        self.name = token[1]
        token = self.token()
        while token is not None and (token[0] == "sep" or token[0] == "op"):
            if token[0] == "sep":
                if token[1] == "[":
                    type = type + token[1]
                    token = self.token()
                    while token is not None and (token[0] != "sep" or
                                                 token[1] != ";"):
                        type = type + token[1]
                        token = self.token()

            if token is not None and token[0] == "op" and token[1] == "=":
                #
                # Skip the initialization of the variable
                #
                token = self.token()
                if token[0] == 'sep' and token[1] == '{':
                    token = self.token()
                    token = self.parseBlock(token)
                else:
                    self.comment = None
                    while token is not None and (token[0] != "sep" or
                                                 token[1] not in ',;'):
                        token = self.token()
                self.comment = None
                if token is None or token[0] != "sep" or (token[1] != ';' and
                   token[1] != ','):
                    self.error("missing ';' or ',' after value")

            if token is not None and token[0] == "sep":
                if token[1] == ";":
                    self.comment = None
                    token = self.token()
                    if type == "struct":
                        self.checkLongLegacyStruct(self.name, self.struct_fields)
                        self.index_add(self.name, self.filename,
                                       not self.is_header, "struct",
                                       self.struct_fields)
                    else:
                        # Just to use the cleanupComment function.
                        variable_comment = self.cleanup_code_comment(variable_comment, self.name)
                        info = (type, variable_comment)
                        self.index_add(self.name, self.filename,
                                       not self.is_header, "variable", info)
                    break
                elif token[1] == "(":
                    token = self.token()
                    token = self.parseSignature(token)
                    if token is None:
                        return None
                    if token[0] == "sep" and token[1] == ";":
                        self.checkLongLegacyFunction(self.name, type, self.signature)
                        d = self.mergeFunctionComment(self.name,
                                                      ((type, None),
                                                       self.signature), 1)
                        self.index_add(self.name, self.filename, static,
                                       "function", d)
                        token = self.token()
                    elif token[0] == "sep" and token[1] == "{":
                        self.checkLongLegacyFunction(self.name, type, self.signature)
                        d = self.mergeFunctionComment(self.name,
                                                      ((type, None),
                                                       self.signature), static)
                        self.index_add(self.name, self.filename, static,
                                       "function", d)
                        token = self.token()
                        token = self.parseBlock(token)
                elif token[1] == ',':
                    self.comment = None
                    self.index_add(self.name, self.filename, static,
                                   "variable", type)
                    type = type_orig
                    token = self.token()
                    while token is not None and token[0] == "sep":
                        type = type + token[1]
                        token = self.token()
                    if token is not None and token[0] == "name":
                        self.name = token[1]
                        token = self.token()
                else:
                    break

        return token

    def parse(self):
        if not quiet:
            print("Parsing %s" % (self.filename))
        token = self.token()
        while token is not None:
            if token[0] == 'name':
                token = self.parseGlobal(token)
            else:
                self.error("token %s %s unexpected at the top level" % (
                    token[0], token[1]))
                token = self.parseGlobal(token)
                return
        self.parseTopComment(self.top_comment)
        return self.index


class docBuilder:
    """A documentation builder"""
    def __init__(self, name, syms, path='.', directories=['.'], includes=[], acls=None):
        self.name = name
        self.syms = syms
        self.path = path
        self.acls = acls
        self.directories = directories
        if name == "libvirt":
            self.includes = includes + list(included_files.keys())
        elif name == "libvirt-qemu":
            self.includes = includes + list(qemu_included_files.keys())
        elif name == "libvirt-lxc":
            self.includes = includes + list(lxc_included_files.keys())
        elif name == "libvirt-admin":
            self.includes = includes + list(admin_included_files.keys())
        self.modules = {}
        self.headers = {}
        self.versions = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        self.basename = name
        self.errors = 0

    def warning(self, msg):
        global warnings
        warnings = warnings + 1
        print(msg)

    def error(self, msg):
        self.errors += 1
        print("Error:", msg, file=sys.stderr)

    def indexString(self, id, str):
        if str is None:
            return
        str = str.replace("'", ' ')
        str = str.replace('"', ' ')
        str = str.replace("/", ' ')
        str = str.replace('*', ' ')
        str = str.replace("[", ' ')
        str = str.replace("]", ' ')
        str = str.replace("(", ' ')
        str = str.replace(")", ' ')
        str = str.replace("<", ' ')
        str = str.replace('>', ' ')
        str = str.replace("&", ' ')
        str = str.replace('#', ' ')
        str = str.replace(",", ' ')
        str = str.replace('.', ' ')
        str = str.replace(';', ' ')
        tokens = str.split()
        for token in tokens:
            c = token[0]
            if not re.match(r"[a-zA-Z]", c):
                pass
            elif len(token) < 3:
                pass
            else:
                lower = token.lower()
                # TODO: generalize this a bit
                if lower == 'and' or lower == 'the':
                    pass
                elif token in self.xref:
                    self.xref[token].append(id)
                else:
                    self.xref[token] = [id]

    def analyze(self):
        if not quiet:
            print("Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())))
        self.idx.analyze()

    def scanHeaders(self):
        for header in self.headers.keys():
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        for module in self.modules.keys():
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scanVersions(self):
        prefix = self.name.upper().replace("-", "_") + "_"

        version = None
        prevversion = None
        with open(self.syms, "r") as syms:
            while True:
                line = syms.readline()
                if not line:
                    break
                line = line.strip()
                if line.startswith("#"):
                    continue
                if line == "":
                    continue

                if line.startswith(prefix) and line.endswith(" {"):
                    version = line[len(prefix):-2]
                elif line == "global:":
                    continue
                elif line == "local:":
                    continue
                elif line.startswith("}"):
                    if prevversion is None:
                        if line != "};":
                            raise Exception("Unexpected closing version")
                    else:
                        if line != ("} %s%s;" % (prefix, prevversion)):
                            raise Exception("Unexpected end of version '%s': %s'" % (line, "} " + prefix + version))

                    prevversion = version
                    version = None
                elif line.endswith(";") and version is not None:
                    func = line[:-1]
                    self.versions[func] = version
                else:
                    raise Exception("Unexpected line in syms file: %s" % line)

    def scan(self):
        for directory in self.directories:
            files = glob.glob(directory + "/*.c")
            for file in files:
                skip = 1
                for incl in self.includes:
                    if file.find(incl) != -1:
                        skip = 0
                        break
                if skip == 0:
                    self.modules[file] = None
            files = glob.glob(directory + "/*.h")
            for file in files:
                skip = 1
                for incl in self.includes:
                    if file.find(incl) != -1:
                        skip = 0
                        break
                if skip == 0:
                    self.headers[file] = None
        self.scanHeaders()
        self.scanModules()
        self.scanVersions()

    # Fetch tags from the comment. Only 'Since' supported at the moment.
    # For functions, since tags are on Return comments.
    # Return the tags and the original comments, but without the tags.
    def retrieve_comment_tags(self, name: str, comment: str,
                              return_comment="") -> (str, str, str):
        since = ""
        if comment is not None:
            comment_match = re.search(r"\(?Since: (\d+\.\d+\.\d+\.?\d?)\)?",
                                      comment)
            if comment_match:
                # Remove Since tag from the comment
                (start, end) = comment_match.span()
                comment = comment[:start] + comment[end:]
                comment = comment.strip()
                # Only the version
                since = comment_match.group(1)

        if since == "" and return_comment is not None:
            return_match = re.search(r"\(?Since: (\d+\.\d+\.\d+\.?\d?)\)?",
                                     return_comment)
            if return_match:
                # Remove Since tag from the comment
                (start, end) = return_match.span()
                return_comment = return_comment[:start] + return_comment[end:]
                return_comment = return_comment.strip()
                # Only the version
                since = return_match.group(1)

        if since == "":
            self.warning("Missing 'Since' tag for: " + name)
        return (since, comment, return_comment)

    def modulename_file(self, file):
        """Return the base name of file without a trailing '.h' or '.c'."""
        base = os.path.basename(file)
        if base.endswith(('.h', '.c')):
            return base[:-2]
        return base

    def serialize_enum(self, output, name):
        id = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info is not None:
            info = id.info
            valhex = ""
            if info[0] is not None and info[0] != '':
                try:
                    val = eval(info[0])
                    valhex = hex(val)
                except Exception:
                    val = info[0]
                output.write(" value='%s'" % (val))

                if valhex != "":
                    output.write(" value_hex='%s'" % (valhex))

                m = re.match(r"\(?1<<(\d+)\)?", info[0])
                if m:
                    output.write(" value_bitshift='%s'" % (m.group(1)))

            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                # Search for 'Since' version tag
                (since, comment, _) = self.retrieve_comment_tags(name, info[1])
                if len(since) > 0:
                    output.write(" version='%s'" % escape(since))
                if len(comment) > 0:
                    output.write(" info='%s'" % escape(comment))
            else:
                self.warning("Missing docstring for enum: " + name)

        output.write("/>\n")

    def serialize_macro(self, output, name):
        id = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info is None:
            args = []
            desc = None
            params = None
            strValue = None
            rawValue = None
        else:
            (args, desc, params, strValue, rawValue) = id.info

        if params is not None:
            output.write(" params='%s'" % params)
        if strValue is not None:
            output.write(" string='%s'" % strValue)
        else:
            output.write(" raw='%s'" % escape(rawValue))

        (since, comment, _) = self.retrieve_comment_tags(name, desc)
        if len(since) > 0:
            output.write(" version='%s'" % escape(since))
        output.write(">\n")

        if comment is not None and comment != "":
            output.write("      <info><![CDATA[%s]]></info>\n" % (comment))
            self.indexString(name, comment)
        for arg in args:
            (name, desc) = arg
            if desc is not None and desc != "":
                output.write("      <arg name='%s' info='%s'/>\n" % (
                             name, escape(desc)))
                self.indexString(name, desc)
            else:
                output.write("      <arg name='%s'/>\n" % name)
        output.write("    </macro>\n")

    def serialize_union(self, output, field, desc):
        output.write("      <field name='%s' type='union' info='%s'>\n" % (field[1], desc))
        output.write("        <union>\n")
        for f in field[3]:
            desc = f[2]
            if desc is None:
                desc = ''
            else:
                desc = escape(desc)
            output.write("          <field name='%s' type='%s' info='%s'/>\n" % (f[1], f[0], desc))

        output.write("        </union>\n")
        output.write("      </field>\n")

    def serialize_typedef(self, output, name):
        id = self.idx.typedefs[name]
        (since, comment, _) = self.retrieve_comment_tags(name, id.extra)
        version_tag = len(since) > 0 and f" version='{since}'" or ""
        if id.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'%s" % (
                name, self.modulename_file(id.header), id.info, version_tag))
            name = id.info[7:]
            if (name in self.idx.structs and
                    isinstance(self.idx.structs[name].info, (list, tuple))):
                output.write(">\n")
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        if field[0] == "union":
                            self.serialize_union(output, field, desc)
                        else:
                            output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    self.warning("Failed to serialize struct %s" % name)
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'%s" % (
                         name, self.modulename_file(id.header), id.info, version_tag))
            try:
                if comment is not None and comment != "":
                    output.write(">\n      <info><![CDATA[%s]]></info>\n" % (comment))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        id = self.idx.variables[name]
        (type, comment) = id.info
        (since, comment, _) = self.retrieve_comment_tags(name, comment)
        version_tag = len(since) > 0 and f" version='{since}'" or ""
        output.write("    <variable name='%s' file='%s' type='%s'%s" % (
            name, self.modulename_file(id.header), type, version_tag))
        if len(comment) == 0:
            output.write("/>\n")
        else:
            output.write(">\n      <info><![CDATA[%s]]></info>\n" % (comment))
            output.write("    </variable>\n")

    def serialize_function(self, output, name):
        """Write the <function> or <functype> element describing name.

        The entry's info is the triple (ret, params, desc): ret is indexed
        as (type, comment) and every item of params as
        (type, name, comment).  For entries of type "function" the version
        attribute comes from self.versions (filled from the symbol file)
        and a missing entry raises Exception; any other entry type is
        written as <functype> with the version taken from the 'Since' tag
        of the documentation.  Missing return or argument documentation is
        reported through self.error() unless the function is listed in
        ignored_functions.  ACL checks and filters from self.acls are
        appended when available.
        """
        id = self.idx.functions[name]
        if name == debugsym and not quiet:
            print("=>", id)

        (ret, params, desc) = id.info
        # "" when there is no return description at all
        return_comment = (ret is not None and ret[1] is not None) and ret[1] or ""
        (since, comment, return_comment) = self.retrieve_comment_tags(name, desc, return_comment)
        # Simple way to avoid setting empty version
        version_tag = len(since) > 0 and f" version='{since}'" or ""

        # NB: this is consumed by a regex in 'getAPIFilenames' in hvsupport.pl
        if id.type == "function":
            if name not in self.versions:
                raise Exception("Missing symbol file entry for '%s'" % name)
            ver = self.versions[name]
            if ver is None:
                raise Exception("Missing version for '%s'" % name)
            output.write("    <function name='%s' file='%s' module='%s' version='%s'>\n" % (
                name, self.modulename_file(id.header),
                self.modulename_file(id.module), self.versions[name]))
        else:
            output.write("    <functype name='%s' file='%s' module='%s'%s>\n" % (
                name, self.modulename_file(id.header),
                self.modulename_file(id.module),
                version_tag))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals is not None:
            # all conditions are joined with an XML-escaped "&&"
            apstr = ""
            for cond in id.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n" % (apstr))

        # Any failure below is reported as a warning; whatever was already
        # written stays in the output and the element is still closed.
        try:
            # For functions, we get the since version from .syms files.
            # This is an extra check to see that docstrings are correct
            # and to avoid wrong versions in the .sym files too.
            ver = name in self.versions and self.versions[name] or None
            if len(since) > 0 and ver is not None and since != ver:
                if name in ignored_function_versions:
                    allowedver = ignored_function_versions[name]
                    if allowedver != since:
                        self.warning(f"Function {name} has allowed version {allowedver} but docstring says {since}")
                else:
                    self.warning(f"Function {name} has symversion {ver} but docstring says {since}")

            output.write("      <info><![CDATA[%s]]></info>\n" % (comment))
            # the description is indexed as given, 'Since' tag included
            self.indexString(name, desc)

            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                elif (return_comment == '') and name not in ignored_functions:
                    self.error("Missing documentation for return of function `%s'" % name)
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(return_comment)))
                    self.indexString(name, ret[1])

            for param in params:
                # a lone 'void' parameter list produces no <arg>
                if param[0] == 'void':
                    continue
                if (param[2] is None or param[2] == ''):
                    if name in ignored_functions:
                        output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                    else:
                        self.error("Missing documentation for arg `%s' of function `%s'" % (param[1], name))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Exception:", sys.exc_info()[1], file=sys.stderr)
            self.warning("Failed to save function %s info: %s" % (name, repr(id.info)))

        if self.acls and name in self.acls:
            # self.acls[name] is indexed as (checks, filters)
            acls = self.acls[name][0]
            aclfilters = self.acls[name][1]

            if len(acls) > 0 or len(aclfilters) > 0:
                output.write("      <acls>\n")
                for acl in acls:
                    # "object:perm" with an optional third ":flags" part
                    comp = acl.split(':', 3)
                    objname = comp[0].replace('_', '-')
                    perm = comp[1].replace('_', '-')
                    output.write("        <check object='%s' perm='%s'" % (objname, perm))
                    if len(comp) > 2:
                        output.write(" flags='%s'" % comp[2])

                    output.write("/>\n")

                for aclfilter in aclfilters:
                    # "object:perm"
                    comp = aclfilter.split(':', 2)
                    objname = comp[0].replace('_', '-')
                    perm = comp[1].replace('_', '-')

                    output.write("        <filter object='%s' perm='%s'/>\n" % (objname, perm))

                output.write("      </acls>\n")

        # closing tag matches the element opened above for "function" and
        # "functype" entries
        output.write("    </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by a header.

        output is a writable text stream and file is a key of self.headers.
        The element carries the header's summary/description (when the
        header has an info mapping), an optional <deprecated/> marker and
        one <exports> line per macro, enum, typedef, struct, variable and
        function recorded for that header.
        """
        export_line = "     <exports symbol='%s' type='%s'/>\n"

        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))

        header = self.headers[file]
        info = header.info
        if info is not None:
            for section in ('Summary', 'Description'):
                tag = section.lower()
                try:
                    text = escape(info[section])
                except KeyError:
                    self.warning("Header %s lacks a %s description" % (module, section))
                else:
                    output.write("     <%s>%s</%s>\n" % (tag, text, tag))
            # A description mentioning DEPRECATED flags the whole header.
            if 'Description' in info and info['Description'].find("DEPRECATED") >= 0:
                output.write("     <deprecated/>\n")

        # Macros are sometime used to masquerade other types: a macro whose
        # name is also known as another kind of symbol is not exported as
        # a macro.
        shadowing = (header.functions, header.variables, header.typedefs,
                     header.structs, header.unions, header.enums)
        for symbol in uniq(header.macros.keys()):
            if any(symbol in table for table in shadowing):
                continue
            output.write(export_line % (symbol, 'macro'))

        exported = (
            ('enum', header.enums),
            ('typedef', header.typedefs),
            ('struct', header.structs),
            ('variable', header.variables),
            ('function', header.functions),
        )
        for kind, table in exported:
            for symbol in uniq(table.keys()):
                output.write(export_line % (symbol, kind))

        output.write("    </file>\n")

    def serialize(self):
        """Write the whole API description to "<path>/<name>-api.xml".

        The document lists the exports of every known header (sorted by
        key), then every macro, enum, typedef, variable and function of
        self.idx, each group sorted by name.

        The file is written as UTF-8, matching the encoding announced in
        the XML prolog, and is closed even if one of the serialize_*
        helpers raises.

        Exits the process with status 3 when self.errors is positive.
        """
        filename = "%s/%s-api.xml" % (self.path, self.name)
        if not quiet:
            print("Saving XML description %s" % (filename))

        # The context manager guarantees the handle is released on error;
        # the explicit encoding keeps the bytes consistent with the prolog
        # regardless of the locale.
        with open(filename, "w", encoding="UTF-8") as output:
            output.write('<?xml version="1.0" encoding="UTF-8"?>\n')
            output.write("<api name='%s'>\n" % self.name)

            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")

            output.write("  <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write("  </symbols>\n")
            output.write("</api>\n")

        if self.errors > 0:
            print("apibuild.py: %d error(s) encountered during generation" % self.errors, file=sys.stderr)
            sys.exit(3)


def remoteProcToAPI(remotename: str) -> str:
    """Translate a remote protocol procedure name into a public API name.

    remotename has the form <PROTOCOL>_PROC_<WORD>[_<WORD>...] where
    PROTOCOL is REMOTE, QEMU or LXC, e.g. REMOTE_PROC_DOMAIN_GET_XML_DESC
    becomes virDomainGetXMLDesc.

    Each word is capitalized, a fixed list of acronym spellings is
    restored (XML, UUID, NWFilter, ...) and the words are joined after a
    "vir" prefix. For QEMU/LXC procedures the driver name is inserted
    after the object name.

    Raises Exception when the name lacks the PROC marker, has no word
    after it, contains an empty word, or uses an unknown protocol.
    """
    components = remotename.split('_')

    # A name without any '_' has no second component to inspect.
    if len(components) < 2 or components[1] != "PROC":
        raise Exception("Malformed remote function name '%s'" % remotename)

    if components[0] == 'REMOTE':
        driver = ''
    elif components[0] == 'QEMU':
        driver = 'Qemu'
    elif components[0] == 'LXC':
        driver = 'Lxc'
    else:
        raise Exception("Unknown remote protocol '%s'" % components[0])

    # "<PROTOCOL>_PROC" alone names no procedure at all.
    if len(components) < 3:
        raise Exception("Malformed remote function name '%s'" % remotename)

    # Applied in order to every capitalized word.
    spelling_fixups = (
        ('Nwfilter', 'NWFilter'),
        ('Xml$', 'XML'),
        ('Xml2$', 'XML2'),
        ('Uri$', 'URI'),
        ('Uuid$', 'UUID'),
        ('Id$', 'ID'),
        ('Mac$', 'MAC'),
        ('Cpu$', 'CPU'),
        ('Os$', 'OS'),
        ('Nmi$', 'NMI'),
        ('Pm', 'PM'),
        ('Fstrim$', 'FSTrim'),
        ('Fsfreeze$', 'FSFreeze'),
        ('Fsthaw$', 'FSThaw'),
        ('Fsinfo$', 'FSInfo'),
        ('Iothread$', 'IOThread'),
        ('Scsi', 'SCSI'),
        ('Wwn$', 'WWN'),
        ('Dhcp$', 'DHCP'),
    )

    fixednames = []
    for comp in components[2:]:
        if comp == '':
            raise Exception("Invalid empty component in remote procedure name '%s'" % remotename)

        fixedname = comp[0].upper() + comp[1:].lower()
        for pattern, replacement in spelling_fixups:
            fixedname = re.sub(pattern, replacement, fixedname)

        fixednames.append(fixedname)

    apiname = "vir" + fixednames[0]

    # In case of remote procedures for qemu/lxc private APIs we need to add
    # the name of the driver in the middle of the string after the object name.
    # For a special case of event callbacks the 'object' name is actually two
    # words: virConnectDomainQemuEvent ...
    # The length check lets a single-word procedure through instead of
    # failing with an IndexError.
    if len(fixednames) > 1 and fixednames[1] == 'Domain':
        apiname += 'Domain'
        fixednames.pop(1)

    apiname += driver

    return apiname + "".join(fixednames[1:])


def remoteProtocolGetAcls(protocolfilename: str) -> dict:
    """Collect the ACL annotations of a remote protocol definition file.

    Only the lines between "enum <name>_procedure {" and the closing "};"
    are examined. Every "* @acl: <value>" and "* @aclfilter: <value>"
    annotation is accumulated until the next procedure enumerator line,
    which receives them.

    Returns a dict mapping the public API name (as computed by
    remoteProcToAPI) to a tuple (acls, aclfilters) of two lists of
    strings. A procedure annotated with the single ACL 'none' gets an
    empty acls list.

    Raises Exception when a procedure has no ACL annotation at all, or
    combines 'none' with other ACLs.
    """
    procedures_start = re.compile('^enum [a-z]+_procedure {$')
    acl_pattern = re.compile(r"\* @acl: ([^\s]+)")
    aclfilter_pattern = re.compile(r"\* @aclfilter: ([^\s]+)")
    procedure_pattern = re.compile(r"^\s+([A-Z_0-9]+) ")

    apiacls = {}

    with open(protocolfilename) as proto:
        in_procedures = False
        acls = []
        aclfilters = []

        for line in proto:
            # Skip everything up to the opening of the procedure enum.
            if not in_procedures:
                if procedures_start.match(line):
                    in_procedures = True
                continue

            if line == '};\n':
                break

            acl_match = acl_pattern.search(line)
            if acl_match:
                acls.append(acl_match.group(1))
                continue

            aclfilter_match = aclfilter_pattern.search(line)
            if aclfilter_match:
                aclfilters.append(aclfilter_match.group(1))
                continue

            remote_proc_match = procedure_pattern.search(line)
            if not remote_proc_match:
                continue

            proc = remote_proc_match.group(1)
            apiname = remoteProcToAPI(proc)

            # Both arguments must be passed to '%' as a tuple; otherwise the
            # formatting itself fails with TypeError and hides the message.
            if len(acls) == 0:
                raise Exception("No ACLs for procedure %s(%s)" % (proc, apiname))

            if 'none' in acls:
                if len(acls) > 1:
                    raise Exception("Procedure %s(%s) has 'none' ACL followed by other ACLs" % (proc, apiname))

                acls = []

            apiacls[apiname] = (acls, aclfilters)
            # Start fresh lists for the next procedure.
            acls = []
            aclfilters = []

    return apiacls


class app:
    """Command line driver: rebuilds the API XML of one module at a time."""

    def warning(self, msg):
        """Print msg and bump the module-wide warning counter."""
        global warnings
        warnings += 1
        print(msg)

    def rebuild(self, name, srcdir, builddir):
        """Scan, analyze and serialize the API description of module name.

        srcdir and builddir are the docs source and build directories;
        the symbol files, protocol files and scanned directories are
        located relative to them.

        Returns the docBuilder used, or None (after emitting a warning)
        when the module is unknown or the source tree cannot be found.
        """
        syms = {
            "libvirt": srcdir + "/../src/libvirt_public.syms",
            "libvirt-qemu": srcdir + "/../src/libvirt_qemu.syms",
            "libvirt-lxc": srcdir + "/../src/libvirt_lxc.syms",
            "libvirt-admin": srcdir + "/../src/admin/libvirt_admin_public.syms",
        }
        protocols = {
            "libvirt": srcdir + "/../src/remote/remote_protocol.x",
            "libvirt-qemu": srcdir + "/../src/remote/qemu_protocol.x",
            "libvirt-lxc": srcdir + "/../src/remote/lxc_protocol.x",
            "libvirt-admin": None,
        }
        if not (name in syms and name in protocols):
            self.warning("rebuild() failed, unknown module %s" % name)
            return None

        # Modules without a protocol file carry no ACL information.
        protocol = protocols[name]
        apiacl = remoteProtocolGetAcls(protocol) if protocol else None

        # The presence of libvirt.c tells us the source tree is where we
        # expect it to be.
        if glob.glob(srcdir + "/../src/libvirt.c") == []:
            self.warning("rebuild() failed, unable to guess the module")
            return None

        if not quiet:
            print("Rebuilding API description for %s" % name)

        dirs = [
            srcdir + "/../src",
            srcdir + "/../src/admin",
            srcdir + "/../src/util",
            srcdir + "/../include/libvirt",
            builddir + "/../include/libvirt",
        ]
        builder = docBuilder(name, syms[name], builddir, dirs, [], apiacl)
        builder.scan()
        builder.analyze()
        builder.serialize()
        return builder

    #
    # for debugging the parser
    #
    def parse(self, filename):
        """Run the C parser on a single file and return what it produces."""
        return CParser(filename).parse()


if __name__ == "__main__":
    # Command line entry point: either rebuild the XML description of
    # every module, or (with --debug) only run the parser on one file.
    parser = argparse.ArgumentParser(description="XML API builder")
    parser.add_argument("srcdir", type=str, help="path to docs source dir")
    parser.add_argument("builddir", type=str, help="path to docs build dir")
    parser.add_argument("-d", "--debug", type=str, help="path to source file")

    args = parser.parse_args()

    app = app()

    if not args.debug:
        for module in ("libvirt", "libvirt-qemu", "libvirt-lxc", "libvirt-admin"):
            app.rebuild(module, args.srcdir, args.builddir)
    else:
        debug = 1
        app.parse(args.debug)

    # Any warning emitted along the way turns into exit status 2.
    sys.exit(2 if warnings > 0 else 0)