From 8c8b97685bd42f36e2cf2baf1c43e6ec836e1446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Mon, 19 Dec 2022 12:48:06 -0500 Subject: [PATCH] rpcgen: add an XDR protocol lexer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a lexer capable of handling the XDR protocol files. The lexical requirements are detailed in https://www.rfc-editor.org/rfc/rfc4506#section-6.2 pytest is introduced as a build dependency for testing python code. Reviewed-by: Michal Privoznik Signed-off-by: Daniel P. Berrangé --- libvirt.spec.in | 1 + meson.build | 1 + scripts/meson.build | 2 + scripts/rpcgen/meson.build | 11 ++ scripts/rpcgen/rpcgen/lexer.py | 213 +++++++++++++++++++++++++++++ scripts/rpcgen/tests/meson.build | 3 + scripts/rpcgen/tests/simple.x | 35 +++++ scripts/rpcgen/tests/test_lexer.py | 116 ++++++++++++++++ 8 files changed, 382 insertions(+) create mode 100644 scripts/rpcgen/meson.build create mode 100644 scripts/rpcgen/rpcgen/lexer.py create mode 100644 scripts/rpcgen/tests/meson.build create mode 100644 scripts/rpcgen/tests/simple.x create mode 100644 scripts/rpcgen/tests/test_lexer.py diff --git a/libvirt.spec.in b/libvirt.spec.in index 262c59eb5b..8d0a0aad60 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -287,6 +287,7 @@ BuildRequires: ninja-build BuildRequires: git BuildRequires: perl-interpreter BuildRequires: python3 +BuildRequires: python3-pytest %if %{with_libxl} BuildRequires: xen-devel %endif diff --git a/meson.build b/meson.build index 5b095bb809..85ddad52c1 100644 --- a/meson.build +++ b/meson.build @@ -823,6 +823,7 @@ optional_programs = [ 'ovs-vsctl', 'passt', 'pdwtags', + 'pytest', 'rmmod', 'scrub', 'tc', diff --git a/scripts/meson.build b/scripts/meson.build index 65fd1e21c5..283b790d03 100644 --- a/scripts/meson.build +++ b/scripts/meson.build @@ -37,3 +37,5 @@ foreach name : scripts sname = name.split('.')[0].underscorify() set_variable('@0@_prog'.format(sname), 
find_program(name)) endforeach + +subdir('rpcgen') diff --git a/scripts/rpcgen/meson.build b/scripts/rpcgen/meson.build new file mode 100644 index 0000000000..52526bf812 --- /dev/null +++ b/scripts/rpcgen/meson.build @@ -0,0 +1,11 @@ +if pytest_prog.found() + subdir('tests') + + test( + 'rpcgen-pytest', + python3_prog, + args: [ '-mpytest' ] + rpcgen_tests, + env: runutf8, + workdir: meson.current_source_dir(), + ) +endif diff --git a/scripts/rpcgen/rpcgen/lexer.py b/scripts/rpcgen/rpcgen/lexer.py new file mode 100644 index 0000000000..989c2ae216 --- /dev/null +++ b/scripts/rpcgen/rpcgen/lexer.py @@ -0,0 +1,213 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +import abc + + +class XDRReader: + def __init__(self, fp): + self.fp = fp + self.lookahead = "" + self.lookbehind = "" + self.line = 1 + self.column = 0 + + def _read(self): + if len(self.lookahead) > 0: + c = self.lookahead[0:1] + self.lookahead = self.lookahead[1:] + return c + return self.fp.read(1) + + def peek(self, skip=0): + need = 1 + skip + if len(self.lookahead) < need: + self.lookahead = self.lookahead + self.fp.read(need - len(self.lookahead)) + if len(self.lookahead) < need: + return None + + return self.lookahead[skip : skip + 1] + + def last(self, skip=0): + if (skip + 1) > len(self.lookbehind): + return None + return self.lookbehind[skip] + + def next(self): + c = self._read() + line = self.line + column = self.column + if c == "\n": + self.line = self.line + 1 + self.column = 0 + else: + self.column = self.column + 1 + self.lookbehind = c + self.lookbehind + if len(self.lookbehind) > 2: + self.lookbehind = self.lookbehind[0:2] + return c, line, column + + +class XDRToken(abc.ABC): + def __init__(self, line, column, value): + self.line = line + self.column = column + self.value = value + + def __eq__(self, other): + return ( + type(self) == type(other) + and self.line == other.line + and self.column == other.column + and self.value == other.value + ) + + @classmethod + @abc.abstractmethod + 
def start(cls, reader): + pass + + @classmethod + @abc.abstractmethod + def end(cls, reader): + pass + + @classmethod + def consume(cls, reader): + c, line, col = reader.next() + buf = c + while True: + if cls.end(reader): + break + c, _, _ = reader.next() + buf = buf + c + return cls(line, col, buf) + + def __repr__(self): + return "%s{line=%d,col=%d,value={{{%s}}}}" % ( + self.__class__.__name__, + self.line, + self.column, + self.value, + ) + + +class XDRTokenComment(XDRToken): + @classmethod + def start(cls, reader): + return reader.peek() == "/" and reader.peek(skip=1) == "*" + + @classmethod + def end(cls, reader): + c1 = reader.last(skip=1) + c2 = reader.last() + if c1 == "*" and c2 == "/": + return True + + if reader.peek() is None: + raise Exception( + "EOF before closing comment starting at %d:%d" + % (reader.line, reader.column) + ) + + +class XDRTokenIdentifier(XDRToken): + @classmethod + def start(cls, reader): + c = reader.peek() + return c.isalpha() + + @classmethod + def end(cls, reader): + c = reader.peek() + if c is None: + return True + return not c.isalnum() and c != "_" + + +class XDRTokenPunctuation(XDRToken): + @classmethod + def start(cls, reader): + c = reader.peek() + return c in [";", "=", "{", "}", ",", "[", "]", "<", ">", "*", "(", ")", ":"] + + @classmethod + def end(cls, reader): + return True + + +class XDRTokenConstant(XDRToken): + @classmethod + def start(cls, reader): + c1 = reader.peek() + c2 = reader.peek(skip=1) + return c1.isdecimal() or (c1 == "-" and c2 is not None and c2.isdecimal()) + + @classmethod + def end(cls, reader): + c = reader.peek() + return ( + not c.isdecimal() + and not c == "." 
+ and not c.lower() in ["x", "a", "b", "c", "d", "e", "f"] + ) + + +class XDRTokenCEscape(XDRToken): + @classmethod + def start(cls, reader): + return reader.column == 0 and reader.peek() == "%" + + @classmethod + def end(cls, reader): + return reader.peek() == "\n" + + +class XDRTokenSpace(XDRToken): + @classmethod + def start(cls, reader): + return reader.peek().isspace() + + @classmethod + def end(cls, reader): + c = reader.peek() + return c is None or not c.isspace() + + +class XDRLexer: + def __init__(self, fp): + self.reader = XDRReader(fp) + self.lookahead = [] + + def _token(self): + tokenTypes = [ + XDRTokenComment, + XDRTokenIdentifier, + XDRTokenCEscape, + XDRTokenPunctuation, + XDRTokenConstant, + XDRTokenSpace, + ] + while True: + if self.reader.peek() is None: + return None + + for tokenType in tokenTypes: + if tokenType.start(self.reader): + ret = tokenType.consume(self.reader) + if type(ret) not in [XDRTokenSpace, XDRTokenComment]: + return ret + + def next(self): + if len(self.lookahead) > 0: + token = self.lookahead[0] + self.lookahead = self.lookahead[1:] + return token + return self._token() + + def peek(self): + if len(self.lookahead) == 0: + token = self._token() + if token is None: + return None + self.lookahead.append(token) + return self.lookahead[0] diff --git a/scripts/rpcgen/tests/meson.build b/scripts/rpcgen/tests/meson.build new file mode 100644 index 0000000000..9162412d31 --- /dev/null +++ b/scripts/rpcgen/tests/meson.build @@ -0,0 +1,3 @@ +rpcgen_tests = files([ + 'test_lexer.py', +]) diff --git a/scripts/rpcgen/tests/simple.x b/scripts/rpcgen/tests/simple.x new file mode 100644 index 0000000000..91a1f2d234 --- /dev/null +++ b/scripts/rpcgen/tests/simple.x @@ -0,0 +1,35 @@ +/* Example from https://www.rfc-editor.org/rfc/rfc4506#section-7 */ + +const MAXUSERNAME = 32; /* max length of a user name */ +const MAXFILELEN = 65535; /* max length of a file */ +const MAXNAMELEN = 255; /* max length of a file name */ + +/* + * Types of files: 
+ */ +enum filekind { + TEXT = 0, /* ascii data */ + DATA = 1, /* raw data */ + EXEC = 2 /* executable */ +}; + +/* + * File information, per kind of file: + */ +union filetype switch (filekind kind) { +case TEXT: + void; /* no extra information */ +case DATA: + string creator; /* data creator */ +case EXEC: + string interpretor; /* program interpretor */ +}; +/* + * A complete file: + */ +struct file { + string filename; /* name of file */ + filetype type; /* info about file */ + string owner; /* owner of file */ + opaque data; /* file data */ +}; diff --git a/scripts/rpcgen/tests/test_lexer.py b/scripts/rpcgen/tests/test_lexer.py new file mode 100644 index 0000000000..7cba98057f --- /dev/null +++ b/scripts/rpcgen/tests/test_lexer.py @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +from pathlib import Path + +from rpcgen.lexer import ( + XDRLexer, + XDRTokenIdentifier, + XDRTokenPunctuation, + XDRTokenConstant, +) + + +def test_lexer(): + p = Path(Path(__file__).parent, "simple.x") + with p.open("r") as fp: + lexer = XDRLexer(fp) + + tokens = [] + while True: + tok = lexer.next() + if tok is None: + break + tokens.append(tok) + + assert tokens == [ + XDRTokenIdentifier(line=3, column=0, value="const"), + XDRTokenIdentifier(line=3, column=6, value="MAXUSERNAME"), + XDRTokenPunctuation(line=3, column=18, value="="), + XDRTokenConstant(line=3, column=20, value="32"), + XDRTokenPunctuation(line=3, column=22, value=";"), + XDRTokenIdentifier(line=4, column=0, value="const"), + XDRTokenIdentifier(line=4, column=6, value="MAXFILELEN"), + XDRTokenPunctuation(line=4, column=17, value="="), + XDRTokenConstant(line=4, column=19, value="65535"), + XDRTokenPunctuation(line=4, column=24, value=";"), + XDRTokenIdentifier(line=5, column=0, value="const"), + XDRTokenIdentifier(line=5, column=6, value="MAXNAMELEN"), + XDRTokenPunctuation(line=5, column=17, value="="), + XDRTokenConstant(line=5, column=19, value="255"), + XDRTokenPunctuation(line=5, column=22, 
value=";"), + XDRTokenIdentifier(line=10, column=0, value="enum"), + XDRTokenIdentifier(line=10, column=5, value="filekind"), + XDRTokenPunctuation(line=10, column=14, value="{"), + XDRTokenIdentifier(line=11, column=3, value="TEXT"), + XDRTokenPunctuation(line=11, column=8, value="="), + XDRTokenConstant(line=11, column=10, value="0"), + XDRTokenPunctuation(line=11, column=11, value=","), + XDRTokenIdentifier(line=12, column=3, value="DATA"), + XDRTokenPunctuation(line=12, column=8, value="="), + XDRTokenConstant(line=12, column=10, value="1"), + XDRTokenPunctuation(line=12, column=11, value=","), + XDRTokenIdentifier(line=13, column=3, value="EXEC"), + XDRTokenPunctuation(line=13, column=8, value="="), + XDRTokenConstant(line=13, column=10, value="2"), + XDRTokenPunctuation(line=14, column=0, value="}"), + XDRTokenPunctuation(line=14, column=1, value=";"), + XDRTokenIdentifier(line=19, column=0, value="union"), + XDRTokenIdentifier(line=19, column=6, value="filetype"), + XDRTokenIdentifier(line=19, column=15, value="switch"), + XDRTokenPunctuation(line=19, column=22, value="("), + XDRTokenIdentifier(line=19, column=23, value="filekind"), + XDRTokenIdentifier(line=19, column=32, value="kind"), + XDRTokenPunctuation(line=19, column=36, value=")"), + XDRTokenPunctuation(line=19, column=38, value="{"), + XDRTokenIdentifier(line=20, column=0, value="case"), + XDRTokenIdentifier(line=20, column=5, value="TEXT"), + XDRTokenPunctuation(line=20, column=9, value=":"), + XDRTokenIdentifier(line=21, column=3, value="void"), + XDRTokenPunctuation(line=21, column=7, value=";"), + XDRTokenIdentifier(line=22, column=0, value="case"), + XDRTokenIdentifier(line=22, column=5, value="DATA"), + XDRTokenPunctuation(line=22, column=9, value=":"), + XDRTokenIdentifier(line=23, column=3, value="string"), + XDRTokenIdentifier(line=23, column=10, value="creator"), + XDRTokenPunctuation(line=23, column=17, value="<"), + XDRTokenIdentifier(line=23, column=18, value="MAXNAMELEN"), + 
XDRTokenPunctuation(line=23, column=28, value=">"), + XDRTokenPunctuation(line=23, column=29, value=";"), + XDRTokenIdentifier(line=24, column=0, value="case"), + XDRTokenIdentifier(line=24, column=5, value="EXEC"), + XDRTokenPunctuation(line=24, column=9, value=":"), + XDRTokenIdentifier(line=25, column=3, value="string"), + XDRTokenIdentifier(line=25, column=10, value="interpretor"), + XDRTokenPunctuation(line=25, column=21, value="<"), + XDRTokenIdentifier(line=25, column=22, value="MAXNAMELEN"), + XDRTokenPunctuation(line=25, column=32, value=">"), + XDRTokenPunctuation(line=25, column=33, value=";"), + XDRTokenPunctuation(line=26, column=0, value="}"), + XDRTokenPunctuation(line=26, column=1, value=";"), + XDRTokenIdentifier(line=30, column=0, value="struct"), + XDRTokenIdentifier(line=30, column=7, value="file"), + XDRTokenPunctuation(line=30, column=12, value="{"), + XDRTokenIdentifier(line=31, column=3, value="string"), + XDRTokenIdentifier(line=31, column=10, value="filename"), + XDRTokenPunctuation(line=31, column=18, value="<"), + XDRTokenIdentifier(line=31, column=19, value="MAXNAMELEN"), + XDRTokenPunctuation(line=31, column=29, value=">"), + XDRTokenPunctuation(line=31, column=30, value=";"), + XDRTokenIdentifier(line=32, column=3, value="filetype"), + XDRTokenIdentifier(line=32, column=12, value="type"), + XDRTokenPunctuation(line=32, column=16, value=";"), + XDRTokenIdentifier(line=33, column=3, value="string"), + XDRTokenIdentifier(line=33, column=10, value="owner"), + XDRTokenPunctuation(line=33, column=15, value="<"), + XDRTokenIdentifier(line=33, column=16, value="MAXUSERNAME"), + XDRTokenPunctuation(line=33, column=27, value=">"), + XDRTokenPunctuation(line=33, column=28, value=";"), + XDRTokenIdentifier(line=34, column=3, value="opaque"), + XDRTokenIdentifier(line=34, column=10, value="data"), + XDRTokenPunctuation(line=34, column=14, value="<"), + XDRTokenIdentifier(line=34, column=15, value="MAXFILELEN"), + XDRTokenPunctuation(line=34, 
column=25, value=">"), + XDRTokenPunctuation(line=34, column=26, value=";"), + XDRTokenPunctuation(line=35, column=0, value="}"), + XDRTokenPunctuation(line=35, column=1, value=";"), + ]