From 8c8b97685bd42f36e2cf2baf1c43e6ec836e1446 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
Date: Mon, 19 Dec 2022 12:48:06 -0500
Subject: [PATCH] rpcgen: add an XDR protocol lexer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a lexer capable of handling the XDR protocol files.

The lexical requirements are detailed in

  https://www.rfc-editor.org/rfc/rfc4506#section-6.2

pytest is introduced as a build dependency for testing Python
code.

Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 libvirt.spec.in                    |   1 +
 meson.build                        |   1 +
 scripts/meson.build                |   2 +
 scripts/rpcgen/meson.build         |  11 ++
 scripts/rpcgen/rpcgen/lexer.py     | 213 +++++++++++++++++++++++++++++
 scripts/rpcgen/tests/meson.build   |   3 +
 scripts/rpcgen/tests/simple.x      |  35 +++++
 scripts/rpcgen/tests/test_lexer.py | 116 ++++++++++++++++
 8 files changed, 382 insertions(+)
 create mode 100644 scripts/rpcgen/meson.build
 create mode 100644 scripts/rpcgen/rpcgen/lexer.py
 create mode 100644 scripts/rpcgen/tests/meson.build
 create mode 100644 scripts/rpcgen/tests/simple.x
 create mode 100644 scripts/rpcgen/tests/test_lexer.py

diff --git a/libvirt.spec.in b/libvirt.spec.in
index 262c59eb5b..8d0a0aad60 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -287,6 +287,7 @@ BuildRequires: ninja-build
 BuildRequires: git
 BuildRequires: perl-interpreter
 BuildRequires: python3
+BuildRequires: python3-pytest
 %if %{with_libxl}
 BuildRequires: xen-devel
 %endif
diff --git a/meson.build b/meson.build
index 5b095bb809..85ddad52c1 100644
--- a/meson.build
+++ b/meson.build
@@ -823,6 +823,7 @@ optional_programs = [
   'ovs-vsctl',
   'passt',
   'pdwtags',
+  'pytest',
   'rmmod',
   'scrub',
   'tc',
diff --git a/scripts/meson.build b/scripts/meson.build
index 65fd1e21c5..283b790d03 100644
--- a/scripts/meson.build
+++ b/scripts/meson.build
@@ -37,3 +37,5 @@ foreach name : scripts
   sname = name.split('.')[0].underscorify()
   set_variable('@0@_prog'.format(sname), find_program(name))
 endforeach
+
+subdir('rpcgen')
diff --git a/scripts/rpcgen/meson.build b/scripts/rpcgen/meson.build
new file mode 100644
index 0000000000..52526bf812
--- /dev/null
+++ b/scripts/rpcgen/meson.build
@@ -0,0 +1,11 @@
+if pytest_prog.found()
+    subdir('tests')
+
+    test(
+        'rpcgen-pytest',
+        python3_prog,
+        args: [ '-mpytest' ] + rpcgen_tests,
+        env: runutf8,
+        workdir: meson.current_source_dir(),
+    )
+endif
diff --git a/scripts/rpcgen/rpcgen/lexer.py b/scripts/rpcgen/rpcgen/lexer.py
new file mode 100644
index 0000000000..989c2ae216
--- /dev/null
+++ b/scripts/rpcgen/rpcgen/lexer.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+import abc
+
+
+class XDRReader:
+    def __init__(self, fp):
+        self.fp = fp
+        self.lookahead = ""
+        self.lookbehind = ""
+        self.line = 1
+        self.column = 0
+
+    def _read(self):
+        if len(self.lookahead) > 0:
+            c = self.lookahead[0:1]
+            self.lookahead = self.lookahead[1:]
+            return c
+        return self.fp.read(1)
+
+    def peek(self, skip=0):
+        need = 1 + skip
+        if len(self.lookahead) < need:
+            self.lookahead = self.lookahead + self.fp.read(need - len(self.lookahead))
+        if len(self.lookahead) < need:
+            return None
+
+        return self.lookahead[skip : skip + 1]
+
+    def last(self, skip=0):
+        if (skip + 1) > len(self.lookbehind):
+            return None
+        return self.lookbehind[skip]
+
+    def next(self):
+        c = self._read()
+        line = self.line
+        column = self.column
+        if c == "\n":
+            self.line = self.line + 1
+            self.column = 0
+        else:
+            self.column = self.column + 1
+        self.lookbehind = c + self.lookbehind
+        if len(self.lookbehind) > 2:
+            self.lookbehind = self.lookbehind[0:2]
+        return c, line, column
+
+
+class XDRToken(abc.ABC):
+    def __init__(self, line, column, value):
+        self.line = line
+        self.column = column
+        self.value = value
+
+    def __eq__(self, other):
+        return (
+            type(self) == type(other)
+            and self.line == other.line
+            and self.column == other.column
+            and self.value == other.value
+        )
+
+    @classmethod
+    @abc.abstractmethod
+    def start(cls, reader):
+        pass
+
+    @classmethod
+    @abc.abstractmethod
+    def end(cls, reader):
+        pass
+
+    @classmethod
+    def consume(cls, reader):
+        c, line, col = reader.next()
+        buf = c
+        while True:
+            if cls.end(reader):
+                break
+            c, _, _ = reader.next()
+            buf = buf + c
+        return cls(line, col, buf)
+
+    def __repr__(self):
+        return "%s{line=%d,col=%d,value={{{%s}}}}" % (
+            self.__class__.__name__,
+            self.line,
+            self.column,
+            self.value,
+        )
+
+
+class XDRTokenComment(XDRToken):
+    @classmethod
+    def start(cls, reader):
+        return reader.peek() == "/" and reader.peek(skip=1) == "*"
+
+    @classmethod
+    def end(cls, reader):
+        c1 = reader.last(skip=1)
+        c2 = reader.last()
+        if c1 == "*" and c2 == "/":
+            return True
+
+        if reader.peek() is None:
+            raise Exception(
+                "EOF before closing comment starting at %d:%d"
+                % (reader.line, reader.column)
+            )
+
+
+class XDRTokenIdentifier(XDRToken):
+    @classmethod
+    def start(cls, reader):
+        c = reader.peek()
+        return c.isalpha()
+
+    @classmethod
+    def end(cls, reader):
+        c = reader.peek()
+        if c is None:
+            return True
+        return not c.isalnum() and c != "_"
+
+
+class XDRTokenPunctuation(XDRToken):
+    @classmethod
+    def start(cls, reader):
+        c = reader.peek()
+        return c in [";", "=", "{", "}", ",", "[", "]", "<", ">", "*", "(", ")", ":"]
+
+    @classmethod
+    def end(cls, reader):
+        return True
+
+
+class XDRTokenConstant(XDRToken):
+    @classmethod
+    def start(cls, reader):
+        c1 = reader.peek()
+        c2 = reader.peek(skip=1)
+        return c1.isdecimal() or (c1 == "-" and c2 is not None and c2.isdecimal())
+
+    @classmethod
+    def end(cls, reader):
+        c = reader.peek()
+        return (
+            not c.isdecimal()
+            and not c == "."
+            and not c.lower() in ["x", "a", "b", "c", "d", "e", "f"]
+        )
+
+
+class XDRTokenCEscape(XDRToken):
+    @classmethod
+    def start(cls, reader):
+        return reader.column == 0 and reader.peek() == "%"
+
+    @classmethod
+    def end(cls, reader):
+        return reader.peek() == "\n"
+
+
+class XDRTokenSpace(XDRToken):
+    @classmethod
+    def start(cls, reader):
+        return reader.peek().isspace()
+
+    @classmethod
+    def end(cls, reader):
+        c = reader.peek()
+        return c is None or not c.isspace()
+
+
+class XDRLexer:
+    def __init__(self, fp):
+        self.reader = XDRReader(fp)
+        self.lookahead = []
+
+    def _token(self):
+        tokenTypes = [
+            XDRTokenComment,
+            XDRTokenIdentifier,
+            XDRTokenCEscape,
+            XDRTokenPunctuation,
+            XDRTokenConstant,
+            XDRTokenSpace,
+        ]
+        while True:
+            if self.reader.peek() is None:
+                return None
+
+            for tokenType in tokenTypes:
+                if tokenType.start(self.reader):
+                    ret = tokenType.consume(self.reader)
+                    if type(ret) not in [XDRTokenSpace, XDRTokenComment]:
+                        return ret
+
+    def next(self):
+        if len(self.lookahead) > 0:
+            token = self.lookahead[0]
+            self.lookahead = self.lookahead[1:]
+            return token
+        return self._token()
+
+    def peek(self):
+        if len(self.lookahead) == 0:
+            token = self._token()
+            if token is None:
+                return None
+            self.lookahead.append(token)
+        return self.lookahead[0]
diff --git a/scripts/rpcgen/tests/meson.build b/scripts/rpcgen/tests/meson.build
new file mode 100644
index 0000000000..9162412d31
--- /dev/null
+++ b/scripts/rpcgen/tests/meson.build
@@ -0,0 +1,3 @@
+rpcgen_tests = files([
+    'test_lexer.py',
+])
diff --git a/scripts/rpcgen/tests/simple.x b/scripts/rpcgen/tests/simple.x
new file mode 100644
index 0000000000..91a1f2d234
--- /dev/null
+++ b/scripts/rpcgen/tests/simple.x
@@ -0,0 +1,35 @@
+/* Example from https://www.rfc-editor.org/rfc/rfc4506#section-7 */
+
+const MAXUSERNAME = 32; /* max length of a user name */
+const MAXFILELEN = 65535; /* max length of a file */
+const MAXNAMELEN = 255; /* max length of a file name */
+
+/*
+ * Types of files:
+ */
+enum filekind {
+   TEXT = 0, /* ascii data */
+   DATA = 1, /* raw data   */
+   EXEC = 2  /* executable */
+};
+
+/*
+ * File information, per kind of file:
+ */
+union filetype switch (filekind kind) {
+case TEXT:
+   void; /* no extra information */
+case DATA:
+   string creator<MAXNAMELEN>; /* data creator */
+case EXEC:
+   string interpretor<MAXNAMELEN>; /* program interpretor */
+};
+/*
+ * A complete file:
+ */
+struct file {
+   string filename<MAXNAMELEN>; /* name of file */
+   filetype type; /* info about file */
+   string owner<MAXUSERNAME>; /* owner of file */
+   opaque data<MAXFILELEN>; /* file data */
+};
diff --git a/scripts/rpcgen/tests/test_lexer.py b/scripts/rpcgen/tests/test_lexer.py
new file mode 100644
index 0000000000..7cba98057f
--- /dev/null
+++ b/scripts/rpcgen/tests/test_lexer.py
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+from pathlib import Path
+
+from rpcgen.lexer import (
+    XDRLexer,
+    XDRTokenIdentifier,
+    XDRTokenPunctuation,
+    XDRTokenConstant,
+)
+
+
+def test_lexer():
+    p = Path(Path(__file__).parent, "simple.x")
+    with p.open("r") as fp:
+        lexer = XDRLexer(fp)
+
+        tokens = []
+        while True:
+            tok = lexer.next()
+            if tok is None:
+                break
+            tokens.append(tok)
+
+    assert tokens == [
+        XDRTokenIdentifier(line=3, column=0, value="const"),
+        XDRTokenIdentifier(line=3, column=6, value="MAXUSERNAME"),
+        XDRTokenPunctuation(line=3, column=18, value="="),
+        XDRTokenConstant(line=3, column=20, value="32"),
+        XDRTokenPunctuation(line=3, column=22, value=";"),
+        XDRTokenIdentifier(line=4, column=0, value="const"),
+        XDRTokenIdentifier(line=4, column=6, value="MAXFILELEN"),
+        XDRTokenPunctuation(line=4, column=17, value="="),
+        XDRTokenConstant(line=4, column=19, value="65535"),
+        XDRTokenPunctuation(line=4, column=24, value=";"),
+        XDRTokenIdentifier(line=5, column=0, value="const"),
+        XDRTokenIdentifier(line=5, column=6, value="MAXNAMELEN"),
+        XDRTokenPunctuation(line=5, column=17, value="="),
+        XDRTokenConstant(line=5, column=19, value="255"),
+        XDRTokenPunctuation(line=5, column=22, value=";"),
+        XDRTokenIdentifier(line=10, column=0, value="enum"),
+        XDRTokenIdentifier(line=10, column=5, value="filekind"),
+        XDRTokenPunctuation(line=10, column=14, value="{"),
+        XDRTokenIdentifier(line=11, column=3, value="TEXT"),
+        XDRTokenPunctuation(line=11, column=8, value="="),
+        XDRTokenConstant(line=11, column=10, value="0"),
+        XDRTokenPunctuation(line=11, column=11, value=","),
+        XDRTokenIdentifier(line=12, column=3, value="DATA"),
+        XDRTokenPunctuation(line=12, column=8, value="="),
+        XDRTokenConstant(line=12, column=10, value="1"),
+        XDRTokenPunctuation(line=12, column=11, value=","),
+        XDRTokenIdentifier(line=13, column=3, value="EXEC"),
+        XDRTokenPunctuation(line=13, column=8, value="="),
+        XDRTokenConstant(line=13, column=10, value="2"),
+        XDRTokenPunctuation(line=14, column=0, value="}"),
+        XDRTokenPunctuation(line=14, column=1, value=";"),
+        XDRTokenIdentifier(line=19, column=0, value="union"),
+        XDRTokenIdentifier(line=19, column=6, value="filetype"),
+        XDRTokenIdentifier(line=19, column=15, value="switch"),
+        XDRTokenPunctuation(line=19, column=22, value="("),
+        XDRTokenIdentifier(line=19, column=23, value="filekind"),
+        XDRTokenIdentifier(line=19, column=32, value="kind"),
+        XDRTokenPunctuation(line=19, column=36, value=")"),
+        XDRTokenPunctuation(line=19, column=38, value="{"),
+        XDRTokenIdentifier(line=20, column=0, value="case"),
+        XDRTokenIdentifier(line=20, column=5, value="TEXT"),
+        XDRTokenPunctuation(line=20, column=9, value=":"),
+        XDRTokenIdentifier(line=21, column=3, value="void"),
+        XDRTokenPunctuation(line=21, column=7, value=";"),
+        XDRTokenIdentifier(line=22, column=0, value="case"),
+        XDRTokenIdentifier(line=22, column=5, value="DATA"),
+        XDRTokenPunctuation(line=22, column=9, value=":"),
+        XDRTokenIdentifier(line=23, column=3, value="string"),
+        XDRTokenIdentifier(line=23, column=10, value="creator"),
+        XDRTokenPunctuation(line=23, column=17, value="<"),
+        XDRTokenIdentifier(line=23, column=18, value="MAXNAMELEN"),
+        XDRTokenPunctuation(line=23, column=28, value=">"),
+        XDRTokenPunctuation(line=23, column=29, value=";"),
+        XDRTokenIdentifier(line=24, column=0, value="case"),
+        XDRTokenIdentifier(line=24, column=5, value="EXEC"),
+        XDRTokenPunctuation(line=24, column=9, value=":"),
+        XDRTokenIdentifier(line=25, column=3, value="string"),
+        XDRTokenIdentifier(line=25, column=10, value="interpretor"),
+        XDRTokenPunctuation(line=25, column=21, value="<"),
+        XDRTokenIdentifier(line=25, column=22, value="MAXNAMELEN"),
+        XDRTokenPunctuation(line=25, column=32, value=">"),
+        XDRTokenPunctuation(line=25, column=33, value=";"),
+        XDRTokenPunctuation(line=26, column=0, value="}"),
+        XDRTokenPunctuation(line=26, column=1, value=";"),
+        XDRTokenIdentifier(line=30, column=0, value="struct"),
+        XDRTokenIdentifier(line=30, column=7, value="file"),
+        XDRTokenPunctuation(line=30, column=12, value="{"),
+        XDRTokenIdentifier(line=31, column=3, value="string"),
+        XDRTokenIdentifier(line=31, column=10, value="filename"),
+        XDRTokenPunctuation(line=31, column=18, value="<"),
+        XDRTokenIdentifier(line=31, column=19, value="MAXNAMELEN"),
+        XDRTokenPunctuation(line=31, column=29, value=">"),
+        XDRTokenPunctuation(line=31, column=30, value=";"),
+        XDRTokenIdentifier(line=32, column=3, value="filetype"),
+        XDRTokenIdentifier(line=32, column=12, value="type"),
+        XDRTokenPunctuation(line=32, column=16, value=";"),
+        XDRTokenIdentifier(line=33, column=3, value="string"),
+        XDRTokenIdentifier(line=33, column=10, value="owner"),
+        XDRTokenPunctuation(line=33, column=15, value="<"),
+        XDRTokenIdentifier(line=33, column=16, value="MAXUSERNAME"),
+        XDRTokenPunctuation(line=33, column=27, value=">"),
+        XDRTokenPunctuation(line=33, column=28, value=";"),
+        XDRTokenIdentifier(line=34, column=3, value="opaque"),
+        XDRTokenIdentifier(line=34, column=10, value="data"),
+        XDRTokenPunctuation(line=34, column=14, value="<"),
+        XDRTokenIdentifier(line=34, column=15, value="MAXFILELEN"),
+        XDRTokenPunctuation(line=34, column=25, value=">"),
+        XDRTokenPunctuation(line=34, column=26, value=";"),
+        XDRTokenPunctuation(line=35, column=0, value="}"),
+        XDRTokenPunctuation(line=35, column=1, value=";"),
+    ]