mirror of
https://github.com/cloud-hypervisor/cloud-hypervisor.git
synced 2024-10-01 19:15:46 +00:00
d5f5648b37
We use cargo vendor to generate a .cargo/config file and the vendor directory. Vendoring allows us to lock our dependencies and to modify them easily from the top level Cargo.toml. We vendor all dependencies, including the crates.io ones, which allows for network isolated builds. Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
322 lines
10 KiB
Python
Executable File
322 lines
10 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
|
|
# file at the top-level directory of this distribution and at
|
|
# http://rust-lang.org/COPYRIGHT.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
# option. This file may not be copied, modified, or distributed
|
|
# except according to those terms.
|
|
|
|
# This script uses the following Unicode tables:
|
|
# - EastAsianWidth.txt
|
|
# - ReadMe.txt
|
|
# - UnicodeData.txt
|
|
#
|
|
# Since this should not require frequent updates, we just store this
|
|
# out-of-line and check the unicode.rs file into git.
|
|
|
|
import fileinput, re, os, sys, operator
|
|
|
|
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
|
// file at the top-level directory of this distribution and at
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
|
|
|
|
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
|
|
'''
|
|
|
|
# Mapping taken from Table 12 from:
|
|
# http://www.unicode.org/reports/tr44/#General_Category_Values
|
|
# Maps every two-letter General_Category value to the grouping categories
# that contain it (e.g. 'Lu' belongs to both 'LC' and 'L').
expanded_categories = {
    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
    'Lm': ['L'], 'Lo': ['L'],
    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
    # 'No' (Other Number) belongs to the 'N' group, just like 'Nd' and 'Nl'.
    'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}
|
|
|
|
# Inclusive (first, last) bounds of the surrogate code points; these are not
# valid Rust characters, so they are left out of the generated tables.
surrogate_codepoints = (0xD800, 0xDFFF)
|
|
|
|
def fetch(f):
    """Make sure the Unicode data file ``f`` is available in the current directory.

    If no file named after the base name of ``f`` exists yet, ``curl -O`` is
    asked to download ``f`` from the UNIDATA directory on www.unicode.org.
    When the file still does not exist afterwards, an error is written to
    stderr and the process exits with status 1 (``SystemExit``).
    """
    # Local import: only this function needs it.
    import subprocess

    local_name = os.path.basename(f)
    if not os.path.exists(local_name):
        url = "http://www.unicode.org/Public/UNIDATA/%s" % f
        try:
            # Argument list instead of a shell string, so the file name can
            # never be interpreted as shell syntax.
            subprocess.call(["curl", "-O", url])
        except OSError as err:
            # curl itself could not be started; the check below reports the
            # missing file and aborts.
            sys.stderr.write("could not run curl: %s\n" % err)

    if not os.path.exists(local_name):
        sys.stderr.write("cannot load %s\n" % f)
        sys.exit(1)
|
|
|
|
def is_surrogate(n):
    """Return True when code point ``n`` lies within ``surrogate_codepoints`` (bounds inclusive)."""
    first, last = surrogate_codepoints[0], surrogate_codepoints[1]
    return first <= n <= last
|
|
|
|
def load_unicode_data(f):
    """Load a UnicodeData.txt-style file and group code points by general category.

    ``f`` is fetched if it is not present locally. Every line with exactly
    15 ``;``-separated fields is read; surrogate code points are skipped, and
    ``<..., First>`` / following-line pairs are expanded to every code point
    in between, all sharing the fields of the closing line.

    Returns a dict mapping each category name -- the two-letter general
    category, the pseudo-category "Assigned", and the groups listed in
    ``expanded_categories`` -- to a sorted list of inclusive ``(lo, hi)``
    code point ranges, as produced by ``group_cats``.
    """
    fetch(f)

    udict = {}
    range_start = -1
    for line in fileinput.input(f):
        data = line.split(';')
        if len(data) != 15:
            continue
        cp = int(data[0], 16)
        if is_surrogate(cp):
            continue
        if range_start >= 0:
            # The previous line opened a range: fill in everything up to (but
            # not including) this code point; cp itself is stored below.
            # `range` is used rather than the Python-2-only `xrange` so the
            # script runs on both Python 2 and Python 3.
            for i in range(range_start, cp):
                udict[i] = data
            range_start = -1
        if data[1].endswith(", First>"):
            range_start = cp
            continue
        udict[cp] = data

    gencats = {}
    for code in udict:
        # Field 2 is the general category; the other fields are not needed.
        gencat = udict[code][2]

        # place letter in categories as appropriate
        for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
            gencats.setdefault(cat, []).append(code)

    return group_cats(gencats)
|
|
|
|
def group_cats(cats):
    """Return a new dict with every category's code point list collapsed by ``group_cat``.

    ``cats`` maps category name -> iterable of code points; the result maps
    the same names to lists of inclusive ``(lo, hi)`` ranges.
    """
    return {name: group_cat(codepoints) for name, codepoints in cats.items()}
|
|
|
|
def group_cat(cat):
    """Collapse a collection of code points into sorted, inclusive ranges.

    ``cat`` is any iterable of integers; duplicates and ordering do not
    matter. Returns a list of ``(lo, hi)`` tuples in ascending order where
    each tuple covers a maximal run of consecutive values. An empty input
    yields an empty list.
    """
    letters = sorted(set(cat))
    if not letters:
        return []

    cat_out = []
    cur_start = cur_end = letters[0]
    # After sorted(set(...)) every following value is strictly greater than
    # the previous one, so only "adjacent or not" has to be decided.
    for letter in letters[1:]:
        if letter == cur_end + 1:
            cur_end = letter
        else:
            cat_out.append((cur_start, cur_end))
            cur_start = cur_end = letter
    cat_out.append((cur_start, cur_end))
    return cat_out
|
|
|
|
def format_table_content(f, content, indent):
    """Write comma-separated ``content`` to ``f``, wrapped and indented.

    ``content`` is split on "," and the pieces are re-joined with ", ".
    A piece that would bring the pending line to 98 characters or more
    flushes that line (followed by ",\\n") and starts a new one. Every line
    begins with ``indent`` spaces; the final line is written without a
    trailing newline.
    """
    margin = " " * indent
    pending = margin
    nothing_placed_yet = True
    for piece in content.split(","):
        if len(pending) + len(piece) >= 98:
            # Piece does not fit: emit the pending line and start a new one.
            f.write(pending + ",\n")
            pending = margin + piece
            continue
        if nothing_placed_yet:
            pending += piece
        else:
            pending += ", " + piece
        nothing_placed_yet = False
    f.write(pending)
|
|
|
|
# load all widths of want_widths, except those in except_cats
|
|
def load_east_asian_width(want_widths, except_cats):
    """Collect code point ranges from EastAsianWidth.txt, keyed by width class.

    The file is fetched if it is not present locally. A line is kept when
    its width property is in ``want_widths`` and the general category named
    in its trailing comment is not in ``except_cats``.

    Returns a dict mapping each kept width property to a list of inclusive
    ``(lo, hi)`` integer code point tuples, in file order. Widths that never
    occur are absent from the dict.
    """
    f = "EastAsianWidth.txt"
    fetch(f)
    widths = {}
    # Raw strings: "\w" and "\." are regex escapes, not string escapes.
    # re1 matches a single code point line, re2 a "lo..hi" range line.
    re1 = re.compile(r"^([0-9A-F]+);(\w+) +# (\w+)")
    re2 = re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)")

    for line in fileinput.input(f):
        m = re1.match(line)
        if m:
            d_lo = d_hi = m.group(1)
            width = m.group(2)
            cat = m.group(3)
        else:
            m = re2.match(line)
            if not m:
                # Comment, blank or otherwise unrecognised line.
                continue
            d_lo, d_hi, width, cat = m.groups()
        if cat in except_cats or width not in want_widths:
            continue
        widths.setdefault(width, []).append((int(d_lo, 16), int(d_hi, 16)))
    return widths
|
|
|
|
def escape_char(c):
    """Return code point ``c`` as a Rust char literal, e.g. 0x41 -> '\\u{41}' (quotes included)."""
    hex_digits = "%x" % c
    return "'\\u{" + hex_digits + "}'"
|
|
|
|
def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
        pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True):
    """Write one Rust slice declaration to ``f``.

    f        -- writable text stream.
    name     -- Rust identifier of the table.
    t_data   -- iterable of rows.
    t_type   -- Rust type annotation of the table.
    is_pub   -- prefix the declaration with "pub ".
    pfun     -- formats one row as Rust source; the default renders a
                (char, char) pair.
    is_const -- declare with "const" when true, "let" otherwise.

    The rows are joined with "," and laid out by ``format_table_content``
    with an indent of 8.
    """
    keyword = "const" if is_const else "let"
    if is_pub:
        keyword = "pub " + keyword
    f.write(" %s %s: %s = &[\n" % (keyword, name, t_type))
    rows = ",".join(pfun(row) for row in t_data)
    format_table_content(f, rows, 8)
    f.write("\n ];\n\n")
|
|
|
|
def emit_charwidth_module(f, width_table):
    """Write the Rust ``charwidth`` module to ``f``.

    The module consists of a binary-search helper over the width table, the
    public ``width(c, is_cjk)`` function, and the table itself.
    ``width_table`` is an iterable of ``(lo, hi, width, width_cjk)`` rows;
    the first two items are rendered as Rust char literals and the last two
    are written as-is.
    """
    # Rust emitted verbatim. The helper returns the CJK or non-CJK width of
    # the range containing `c`, and 1 when no range contains it.
    # NOTE(review): the emitted Rust carries whatever leading whitespace
    # these literals contain; run rustfmt on the output if layout matters.
    search_helper = """
use core::option::Option::{self, Some, None};
use core::result::Result::{Ok, Err};

#[inline]
fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Ok(idx) => {
let (_, _, r_ncjk, r_cjk) = r[idx];
if is_cjk { r_cjk } else { r_ncjk }
}
Err(_) => 1
}
}
"""

    # Public entry point: handles NUL, control characters and ASCII directly
    # and defers everything from 0xA0 upwards to the table lookup.
    width_function = """
#[inline]
pub fn width(c: char, is_cjk: bool) -> Option<usize> {
match c as usize {
_c @ 0 => Some(0), // null is zero width
cu if cu < 0x20 => None, // control sequences have no width
cu if cu < 0x7F => Some(1), // ASCII
cu if cu < 0xA0 => None, // more control sequences
_ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize)
}
}

"""

    def format_row(row):
        # One table row as a Rust (char, char, u8, u8) tuple literal.
        return "(%s,%s,%s,%s)" % (escape_char(row[0]), escape_char(row[1]), row[2], row[3])

    f.write("pub mod charwidth {")
    f.write(search_helper)
    f.write(width_function)
    f.write(" // character width table. Based on Markus Kuhn's free wcwidth() implementation,\n")
    f.write(" // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n")
    emit_table(f, "charwidth_table", width_table, "&'static [(char, char, u8, u8)]", is_pub=False,
            pfun=format_row)
    f.write("}\n\n")
|
|
|
|
def remove_from_wtable(wtable, val):
    """Return a copy of ``wtable`` in which code point ``val`` is not covered.

    ``wtable`` is a list of ``(lo, hi, width, width_cjk)`` rows with
    inclusive bounds, expected to be sorted by ``lo``. A row containing
    ``val`` is dropped (when it covers only ``val``), trimmed (when ``val``
    is one of its bounds) or split in two. Scanning stops at the first row
    that starts after ``val``; that row and everything behind it are copied
    unchanged. The input list is not modified.
    """
    wtable_out = []
    for index, entry in enumerate(wtable):
        if entry[1] < val:
            # Row ends before val: keep as is.
            wtable_out.append(entry)
        elif entry[0] > val:
            # Row starts after val: nothing further needs to change.
            wtable_out.extend(wtable[index:])
            break
        else:
            (wt_lo, wt_hi, width, width_cjk) = entry
            # Keep whatever part of the row lies on either side of val.
            if wt_lo < val:
                wtable_out.append((wt_lo, val - 1, width, width_cjk))
            if wt_hi > val:
                wtable_out.append((val + 1, wt_hi, width, width_cjk))
    return wtable_out
|
|
|
|
|
|
|
|
def optimize_width_table(wtable):
    """Merge adjacent rows of a width table that carry the same widths.

    ``wtable`` is a list of ``(lo, hi, width, width_cjk)`` rows sorted by
    ``lo``. Two neighbouring rows are merged when the second starts right
    after the first ends and BOTH width columns are equal. Returns the
    merged list; an empty table yields an empty list. The input list is not
    modified.
    """
    if not wtable:
        return []

    wtable_out = []
    w_this = wtable[0]
    for w_next in wtable[1:]:
        # [2:4] compares the non-CJK and the CJK width; comparing only the
        # first of them would merge rows that differ in their CJK width.
        if w_this[1] == w_next[0] - 1 and w_this[2:4] == w_next[2:4]:
            w_this = (w_this[0], w_next[1], w_next[2], w_next[3])
        else:
            wtable_out.append(w_this)
            w_this = w_next
    wtable_out.append(w_this)
    return wtable_out
|
|
|
|
# Script entry point: regenerate tables.rs in the current directory from the
# Unicode data files (downloaded on demand by fetch()).
if __name__ == "__main__":
    r = "tables.rs"
    if os.path.exists(r):
        os.remove(r)
    with open(r, "w") as rf:
        # write the file's preamble
        rf.write(preamble)

        # download and parse all the data
        fetch("ReadMe.txt")
        with open("ReadMe.txt") as readme:
            pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
            version_match = re.search(pattern, readme.read())
        if version_match is None:
            # Fail with a clear message rather than an AttributeError on None.
            sys.stderr.write("cannot find the Unicode version in ReadMe.txt\n")
            sys.exit(1)
        unicode_version = version_match.groups()
        rf.write("""
/// The version of [Unicode](http://www.unicode.org/)
/// that this version of unicode-width is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);

""" % unicode_version)
        gencats = load_unicode_data("UnicodeData.txt")

        ### character width module
        # Rows are (lo, hi, width, width_cjk). List comprehensions replace
        # the tuple-parameter lambdas, which are a syntax error on Python 3.
        width_table = []
        for zwcat in ["Me", "Mn", "Cf"]:
            width_table.extend([(lo, hi, 0, 0) for (lo, hi) in gencats[zwcat]])
        # U+1160..U+11FF is hard-coded as zero width.
        # NOTE(review): presumably the Hangul Jamo medial vowels and final
        # consonants, following wcwidth.c -- confirm.
        width_table.append((0x1160, 0x11FF, 0, 0))

        # get widths, except those that are explicitly marked zero-width above
        ea_widths = load_east_asian_width(["W", "F", "A"], ["Me", "Mn", "Cf"])
        # these are doublewidth
        for dwcat in ["W", "F"]:
            width_table.extend([(lo, hi, 2, 2) for (lo, hi) in ea_widths[dwcat]])
        # "A" rows are width 1 normally and width 2 in a CJK context
        width_table.extend([(lo, hi, 1, 2) for (lo, hi) in ea_widths["A"]])

        width_table.sort(key=lambda w: w[0])

        # soft hyphen (U+00AD) is not zero width in preformatted text; it's
        # used to indicate a hyphen inserted to facilitate a linebreak.
        width_table = remove_from_wtable(width_table, 0xAD)

        # optimize the width table by collapsing adjacent entities when possible
        width_table = optimize_width_table(width_table)
        emit_charwidth_module(rf, width_table)
|