mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2024-12-22 05:35:25 +00:00
docs: Add HTML reference checker
In many cases we move around or rename internal anchors, which may break links leading to the content. docutils handles the case of links inside a single document, but we lack the same form of checking between documents. Introduce a script which cross-checks all the anchors and links in HTML output files and prints any problems, and use it as a test case for the 'docs' directory. Signed-off-by: Peter Krempa <pkrempa@redhat.com> Reviewed-by: Ján Tomko <jtomko@redhat.com>
This commit is contained in:
parent
dcff02d797
commit
8452124669
@ -350,3 +350,14 @@ run_target(
|
||||
],
|
||||
depends: install_web_deps,
|
||||
)
|
||||
|
||||
# Cross-check anchors and link targets in the generated HTML output;
# fails when a link points to a non-existent file or anchor
# (see scripts/check-html-references.py).
test(
  'check-html-references',
  python3_prog,
  args: [
    check_html_references_prog.path(),
    '--prefix',
    meson.build_root() / 'docs'
  ],
  env: runutf8,
)
|
||||
|
153
scripts/check-html-references.py
Executable file
153
scripts/check-html-references.py
Executable file
@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library. If not, see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# Check that external references between documentation HTML files are not broken.
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# XHTML namespace mapping used by all ElementTree 'html:' queries below
ns = {'html': 'http://www.w3.org/1999/xhtml'}
# external (scheme://) link targets collected by process_file();
# printed by the '--external' mode of the script
externallinks = []
|
||||
|
||||
|
||||
def get_file_list(prefix):
    """Recursively collect HTML files under 'prefix'.

    Returns a list of (fullfilename, relfilename) tuples, where
    'relfilename' is the path relative to the parent directory of
    'prefix' (i.e. it retains the last component of 'prefix').
    The 404 page is skipped as it doesn't play well with the checks.
    """
    filelist = []
    # invariant across the whole walk; hoisted out of the loop
    prefixbase = os.path.dirname(prefix)

    for root, dirs, files in os.walk(prefix):
        if root.startswith(prefixbase):
            relroot = root[len(prefixbase):]
        else:
            relroot = root

        for file in files:
            # a fixed-suffix match doesn't need a regex
            if not file.endswith('.html'):
                continue

            # the 404 page doesn't play well
            if '404.html' in file:
                continue

            fullfilename = os.path.join(root, file)
            relfilename = os.path.join(relroot, file)
            filelist.append((fullfilename, relfilename))

    return filelist
|
||||
|
||||
|
||||
# loads an XHTML and extracts all anchors, local and remote links for the one file
def process_file(filetuple):
    """Parse a single XHTML file and return its (anchors, targets).

    'anchors' lists every link target this file provides (the file
    itself plus each '#id'), 'targets' lists every document-local link
    it contains as (resolved target, source file, original href).
    External (scheme://) links are recorded in the module-level
    'externallinks' list as a side effect.
    """
    filename, relfilename = filetuple
    doc = ET.parse(filename).getroot()

    # XHTML namespace mapping for the queries below (same value as the
    # module-level 'ns')
    html_ns = {'html': 'http://www.w3.org/1999/xhtml'}

    # the file itself is always a valid link target
    anchors = [relfilename]
    targets = []

    for link in doc.findall('.//html:a', html_ns):
        href = link.get('href')
        anchor = link.get('id')

        if anchor:
            anchors.append(relfilename + '#' + anchor)

        if not href:
            continue

        if re.search('://', href):
            # full URL -> only collected for the '--external' report
            externallinks.append(href)
        elif href[0] != '#' and 'mailto:' not in href:
            # relative link: resolve against this file's directory
            base = os.path.dirname(relfilename)
            resolved = os.path.normpath(os.path.join(base, href))

            targets.append((resolved, filename, href))

    # anchors produced by docutils: older versions emit
    # "<div class='section'", modern ones a <section> element
    sections = doc.findall('.//html:div/[@class=\'section\']', html_ns)
    sections += doc.findall('.//html:section', html_ns)

    for section in sections:
        anchor = section.get('id')

        if anchor:
            anchors.append(relfilename + '#' + anchor)

    return (anchors, targets)
|
||||
|
||||
|
||||
def process_all(filelist):
    """Process every (full, relative) file tuple in 'filelist'.

    Returns the combined (targets, anchors) lists from all files.
    """
    anchors = []
    targets = []

    for entry in filelist:
        fileanchors, filetargets = process_file(entry)

        targets.extend(filetargets)
        anchors.extend(fileanchors)

    return (targets, anchors)
|
||||
|
||||
|
||||
def check_targets(targets, anchors):
    """Report links which point to a non-existent target.

    'targets' is a list of (resolved target, source file, original
    href) tuples; 'anchors' is the list of all known link targets.
    Prints every broken link and returns True if any were found,
    False otherwise.
    """
    # membership is tested once per link; a set turns the overall scan
    # from O(len(targets) * len(anchors)) into O(len(targets))
    anchorset = set(anchors)

    errors = []
    for target, targetfrom, targetorig in targets:
        if target not in anchorset:
            errors.append((targetfrom, targetorig))

    if errors:
        errors.sort()

        print('broken link targets:')

        for file, target in errors:
            print(file + " broken link: " + target)

        return True

    return False
|
||||
|
||||
|
||||
# command line handling and entry point
parser = argparse.ArgumentParser(description='HTML reference checker')
parser.add_argument('--prefix', default='.',
                    help='build tree prefix')
parser.add_argument('--external', action="store_true",
                    help='print external references instead')

args = parser.parse_args()

files = get_file_list(args.prefix)

targets, anchors = process_all(files)

if args.external:
    # print each external link once: the list is sorted in place so
    # duplicates are adjacent and skipped by comparing neighbours
    externallinks.sort()

    previous = None
    for link in externallinks:
        if link != previous:
            print(link)

        previous = link
else:
    # exit non-zero when any broken link was reported
    if check_targets(targets, anchors):
        sys.exit(1)

sys.exit(0)
|
@ -6,6 +6,7 @@ scripts = [
|
||||
'check-driverimpls.py',
|
||||
'check-drivername.py',
|
||||
'check-file-access.py',
|
||||
'check-html-references.py',
|
||||
'check-remote-protocol.py',
|
||||
'check-symfile.py',
|
||||
'check-symsorting.py',
|
||||
|
Loading…
Reference in New Issue
Block a user