diff --git a/scripts/check-html-references.py b/scripts/check-html-references.py index 6d9116585e..8e726928e0 100755 --- a/scripts/check-html-references.py +++ b/scripts/check-html-references.py @@ -30,13 +30,6 @@ def get_file_list(prefix): filelist = [] for root, dir, files in os.walk(prefix): - prefixbase = os.path.dirname(prefix) - - if root.startswith(prefixbase): - relroot = root[len(prefixbase):] - else: - relroot = root - for file in files: if not re.search('\\.html$', file): continue @@ -45,20 +38,21 @@ def get_file_list(prefix): if '404.html' in file: continue - fullfilename = os.path.join(root, file) - relfilename = os.path.join(relroot, file) - filelist.append((fullfilename, relfilename)) + filelist.append(os.path.join(root, file)) return filelist # loads an XHTML and extracts all anchors, local and remote links for the one file -def process_file(filetuple): - filename, relfilename = filetuple +def process_file(filename): tree = ET.parse(filename) root = tree.getroot() + docname = root.get('data-sourcedoc') - anchors = [relfilename] + if not docname: + docname = filename + + anchors = [filename] targets = [] for elem in root.findall('.//html:a', ns): @@ -66,30 +60,30 @@ def process_file(filetuple): an = elem.get('id') if an: - anchors.append(relfilename + '#' + an) + anchors.append(filename + '#' + an) if target: if re.search('://', target): externallinks.append(target) elif target[0] != '#' and 'mailto:' not in target: - dirname = os.path.dirname(relfilename) - targetname = os.path.normpath(os.path.join(dirname, target)) + dirname = os.path.dirname(filename) + targetfull = os.path.normpath(os.path.join(dirname, target)) - targets.append((targetname, filename, target)) + targets.append((filename, docname, targetfull, target)) # older docutils generate "