mirror of https://github.com/lukechilds/node.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
446 lines
15 KiB
446 lines
15 KiB
#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
|
|
|
|
"""Script to transform and merge sancov files into human readable json-format.
|
|
|
|
The script supports three actions:
all: Writes a json file with all instrumented lines of all executables.
merge: Merges sancov files with coverage output into an existing json file.
split: Split json file into separate files per covered source file.

The json data is structured as follows:
{
  "version": 1,
  "tests": ["executable1", "executable2", ...],
  "files": {
    "file1": [[<instr line 1>, <bit_mask>], [<instr line 2>, <bit_mask>], ...],
    "file2": [...],
    ...
  }
}

The executables are sorted and determine the test bit mask. Their index+1 is
the bit, e.g. executable1 = 1, executable3 = 4, etc. Hence, a line covered by
executable1 and executable3 will have bit_mask == 5 == 0b101. The number of
tests is restricted to 52 in version 1, to allow javascript JSON parsing of
the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1.

The line-number-bit_mask pairs are sorted by line number and don't contain
duplicates.

Split json data preserves the same format, but only contains one file per
json file.

The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added as a custom deps:
'v8/third_party/llvm/projects/compiler-rt':
    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
from multiprocessing import Pool, cpu_count
|
|
|
|
|
|
logging.basicConfig(level=logging.INFO)

# Path prefixes to exclude from coverage. Dropping their data early adds more
# speed. The contained cc files are already excluded from instrumentation, but
# inlined data is referenced through v8's object files.
EXCLUSIONS = [
    'buildtools',
    'src/third_party',
    'third_party',
    'test',
    'testing',
]

# Executables found in the build output for which no coverage is generated.
# Exclude them from the coverage data file.
EXE_BLACKLIST = [
    'generate-bytecode-expectations',
    'hello-world',
    'mksnapshot',
    'parser-shell',
    'process',
    'shell',
]

# V8 checkout directory (three directory levels above this file).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# Executable location. TODO(machenbach): Only release is supported for now.
BUILD_DIR = os.path.join(BASE_DIR, 'out', 'Release')

# Path prefix added by the llvm symbolizer including trailing slash. The
# empty last component makes os.path.join append the separator.
OUTPUT_PATH_PREFIX = os.path.join(BUILD_DIR, '..', '..', '')

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Simple script to sanitize the PCs from objdump.
SANITIZE_PCS = os.path.join(
    BASE_DIR, 'tools', 'sanitizers', 'sanitize_pcs.py')

# The llvm symbolizer location.
SYMBOLIZER = os.path.join(
    BASE_DIR, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
    'llvm-symbolizer')

# Number of cpus.
CPUS = cpu_count()

# Regexp to find sancov files as output by sancov_merger.py. Also grabs the
# executable name in group 1. Both dots of the '.result.sancov' suffix are
# escaped so that only the literal suffix matches.
SANCOV_FILE_RE = re.compile(r'^(.*)\.result\.sancov$')
|
|
|
|
|
|
def executables():
    """Yield the full path of each usable executable in the build directory.

    An entry of BUILD_DIR qualifies when it is a regular file, os.access
    reports it as executable (os.X_OK) and its name is not listed in
    EXE_BLACKLIST.
    """
    for name in os.listdir(BUILD_DIR):
        candidate = os.path.join(BUILD_DIR, name)
        if not os.path.isfile(candidate):
            continue
        if not os.access(candidate, os.X_OK):
            continue
        if name in EXE_BLACKLIST:
            continue
        yield candidate
|
|
|
|
|
|
def process_symbolizer_output(output):
    """Post-process llvm symbolizer output.

    Excludes files outside the v8 checkout or given in the EXCLUSIONS list
    from further processing. Drops the character index in each line.

    Args:
      output: Raw symbolizer output with one
          <file name>:<line number>:<character number> entry per line.
          Byte strings (as delivered by subprocess pipes on Python 3) are
          decoded as UTF-8 first.

    Returns: A mapping of file names to lists of line numbers. The file names
             have relative paths to the v8 base directory. The lists of line
             numbers don't contain duplicate lines and are sorted.

    Raises:
      ValueError: If a line below the path prefix does not end in
          ':<line number>:<character number>'.
    """
    # On Python 2 bytes is str, so this only triggers for Python 3 bytes.
    if isinstance(output, bytes) and not isinstance(output, str):
        output = output.decode('utf-8')

    # Map file names to sets of instrumented line numbers.
    prefix_length = len(OUTPUT_PATH_PREFIX)
    file_map = {}
    for line in output.strip().splitlines():
        # Drop files outside the path prefix, e.g. generated files in the
        # build dir and absolute paths to c++ library headers.
        if not line.startswith(OUTPUT_PATH_PREFIX):
            continue
        # Drop the path prefix, it is redundant and takes too much space.
        # Split from the right so that a colon inside the file name cannot
        # break the unpacking; the character number is not needed.
        file_name, number, _ = line[prefix_length:].rsplit(':', 2)
        file_map.setdefault(file_name, set()).add(int(number))

    # Remove exclusion patterns from the file map. It's cheaper to do it
    # after the mapping, as there are few excluded files and we don't want to
    # do this check for numerous lines in ordinary files.
    excluded_prefixes = tuple(EXCLUSIONS)

    # Return in serializable form and filter.
    return {
        file_name: sorted(lines)
        for file_name, lines in file_map.items()
        if not file_name.startswith(excluded_prefixes)
    }
|
|
|
|
|
|
def get_instrumented_lines(executable):
    """Return the instrumented lines of an executable.

    Called through multiprocessing pool.

    Args:
      executable: Path of the executable to disassemble and symbolize.

    Returns: Post-processed llvm output as returned by
             process_symbolizer_output.

    Raises:
      AssertionError: If the shell pipeline exits with a non-zero status.
    """
    # The first two pipes are from llvm's tool sancov.py with 0x added to the
    # hex numbers. The results are piped into the llvm symbolizer, which
    # outputs for each PC:
    # <file name with abs path>:<line number>:<character number>.
    # We don't call the sancov tool to get more speed.
    # Raw strings keep the grep backslashes literal without relying on
    # Python passing unknown escape sequences through unchanged.
    command = (
        r"objdump -d %s | "
        r"grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ "
        r"<__sanitizer_cov\(_with_check\|\)\(@plt\|\)>' | "
        r"grep '^\s\+[0-9a-f]\+' -o | "
        r"%s | "
        r"%s --obj %s -functions=none" %
        (executable, SANITIZE_PCS, SYMBOLIZER, executable)
    )
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
        cwd=BASE_DIR,
        shell=True,
    )
    output, errors = process.communicate()
    # NOTE: for a shell pipeline this is the exit status of the last stage
    # (the symbolizer) only. The captured stderr is included so a failure
    # can be diagnosed.
    assert process.returncode == 0, (
        'Reading instrumented lines of %s failed with exit code %d: %s' %
        (executable, process.returncode, errors))
    return process_symbolizer_output(output)
|
|
|
|
|
|
def merge_instrumented_line_results(exe_list, results):
    """Merge multiprocessing results for all instrumented lines.

    Works on both Python 2 and Python 3: no reliance on the reduce builtin,
    dict.iteritems or a list-returning map.

    Args:
      exe_list: List of all executable names with absolute paths.
      results: Iterable of results as returned by get_instrumented_lines,
          i.e. mappings of file name to instrumented line numbers.

    Returns: Dict to be used as json data as specified on the top of this
             page. The dictionary contains all instrumented lines of all
             files referenced by all executables.
    """
    # Union the line numbers per file over all executables.
    merged = {}
    for file_map in results:
        for file_name, lines in file_map.items():
            merged.setdefault(file_name, set()).update(lines)

    # Return data as file->lines mapping. The lines are saved as lists
    # with (line number, test bits (as int)). The test bits are initialized
    # with 0, meaning instrumented, but no coverage.
    # The order of the test bits is given with key 'tests'. For now, these
    # are the executable names. We use a _list_ with two items instead of a
    # tuple to ease merging by allowing mutation of the second item.
    return {
        'version': 1,
        'tests': sorted(os.path.basename(exe) for exe in exe_list),
        'files': {
            file_name: [[line, 0] for line in sorted(lines)]
            for file_name, lines in merged.items()
        },
    }
|
|
|
|
|
|
def write_instrumented(options):
    """Implement the 'all' action of this tool.

    Reads the instrumented lines of every executable yielded by
    executables() through a process pool, merges them and writes the
    resulting json data to options.json_output.
    """
    binaries = list(executables())
    logging.info('Reading instrumented lines from %d executables.',
                 len(binaries))

    workers = Pool(CPUS)
    try:
        pending = workers.imap_unordered(get_instrumented_lines, binaries)
    finally:
        workers.close()

    # Merge multiprocessing results and prepare output data. The result
    # iterator is consumed by the merge step.
    merged = merge_instrumented_line_results(binaries, pending)

    test_count = len(merged['tests'])
    file_count = len(merged['files'])
    logging.info('Read data from %d executables, which covers %d files.',
                 test_count, file_count)
    logging.info('Writing results to %s', options.json_output)

    # Write json output.
    with open(options.json_output, 'w') as json_file:
        json.dump(merged, json_file, sort_keys=True)
|
|
|
|
|
|
def get_covered_lines(args):
    """Return the covered lines of an executable.

    Called through multiprocessing pool. The args are expected to unpack to:
      cov_dir: Folder with sancov files merged by sancov_merger.py.
      executable: The executable that was called to produce the given
                  coverage data.
      sancov_file: The merged sancov file with coverage data.

    Returns: A tuple of post-processed llvm output as returned by
             process_symbolizer_output and the executable name.

    Raises:
      AssertionError: If the shell pipeline exits with a non-zero status.
    """
    cov_dir, executable, sancov_file = args

    # Let the sancov tool print the covered PCs and pipe them through the
    # llvm symbolizer.
    command = (
        '%s print %s 2> /dev/null | '
        '%s --obj %s -functions=none' %
        (SANCOV_TOOL,
         os.path.join(cov_dir, sancov_file),
         SYMBOLIZER,
         os.path.join(BUILD_DIR, executable))
    )
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
        cwd=BASE_DIR,
        shell=True,
    )
    output, errors = process.communicate()
    # NOTE: for a shell pipeline this is the exit status of the last stage
    # (the symbolizer) only. The captured stderr is included so a failure
    # can be diagnosed.
    assert process.returncode == 0, (
        'Reading covered lines from %s failed with exit code %d: %s' %
        (sancov_file, process.returncode, errors))
    return process_symbolizer_output(output), executable
|
|
|
|
|
|
def merge_covered_line_results(data, results):
    """Merge multiprocessing results for covered lines.

    The data is mutated, the results are merged into it in place. Nothing is
    returned.

    Args:
      data: Existing coverage data from json file containing all
            instrumented lines.
      results: Iterable of results as returned by get_covered_lines, i.e.
               tuples of (file name -> sorted covered lines, executable).

    Raises:
      AssertionError: If more than 52 tests are listed, or if a covered line
          is not among the instrumented lines of its file.
      KeyError: If a result names a file or an executable that is not
          present in data.
    """
    # List of executables and mapping to the test bit mask. The number of
    # tests is restricted to 52, to allow javascript JSON parsing of
    # the bitsets encoded as numbers. JS max safe int is (1 << 53) - 1.
    exe_list = data['tests']
    assert len(exe_list) <= 52, 'Max 52 different tests are supported.'
    test_bit_masks = {exe: 1 << i for i, exe in enumerate(exe_list)}

    def merge_lines(file_name, old_lines, new_lines, mask):
        """Merge the coverage data of a list of lines.

        Args:
          file_name: Name of the file the lines belong to; only used for the
              assertion message.
          old_lines: Lines as list of pairs with line number and test bit
              mask. The new lines will be merged into the list in place.
          new_lines: List of new (covered) lines (sorted).
          mask: The bit to be set for covered lines. The bit index is the
              test index of the executable that covered the line.
        """
        i = 0
        count = len(old_lines)
        # Iterate over old and new lines, both are sorted.
        for line in new_lines:
            # Forward instrumented lines not present in this coverage data.
            # The bound check comes first so that an empty or exhausted list
            # ends in the assertion below rather than in an IndexError.
            while i < count and old_lines[i][0] < line:
                i += 1
            assert i < count and old_lines[i][0] == line, (
                'Covered line %d of %s not in input file.' %
                (line, file_name))

            # Add coverage information to the line.
            old_lines[i][1] |= mask

    files = data['files']
    for file_map, executable in results:
        for file_name, lines in file_map.items():
            merge_lines(file_name, files[file_name], lines,
                        test_bit_masks[executable])
|
|
|
|
|
|
def merge(options):
    """Implement the 'merge' action of this tool.

    Symbolizes every sancov file found in options.coverage_dir through a
    process pool, merges the covered lines into the json data loaded from
    options.json_input and writes the result to options.json_output.
    """
    coverage_dir = options.coverage_dir

    # Check if folder with coverage output exists.
    assert os.path.exists(coverage_dir) and os.path.isdir(coverage_dir)

    # Inputs for multiprocessing. List of tuples of:
    # Coverage dir, executable name, sancov file name.
    candidates = [
        (name, SANCOV_FILE_RE.match(name))
        for name in os.listdir(coverage_dir)
    ]
    inputs = [
        (coverage_dir, found.group(1), name)
        for name, found in candidates
        if found
    ]

    logging.info('Merging %d sancov files into %s',
                 len(inputs), options.json_input)

    # Post-process covered lines in parallel.
    workers = Pool(CPUS)
    try:
        pending = workers.imap_unordered(get_covered_lines, inputs)
    finally:
        workers.close()

    # Load existing json data file for merging the results.
    with open(options.json_input, 'r') as json_file:
        coverage = json.load(json_file)

    # Merge multiprocessing results. Mutates the loaded data.
    merge_covered_line_results(coverage, pending)

    test_count = len(coverage['tests'])
    file_count = len(coverage['files'])
    logging.info('Merged data from %d executables, which covers %d files.',
                 test_count, file_count)
    logging.info('Writing results to %s', options.json_output)

    # Write merged results to file.
    with open(options.json_output, 'w') as json_file:
        json.dump(coverage, json_file, sort_keys=True)
|
|
|
|
|
|
def split(options):
    """Implement the 'split' action of this tool.

    Writes one json file per source file listed in the coverage data loaded
    from options.json_input. Each output keeps all top-level keys of the
    input, but its 'files' entry only contains that one source file. The
    outputs are placed below options.output_dir as <file name>.json.

    Raises:
      OSError: If a needed output directory can neither be created nor
          already exists as a directory.
    """
    # Load existing json data file for splitting.
    with open(options.json_input, 'r') as f:
        data = json.load(f)

    logging.info('Splitting off %d coverage files from %s',
                 len(data['files']), options.json_input)

    # items() instead of iteritems() keeps this working on Python 2 and 3.
    for file_name, coverage in data['files'].items():
        # Preserve relative directories that are part of the file name.
        file_path = os.path.join(options.output_dir, file_name + '.json')
        target_dir = os.path.dirname(file_path)
        if target_dir:
            try:
                os.makedirs(target_dir)
            except OSError:
                # Only an already existing directory is fine; any other
                # failure (e.g. missing permissions) must not be hidden.
                if not os.path.isdir(target_dir):
                    raise

        # Flat-copy the old dict and narrow it down to the current file.
        new_data = dict(data)
        new_data['files'] = {file_name: coverage}

        # Write json data.
        with open(file_path, 'w') as f:
            json.dump(new_data, f, sort_keys=True)
|
|
|
|
|
|
def main(args=None):
    """Parse the command line and run the requested action.

    Args:
      args: Optional list of command-line arguments; argparse falls back to
          sys.argv[1:] when it is None.

    Returns: 0 after the action ran, 1 if an option required by the chosen
             action is missing (the missing option is printed to stdout).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--coverage-dir',
                        help='Path to the sancov output files.')
    parser.add_argument('--json-input',
                        help='Path to an existing json file with coverage '
                             'data.')
    parser.add_argument('--json-output',
                        help='Path to a file to write json output to.')
    parser.add_argument('--output-dir',
                        help='Directory where to put split output files to.')
    parser.add_argument('action', choices=['all', 'merge', 'split'],
                        help='Action to perform.')

    options = parser.parse_args(args)

    # Options each action needs as (attribute, flag) pairs, in the order in
    # which they are checked and reported.
    required = {
        'all': [('json_output', '--json-output')],
        'merge': [('coverage_dir', '--coverage-dir'),
                  ('json_input', '--json-input'),
                  ('json_output', '--json-output')],
        'split': [('json_input', '--json-input'),
                  ('output_dir', '--output-dir')],
    }
    # argparse's choices already restrict action to these exact keys.
    for attribute, flag in required[options.action]:
        if not getattr(options, attribute):
            # Single-argument call form prints the same on Python 2 and 3.
            print('%s is required' % flag)
            return 1

    if options.action == 'all':
        write_instrumented(options)
    elif options.action == 'merge':
        merge(options)
    else:
        split(options)
    return 0
|
|
|
|
|
|
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)
|
|
|