diff --git a/prism/etc/scripts/prism-log-extract b/prism/etc/scripts/prism-log-extract
new file mode 100755
index 00000000..25795033
--- /dev/null
+++ b/prism/etc/scripts/prism-log-extract
@@ -0,0 +1,249 @@
#!/usr/bin/env python3

# The prism-log-extract script extracts and collates info from a
# collection of PRISM log files.

# The basic usage is "prism-log-extract <files/dirs>", where <files/dirs>
# is one or more log files or directories containing log files.
# The default behaviour is to extract all known fields from all logs
# and then print the resulting table of values in CSV format.

# Run "prism-log-extract --help" for details of further options.
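# A few illustrative invocations (the paths and file names are hypothetical):
#
#   prism-log-extract logs/
#   prism-log-extract --fields model,states,time_check logs/run1.log logs/run2.log
#
# The first prints every known field for all logs found under logs/; the
# second prints only the model details (via the 'model' meta-field), the
# state count and the model checking time for the two named files.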
import os, re, signal, sys
from optparse import OptionParser

#==================================================================================================
# Global variables
#==================================================================================================

# Details of all the fields that can be extracted from logs
all_fields_details = [
    {'name': 'log_dir', 'type': 'string'},
    {'name': 'log_file', 'type': 'string'},
    {'name': 'model_file', 'type': 'file', 'regexp': 'Parsing model file "(.+)"...'},
    {'name': 'model_consts', 'type': 'string', 'regexp': 'Model constants: (.+)'},
    {'name': 'model_type', 'regexp': 'Type: *(.+)'},
    {'name': 'states', 'regexp': r'States: *(.+) \((.+) initial\)'},
    {'name': 'time_constr', 'regexp': 'Time for model construction: *(.+) sec'},
    {'name': 'prop_file', 'type': 'file', 'regexp': 'Parsing properties file "(.+)"...'},
    {'name': 'prop_consts', 'type': 'string', 'regexp': 'Property constants: (.+)'},
    {'name': 'iters_check', 'regexp': r'took ([^ \n]+) iterations', 'match': 'last'},
    {'name': 'time_check', 'regexp': 'Time for model checking: *(.+) sec'},
    {'name': 'result', 'regexp': r'^Result.*: ([^( \n]+)'},
]

# Names of all fields
all_fields = [f['name'] for f in all_fields_details]

# Meta-fields
meta_fields = {
    'all': all_fields,
    'model': ['model_file', 'model_consts'],
    'prop': ['prop_file', 'prop_consts'],
    'benchmark': ['model_file', 'model_consts', 'prop_file', 'prop_consts'],
}

#==================================================================================================
# Utility functions
#==================================================================================================

# Returns a sorted list of the files / directories in dir_name
def sorted_list_dir(dir_name):
    entries = os.listdir(dir_name)
    entries.sort()
    return entries

#==================================================================================================
# Functions
#==================================================================================================

# Takes a list of field names, including "meta-fields" (e.g. 'model'
# is shorthand for 'model_file','model_consts') and expands the meta-fields
def expand_meta_fields(fields):
    fields_expanded = []
    for field in fields:
        fields_expanded.extend(meta_fields[field] if field in meta_fields else [field])
    return fields_expanded

# Get the details of a field
def get_field_details(field):
    return next(x for x in all_fields_details if x['name'] == field)

# Extract info from a list of files/directories
def grep_for_info(fileOrDirs, fields):
    infos = []
    for fileOrDir in fileOrDirs:
        infos += grep_for_info_file_or_dir(fileOrDir, fields)
    return infos

# Extract info from a single file/directory (recurse unless asked not to)
def grep_for_info_file_or_dir(fileOrDir, fields):
    infos = []
    if os.path.isdir(fileOrDir):
        for entry in [e for e in sorted_list_dir(fileOrDir) if e not in [".", "..", ".svn"]]:
            if os.path.isdir(os.path.join(fileOrDir, entry)):
                if not options.nonRec:
                    infos += grep_for_info_file_or_dir(os.path.join(fileOrDir, entry), fields)
            else:
                infos += grep_for_info_file(os.path.join(fileOrDir, entry), fields)
    else:
        infos += grep_for_info_file(fileOrDir, fields)
    return infos

# Extract info from a log file
def grep_for_info_file(logFile, fields):
    if options.extension and not logFile.endswith('.' + options.extension):
        return []
    info = {}
    # Initialise all fields
    for field in fields:
        info[field] = ''
    # For some fields, there is a specific way to define them
    if 'log_dir' in fields:
        info['log_dir'] = os.path.basename(os.path.dirname(logFile))
    if 'log_file' in fields:
        info['log_file'] = os.path.basename(logFile)
    # For most fields, a regexp is used to grep the log
    with open(logFile, 'r') as log:
        for line in log:
            for field in fields:
                field_details = get_field_details(field)
                if 'regexp' in field_details and (info[field] == '' or field_details.get('match') == 'last'):
                    regexp = field_details['regexp']
                    m = re.search(regexp, line)
                    if m is not None:
                        info[field] = m.group(1)
    # If there is not at least a model_file (when it was extracted), we assume
    # something went wrong; check before the quoting below makes empty values truthy
    valid = info.get('model_file') != ''
    # Some field processing based on type
    for field in info:
        field_details = get_field_details(field)
        if field_details.get('type') == 'file':
            info[field] = os.path.basename(info[field])
        if field_details.get('type') in ['string', 'file']:
            info[field] = '"' + info[field] + '"'
    return [info] if valid else []
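# To illustrate the extraction above (the log line shown is a made-up example
# of the shape the 'states' regexp expects, not output from a real run):
#
#   States:      2048 (1 initial)
#
# matches r'States: *(.+) \((.+) initial\)' and sets info['states'] = '2048';
# only the first capture group is kept, so the initial-state count is discarded.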
# Print info from a log, i.e. a list of fields, comma-separated
def print_info(info, fields):
    values = []
    for field in fields:
        values.append(info[field])
    print(','.join(values))

#==================================================================================================
# Main program
#==================================================================================================

def printUsage():
    print("Usage: prism-log-extract [options] <files/dirs>")

def signal_handler(sig, frame):
    sys.exit(1)

# Parse options
signal.signal(signal.SIGINT, signal_handler)
parser = OptionParser(usage="usage: %prog [options] args")
parser.add_option("--fields", dest="fields", metavar="X", default="", help="Fields to extract from the log (comma-separated)")
parser.add_option("--groupby", dest="groupby", metavar="X", default="", help="Group log entries by this field")
parser.add_option("--groupkey", dest="groupkey", metavar="X", default="", help="Key used for uniqueness of grouped log entries")
parser.add_option("--non-recursive", action="store_true", dest="nonRec", default=False, help="Don't recurse into directories")
parser.add_option("--extension", dest="extension", metavar="ext", default="", help="Only process files with extension .ext")
(options, args) = parser.parse_args()
if len(args) < 1:
    parser.print_help()
    sys.exit(1)

# Determine fields to be extracted
if options.fields:
    fields = options.fields.split(',')
    fields = expand_meta_fields(fields)
    for field in fields:
        if field not in all_fields:
            print('Error: Unknown field "' + field + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
# Default to all fields if none specified
else:
    fields = list(all_fields)
# print('Extracting fields: ' + ','.join(fields))

# Process grouping info
group_by = None
group_key = None
if options.groupby:
    group_by = expand_meta_fields([options.groupby])[0]
    # Check the group_by field is valid
    if group_by not in all_fields:
        print('Error: Unknown "group by" field "' + group_by + '" (valid fields are: ' + ', '.join(all_fields) + ')')
        sys.exit(1)
    # Use default group_key if not provided
    group_key = options.groupkey.split(',') if options.groupkey else ['benchmark']
    group_key = expand_meta_fields(group_key)
    # Check group_key fields are valid
    for key in group_key:
        if key not in all_fields:
            print('Error: Unknown "group key" field "' + key + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
        if key == group_by:
            print('Error: "group key" field "' + key + '" is already used in "group by"')
            sys.exit(1)
    # Add group by/key fields to overall list of fields to use
    fields_new = []
    fields_new.extend([group_by])
    fields_new.extend(group_key)
    fields_new.extend(x for x in fields if x not in fields_new)
    fields = fields_new
# print('Group: By: ' + ','.join([group_by]) + ', Key: ' + ','.join(group_key))

# Extract chosen fields from all files/dirs
infos = grep_for_info(args, fields)
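# The grouping below pivots the flat table: one row per distinct value of the
# group-key fields and, for each distinct value V of the group-by field, a
# block of columns named "V:field". Illustratively (directory names are
# hypothetical), "--groupby log_dir" with key fields model_file,model_consts
# yields columns such as run1:states,run1:time_check,run2:states,run2:time_check.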
# Group entries if requested
if group_by:

    # Get all values for group by/key (sorted, so column/row order is deterministic)
    group_by_vals = sorted(set(map(lambda info: info[group_by], infos)))
    group_key_vals = sorted(set(map(lambda info: '.'.join([info[key] for key in group_key if key in info]), infos)))

    # Modify list of fields for header
    # Key fields are shown once at the start; others are repeated and prefixed with the group
    fields_new = []
    fields_new += group_key
    for group_val in group_by_vals:
        group_val_trim = group_val.replace('"', '')
        fields_new.extend([group_val_trim + ':' + field for field in fields if field not in [group_by] + group_key])

    # Iterate through each key/group value and find (at most 1) matching entry
    infos_new = []
    for group_key_val in group_key_vals:
        info_new = {}
        # Get the first matching entry and use it to fill the group key fields
        first_info_match = next(info for info in infos if group_key_val == '.'.join([info[key] for key in group_key if key in info]))
        info_new.update({x: first_info_match[x] for x in group_key})
        # For each group
        for group_val in group_by_vals:
            group_val_trim = group_val.replace('"', '')
            info_matches = [info for info in infos if (group_val == info[group_by] and group_key_val == '.'.join([info[key] for key in group_key if key in info]))]
            # >1 match: error
            if len(info_matches) > 1:
                print('Error: multiple entries matching ' + group_key_val + ' in group ' + group_val)
                sys.exit(1)
            # 1 match: store field values with names prefixed with the group
            if len(info_matches) > 0:
                info = info_matches[0]
                info_new.update({group_val_trim + ':' + field: val for field, val in info.items() if field not in [group_by] + group_key})
            # 0 matches: store empty field values with names prefixed with the group
            else:
                info_new.update({group_val_trim + ':' + field: "" for field in fields if field not in [group_by] + group_key})
        infos_new.append(info_new)

    fields = fields_new
    infos = infos_new

# Print entries (header, then rows)
print(','.join(fields))
for info in infos:
    print_info(info, fields)
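# Example of a grouped run (all names are hypothetical):
#
#   prism-log-extract --groupby log_dir --groupkey model logs/
#
# prints a header such as
#
#   model_file,model_consts,run1:log_file,run1:model_type,...,run2:log_file,...
#
# followed by one row per distinct model_file/model_consts pair found in the logs.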