
New prism-log-extract script for processing PRISM log files.

The prism-log-extract script extracts and collates info from a
collection of PRISM log files.

The basic usage is "prism-log-extract <targets>" where <targets>
is one or more log files or directories containing log files.
The default behaviour is to extract all known fields from all logs
and then print the resulting table of values in CSV format.

Run "prism-log-extract --help" for details of further options.
accumulation-v4.7
Dave Parker, 6 years ago · commit 85a51698a4

prism/etc/scripts/prism-log-extract (+249 lines)

@@ -0,0 +1,249 @@
#!/usr/bin/env python
# The prism-log-extract script extracts and collates info from a
# collection of PRISM log files.
# The basic usage is "prism-log-extract <targets>" where <targets>
# is one or more log files or directories containing log files.
# The default behaviour is to extract all known fields from all logs
# and then print the resulting table of values in CSV format.
# Run "prism-log-extract --help" for details of further options.
import os, sys, re, signal
from optparse import OptionParser
#==================================================================================================
# Global variables
#==================================================================================================
# Details of all the fields that can be extracted from logs
all_fields_details = [
    {'name': 'log_dir', 'type': 'string'},
    {'name': 'log_file', 'type': 'string'},
    {'name': 'model_file', 'type': 'file', 'regexp': r'Parsing model file "(.+)"...'},
    {'name': 'model_consts', 'type': 'string', 'regexp': r'Model constants: (.+)'},
    {'name': 'model_type', 'regexp': r'Type: *(.+)'},
    {'name': 'states', 'regexp': r'States: *(.+) \((.+) initial\)'},
    {'name': 'time_constr', 'regexp': r'Time for model construction: *(.+) sec'},
    {'name': 'prop_file', 'type': 'file', 'regexp': r'Parsing properties file "(.+)"...'},
    {'name': 'prop_consts', 'type': 'string', 'regexp': r'Property constants: (.+)'},
    {'name': 'iters_check', 'regexp': r'took ([^ \n]+) iterations', 'match': 'last'},
    {'name': 'time_check', 'regexp': r'Time for model checking: *(.+) sec'},
    {'name': 'result', 'regexp': r'^Result.*: ([^( \n]+)'},
]
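# For example, a log line such as "Time for model checking: 0.32 seconds."
# (hypothetical log content) matches the 'time_check' regexp above, yielding
# the value '0.32'. Fields marked with 'match': 'last' keep the final match
# found in the log rather than the first.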
# Names of all fields
all_fields = list(map(lambda x: x['name'], all_fields_details))
# Meta-fields
meta_fields = {
    'all': all_fields,
    'model': ['model_file', 'model_consts'],
    'prop': ['prop_file', 'prop_consts'],
    'benchmark': ['model_file', 'model_consts', 'prop_file', 'prop_consts'],
}
#==================================================================================================
# Utility functions
#==================================================================================================
# Returns a sorted list of files / directories in dir
def sorted_list_dir(dir):
    entries = os.listdir(dir)
    entries.sort()
    return entries
#==================================================================================================
# Functions
#==================================================================================================
# Takes a list of field names, including "meta-fields" (e.g. 'model'
# is shorthand for 'model_file','model_consts') and expands the meta-fields
def expand_meta_fields(fields):
    fields_expanded = []
    for field in fields:
        fields_expanded.extend(meta_fields[field] if field in meta_fields else [field])
    return fields_expanded
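# For example, expand_meta_fields(['model', 'result']) returns
# ['model_file', 'model_consts', 'result'].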
# Get the details of a field
def get_field_details(field):
    return next(filter(lambda x: x['name'] == field, all_fields_details))
# Extract info from a list of files/directories
def grep_for_info(fileOrDirs, fields):
    infos = []
    for fileOrDir in fileOrDirs:
        infos += grep_for_info_file_or_dir(fileOrDir, fields)
    return infos
# Extract info from a single file/directory (recurse unless asked not to)
def grep_for_info_file_or_dir(fileOrDir, fields):
    infos = []
    if os.path.isdir(fileOrDir):
        for file in [file for file in sorted_list_dir(fileOrDir) if file not in [".", "..", ".svn"]]:
            if os.path.isdir(os.path.join(fileOrDir, file)):
                if not options.nonRec:
                    infos += grep_for_info_file_or_dir(os.path.join(fileOrDir, file), fields)
            else:
                infos += grep_for_info_file(os.path.join(fileOrDir, file), fields)
    else:
        infos += grep_for_info_file(fileOrDir, fields)
    return infos
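# For example (hypothetical layout), given logs/run1/a.log and logs/run2/b.log,
# grep_for_info(['logs'], fields) processes both files; with --non-recursive,
# only files directly inside logs/ are considered.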
# Extract info from a log file
def grep_for_info_file(logFile, fields):
    if options.extension and not logFile.endswith('.' + options.extension):
        return []
    info = {}
    # Initialise all fields
    for field in fields:
        info[field] = ''
    # For some fields, there is a specific way to define them
    if 'log_dir' in fields:
        info['log_dir'] = os.path.basename(os.path.dirname(logFile))
    if 'log_file' in fields:
        info['log_file'] = os.path.basename(logFile)
    # For most fields, a regexp is used to grep the log
    with open(logFile, 'r') as log:
        for line in log:
            for field in fields:
                field_details = get_field_details(field)
                if 'regexp' in field_details and (info[field] == '' or ('match' in field_details and field_details['match'] == 'last')):
                    regexp = field_details['regexp']
                    m = re.search(regexp, line)
                    if m is not None:
                        info[field] = m.group(1)
    # Some field processing based on type
    for field in info.keys():
        field_details = get_field_details(field)
        if 'type' in field_details and field_details['type'] == 'file':
            info[field] = os.path.basename(info[field])
        if 'type' in field_details and field_details['type'] in ['string', 'file']:
            info[field] = '"' + info[field] + '"'
    # If there is not at least a model_file, we assume something went wrong
    # (note: 'file'-type fields were wrapped in quotes above, so an empty
    # model_file appears as '""')
    if 'model_file' not in info or info['model_file'] != '""':
        return [info]
    else:
        return []
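# Each processed log yields a dict of field values, e.g. (hypothetical values):
# {'log_file': '"dice.log"', 'model_file': '"dice.pm"', 'time_check': '0.01', ...}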
# Print info from a log, i.e. a list of fields, comma-separated
def print_info(info, fields):
    values = []
    for field in fields:
        values.append(info[field])
    print(','.join(values))
#==================================================================================================
# Main program
#==================================================================================================
def printUsage():
    print("Usage: prism-log-extract ...")

def signal_handler(sig, frame):
    sys.exit(1)
# Parse options
signal.signal(signal.SIGINT, signal_handler)
parser = OptionParser(usage="usage: %prog [options] args")
parser.add_option("--fields", dest="fields", metavar="X", default="", help="Fields to extract from the log (comma-separated)")
parser.add_option("--groupby", dest="groupby", metavar="X", default="", help="Group log entries by these fields")
parser.add_option("--groupkey", dest="groupkey", metavar="X", default="", help="Key used for uniqueness of grouped log entries")
parser.add_option("--non-recursive", action="store_true", dest="nonRec", default=False, help="Don't recurse into directories")
parser.add_option("--extension", dest="extension", metavar="ext", default="", help="Process files with name .ext")
(options, args) = parser.parse_args()
if len(args) < 1:
    parser.print_help()
    sys.exit(1)
# Determine fields to be extracted
if options.fields:
    fields = options.fields.split(',')
    fields = expand_meta_fields(fields)
    for field in fields:
        if field not in all_fields:
            print('Error: Unknown field "' + field + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
# Default to all fields if none specified
else:
    fields = list(all_fields)
# print('Extracting fields: ' + ','.join(fields))
# Process grouping info
group_by = None
group_key = None
if options.groupby:
    group_by = options.groupby
    group_by = expand_meta_fields([group_by])[0]
    # Check the group_by field is valid
    if group_by not in all_fields:
        print('Error: Unknown "group by" field "' + group_by + '" (valid fields are: ' + ', '.join(all_fields) + ')')
        sys.exit(1)
    # Use default group_key if not provided
    group_key = options.groupkey.split(',') if options.groupkey else ['benchmark']
    group_key = expand_meta_fields(group_key)
    # Check group_key fields are valid
    for key in group_key:
        if key not in all_fields:
            print('Error: Unknown "group key" field "' + key + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
        if key == group_by:
            print('Error: "group key" field "' + key + '" is already used in "group by"')
            sys.exit(1)
    # Add group by/key fields to overall list of fields to use
    fields_new = []
    fields_new.extend([group_by])
    fields_new.extend(group_key)
    fields_new.extend(x for x in fields if x not in fields_new)
    fields = fields_new
    # print('Group: By: ' + ','.join([group_by]) + ', Key: ' + ','.join(group_key))
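# For example (hypothetical invocation), "prism-log-extract --groupby model_type logs"
# groups entries by the default 'benchmark' key, producing one row per benchmark
# and one set of columns per model type (e.g. "DTMC:time_check", "MDP:time_check").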
# Extract chosen fields from all files/dirs
infos = grep_for_info(args, fields)
# Group entries if requested
if group_by:
    # Get all values for group by/key (sorted, for deterministic output)
    group_by_vals = sorted(set(map(lambda x: x[group_by], infos)))
    group_key_vals = sorted(set(map(lambda info: '.'.join([info[key] for key in group_key if key in info]), infos)))
    # Modify list of fields for header
    # Key fields shown once at the start; others are repeated and prefixed with group
    fields_new = []
    fields_new += group_key
    for group_val in group_by_vals:
        group_val_trim = group_val.replace('"', '')
        fields_new.extend([group_val_trim + ':' + field for field in fields if field not in [group_by] + group_key])
    # Iterate through each key/group value and find (at most 1) matching entry
    infos_new = []
    for group_key_val in group_key_vals:
        info_new = {}
        # Get first matching entry and use to fill group key fields
        first_info_match = next(filter(lambda info: group_key_val == '.'.join([info[key] for key in group_key if key in info]), infos))
        info_new.update({x: first_info_match[x] for x in group_key})
        # For each group
        for group_val in group_by_vals:
            group_val_trim = group_val.replace('"', '')
            info_matches = [info for info in infos if (group_val == info[group_by] and group_key_val == '.'.join([info[key] for key in group_key if key in info]))]
            # >1 match: error
            if len(info_matches) > 1:
                print('Error: multiple entries matching ' + group_key_val + ' in group ' + group_val)
                sys.exit(1)
            # 1 match: store field values with names prefixed with group
            if len(info_matches) > 0:
                info = info_matches[0]
                info_new.update({group_val_trim + ':' + field: val for field, val in info.items() if field not in [group_by] + group_key})
            # 0 matches: store empty field values with names prefixed with group
            else:
                info_new.update({group_val_trim + ':' + field: "" for field in fields if field not in [group_by] + group_key})
        infos_new.append(info_new)
    fields = fields_new
    infos = infos_new
# Print entries (header, then rows)
print(','.join(fields))
for info in infos:
    print_info(info, fields)