
New prism-log-extract script for processing PRISM log files.

The prism-log-extract script extracts and collates info from a
collection of PRISM log files.

The basic usage is "prism-log-extract <targets>" where <targets>
is one or more log files or directories containing log files.
The default behaviour is to extract all known fields from all logs
and then print the resulting table of values in CSV format.

Run "prism-log-extract --help" for details of further options.
accumulation-v4.7
Dave Parker, 6 years ago · commit 85a51698a4

prism/etc/scripts/prism-log-extract (+249 lines)

@@ -0,0 +1,249 @@
#!/usr/bin/env python
# The prism-log-extract script extracts and collates info from a
# collection of PRISM log files.
# The basic usage is "prism-log-extract <targets>" where <targets>
# is one or more log files or directories containing log files.
# The default behaviour is to extract all known fields from all logs
# and then print the resulting table of values in CSV format.
# Run "prism-log-extract --help" for details of further options.
import os, sys, re, signal
from optparse import OptionParser
#==================================================================================================
# Global variables
#==================================================================================================
# Details of all the fields that can be extracted from logs
all_fields_details = [
    {'name': 'log_dir', 'type': 'string'},
    {'name': 'log_file', 'type': 'string'},
    {'name': 'model_file', 'type': 'file', 'regexp': r'Parsing model file "(.+)"...'},
    {'name': 'model_consts', 'type': 'string', 'regexp': r'Model constants: (.+)'},
    {'name': 'model_type', 'regexp': r'Type: *(.+)'},
    {'name': 'states', 'regexp': r'States: *(.+) \((.+) initial\)'},
    {'name': 'time_constr', 'regexp': r'Time for model construction: *(.+) sec'},
    {'name': 'prop_file', 'type': 'file', 'regexp': r'Parsing properties file "(.+)"...'},
    {'name': 'prop_consts', 'type': 'string', 'regexp': r'Property constants: (.+)'},
    {'name': 'iters_check', 'regexp': r'took ([^ \n]+) iterations', 'match': 'last'},
    {'name': 'time_check', 'regexp': r'Time for model checking: *(.+) sec'},
    {'name': 'result', 'regexp': r'^Result.*: ([^( \n]+)'},
]
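# For example, a log line such as "Time for model checking: 0.32 seconds."
# (hypothetical log content) matches the 'time_check' regexp above, yielding
# the value '0.32'. Fields marked with 'match': 'last' keep the final match
# found in the log rather than the first.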
# Names of all fields
all_fields = list(map(lambda x: x['name'], all_fields_details))
# Meta-fields
meta_fields = {
    'all': all_fields,
    'model': ['model_file', 'model_consts'],
    'prop': ['prop_file', 'prop_consts'],
    'benchmark': ['model_file', 'model_consts', 'prop_file', 'prop_consts'],
}
#==================================================================================================
# Utility functions
#==================================================================================================
# Returns a sorted list of files / directories in dir
def sorted_list_dir(dir):
    entries = os.listdir(dir)
    entries.sort()
    return entries
#==================================================================================================
# Functions
#==================================================================================================
# Takes a list of field names, including "meta-fields" (e.g. 'model'
# is shorthand for 'model_file','model_consts') and expands the meta-fields
def expand_meta_fields(fields):
    fields_expanded = []
    for field in fields:
        fields_expanded.extend(meta_fields[field] if field in meta_fields else [field])
    return fields_expanded
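# For example, expand_meta_fields(['model', 'result']) returns
# ['model_file', 'model_consts', 'result'].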
# Get the details of a field
def get_field_details(field):
    return next(filter(lambda x: x['name'] == field, all_fields_details))
# Extract info from a list of files/directories
def grep_for_info(fileOrDirs, fields):
    infos = []
    for fileOrDir in fileOrDirs:
        infos += grep_for_info_file_or_dir(fileOrDir, fields)
    return infos
# Extract info from a single file/directory (recurse unless asked not to)
def grep_for_info_file_or_dir(fileOrDir, fields):
    infos = []
    if os.path.isdir(fileOrDir):
        for file in [file for file in sorted_list_dir(fileOrDir) if file not in [".", "..", ".svn"]]:
            if os.path.isdir(os.path.join(fileOrDir, file)):
                if not options.nonRec:
                    infos += grep_for_info_file_or_dir(os.path.join(fileOrDir, file), fields)
            else:
                infos += grep_for_info_file(os.path.join(fileOrDir, file), fields)
    else:
        infos += grep_for_info_file(fileOrDir, fields)
    return infos
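# For example (hypothetical layout), given logs/run1/a.log and logs/run2/b.log,
# grep_for_info(['logs'], fields) processes both files; with --non-recursive,
# only files directly inside logs/ are considered.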
# Extract info from a log file
def grep_for_info_file(logFile, fields):
    if options.extension and not logFile.endswith('.' + options.extension):
        return []
    info = {}
    # Initialise all fields
    for field in fields:
        info[field] = ''
    # For some fields, there is a specific way to define them
    if 'log_dir' in fields:
        info['log_dir'] = os.path.basename(os.path.dirname(logFile))
    if 'log_file' in fields:
        info['log_file'] = os.path.basename(logFile)
    # For most fields, a regexp is used to grep the log
    with open(logFile, 'r') as log:
        for line in log:
            for field in fields:
                field_details = get_field_details(field)
                if 'regexp' in field_details and (info[field] == '' or ('match' in field_details and field_details['match'] == 'last')):
                    regexp = field_details['regexp']
                    m = re.search(regexp, line)
                    if m is not None:
                        info[field] = m.group(1)
    # Some field processing based on type
    for field in info.keys():
        field_details = get_field_details(field)
        if 'type' in field_details and field_details['type'] == 'file':
            info[field] = os.path.basename(info[field])
        if 'type' in field_details and field_details['type'] in ['string', 'file']:
            info[field] = '"' + info[field] + '"'
    # If there is not at least a model_file, we assume something went wrong
    # (note: 'file'-type fields were wrapped in quotes above, so an empty
    # model_file appears as '""')
    if 'model_file' not in info or info['model_file'] != '""':
        return [info]
    else:
        return []
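# Each processed log yields a dict of field values, e.g. (hypothetical values):
# {'log_file': '"dice.log"', 'model_file': '"dice.pm"', 'time_check': '0.01', ...}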
# Print info from a log, i.e. a list of fields, comma-separated
def print_info(info, fields):
    values = []
    for field in fields:
        values.append(info[field])
    print(','.join(values))
#==================================================================================================
# Main program
#==================================================================================================
def printUsage():
    print("Usage: prism-log-extract ...")

def signal_handler(sig, frame):
    sys.exit(1)
# Parse options
signal.signal(signal.SIGINT, signal_handler)
parser = OptionParser(usage="usage: %prog [options] args")
parser.add_option("--fields", dest="fields", metavar="X", default="", help="Fields to extract from the log (comma-separated)")
parser.add_option("--groupby", dest="groupby", metavar="X", default="", help="Group log entries by these fields")
parser.add_option("--groupkey", dest="groupkey", metavar="X", default="", help="Key used for uniqueness of grouped log entries")
parser.add_option("--non-recursive", action="store_true", dest="nonRec", default=False, help="Don't recurse into directories")
parser.add_option("--extension", dest="extension", metavar="ext", default="", help="Process files with name .ext")
(options, args) = parser.parse_args()
if len(args) < 1:
    parser.print_help()
    sys.exit(1)
# Determine fields to be extracted
if options.fields:
    fields = options.fields.split(',')
    fields = expand_meta_fields(fields)
    for field in fields:
        if field not in all_fields:
            print('Error: Unknown field "' + field + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
# Default to all fields if none specified
else:
    fields = list(all_fields)
# print('Extracting fields: ' + ','.join(fields))
# Process grouping info
group_by = None
group_key = None
if options.groupby:
    group_by = options.groupby
    group_by = expand_meta_fields([group_by])[0]
    # Check the group_by field is valid
    if group_by not in all_fields:
        print('Error: Unknown "group by" field "' + group_by + '" (valid fields are: ' + ', '.join(all_fields) + ')')
        sys.exit(1)
    # Use default group_key if not provided
    group_key = options.groupkey.split(',') if options.groupkey else ['benchmark']
    group_key = expand_meta_fields(group_key)
    # Check group_key fields are valid
    for key in group_key:
        if key not in all_fields:
            print('Error: Unknown "group key" field "' + key + '" (valid fields are: ' + ', '.join(all_fields) + ')')
            sys.exit(1)
        if key == group_by:
            print('Error: "group key" field "' + key + '" is already used in "group by"')
            sys.exit(1)
    # Add group by/key fields to overall list of fields to use
    fields_new = []
    fields_new.extend([group_by])
    fields_new.extend(group_key)
    fields_new.extend(x for x in fields if x not in fields_new)
    fields = fields_new
    # print('Group: By: ' + ','.join([group_by]) + ', Key: ' + ','.join(group_key))
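# For example (hypothetical invocation), "prism-log-extract --groupby model_type logs"
# groups entries by the default 'benchmark' key, producing one row per benchmark
# and one set of columns per model type (e.g. "DTMC:time_check", "MDP:time_check").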
# Extract chosen fields from all files/dirs
infos = grep_for_info(args, fields)
# Group entries if requested
if group_by:
    # Get all values for group by/key (sorted, for deterministic output)
    group_by_vals = sorted(set(map(lambda x: x[group_by], infos)))
    group_key_vals = sorted(set(map(lambda info: '.'.join([info[key] for key in group_key if key in info]), infos)))
    # Modify list of fields for header
    # Key fields shown once at the start; others are repeated and prefixed with group
    fields_new = []
    fields_new += group_key
    for group_val in group_by_vals:
        group_val_trim = group_val.replace('"', '')
        fields_new.extend([group_val_trim + ':' + field for field in fields if field not in [group_by] + group_key])
    # Iterate through each key/group value and find (at most 1) matching entry
    infos_new = []
    for group_key_val in group_key_vals:
        info_new = {}
        # Get first matching entry and use to fill group key fields
        first_info_match = next(filter(lambda info: group_key_val == '.'.join([info[key] for key in group_key if key in info]), infos))
        info_new.update({x: first_info_match[x] for x in group_key})
        # For each group
        for group_val in group_by_vals:
            group_val_trim = group_val.replace('"', '')
            info_matches = [info for info in infos if (group_val == info[group_by] and group_key_val == '.'.join([info[key] for key in group_key if key in info]))]
            # >1 match: error
            if len(info_matches) > 1:
                print('Error: multiple entries matching ' + group_key_val + ' in group ' + group_val)
                sys.exit(1)
            # 1 match: store field values with names prefixed with group
            if len(info_matches) > 0:
                info = info_matches[0]
                info_new.update({group_val_trim + ':' + field: val for field, val in info.items() if field not in [group_by] + group_key})
            # 0 matches: store empty field values with names prefixed with group
            else:
                info_new.update({group_val_trim + ':' + field: "" for field in fields if field not in [group_by] + group_key})
        infos_new.append(info_new)
    fields = fields_new
    infos = infos_new
# Print entries (header, then rows)
print(','.join(fields))
for info in infos:
    print_info(info, fields)