| 1 | #!/usr/bin/env python3
|
|---|
| 2 |
|
|---|
| 3 | """
|
|---|
| 4 | This is a script to compare a directory or a file with the existing patterns in the DRB.
|
|---|
| 5 | It provides flags to pass a directory or a file, to set the maximum number of matches to report,
|
|---|
| 6 | and to select the file type (c for *.c, f for *.f95/*.F95), which picks the benchmark set to compare against.
|
|---|
| 7 | It extracts the #pragma omp or !$omp statements and compares them using the longest common subsequence.
|
|---|
| 8 | It then evaluates the matching value between 0 and 1, and reports top n matches.
|
|---|
| 9 |
|
|---|
| 10 | Usage: python compareExistingPatterns.py [-f/-d] [filename_path/directory_path] -n [int] -t [c/f]
|
|---|
| 11 | """
|
|---|
| 12 |
|
|---|
| 13 | import argparse
|
|---|
| 14 | import gc
|
|---|
| 15 | import itertools
|
|---|
| 16 | import os
|
|---|
| 17 | import re
|
|---|
| 18 | import sys
|
|---|
| 19 | from collections import defaultdict
|
|---|
| 20 |
|
|---|
| 21 | import pylcs
|
|---|
| 22 |
|
|---|
| 23 | # This is the path for the benchmark programs
|
|---|
| 24 | DRB_PATH_FORTRAN = "../micro-benchmarks-fortran"
|
|---|
| 25 | DRB_PATH_C = "../micro-benchmarks"
|
|---|
| 26 |
|
|---|
| 27 |
|
|---|
| 28 | # Function to get patterns in a directory
|
|---|
def getPatternsDir(filePathToCheck):
    """Collect the OpenMP directive text of each source file in a directory.

    Only the directory's direct entries are examined. Files ending in
    ``.F95``/``.f95`` are scanned for ``!$`` lines and files ending in ``.c``
    for ``#pragma omp`` lines; every other entry is ignored.

    Args:
        filePathToCheck: Path of the directory to scan.

    Returns:
        A ``defaultdict(list)`` mapping each file name to a one-element list.
        The element is the file's directive lines with the marker, ``omp`` and
        ``end`` removed, stripped, and joined with single spaces. A file
        without any directive maps to ``[""]``.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    sub_str = defaultdict(list)
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # result (and any later tie-breaking on equal scores) reproducible.
    for name in sorted(os.listdir(filePathToCheck)):
        if name.endswith((".F95", ".f95")):
            marker = "!$"
        elif name.endswith(".c"):
            marker = "#pragma"
        else:
            continue

        path = os.path.join(filePathToCheck, name)
        # A sub-directory may carry a source-like name; open() would fail on it.
        if not os.path.isfile(path):
            continue

        sub_str[name].append(_dir_directive_text(path, marker))

    return sub_str


def _dir_directive_text(path, marker):
    """Return the directive lines of *path*, cleaned and joined by spaces."""
    is_fortran = marker == "!$"
    found = []
    with open(path, 'r') as source:
        for line in source:
            if is_fortran:
                matched = re.search(r'!\$[^\]]+', line) is not None
            else:
                matched = "#pragma omp" in line
            if not matched:
                continue
            # Same removal order as before: marker first, then "omp", then "end".
            for token in (marker, "omp", "end"):
                line = line.replace(token, "")
            found.append(line.strip())
    return " ".join(found)
|
|---|
| 54 |
|
|---|
| 55 |
|
|---|
| 56 | # Function to get patterns in a file
|
|---|
def getPatternsFile(filename):
    """Collect the OpenMP directive text of a single source file.

    Each line is checked for a ``!$`` marker first and for ``#pragma omp``
    otherwise, regardless of the file's extension. Matching lines have the
    marker, ``omp`` and ``end`` removed and are stripped.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``defaultdict(list)`` with a single key, the base name of
        *filename*, mapped to a one-element list holding the cleaned directive
        lines joined with single spaces (``[""]`` when there are none).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Derive the key once so it is the base name whether or not the file
    # contains any directive (previously it stayed the full path when no line
    # matched).
    key = os.path.basename(filename)

    found = []
    with open(filename, 'r') as source:
        for line in source:
            if re.search(r'!\$[^\]]+', line):
                marker = "!$"
            elif "#pragma omp" in line:
                marker = "#pragma"
            else:
                continue
            for token in (marker, "omp", "end"):
                line = line.replace(token, "")
            found.append(line.strip())

    sub_str = defaultdict(list)
    sub_str[key].append(" ".join(found))
    return sub_str
|
|---|
| 77 |
|
|---|
| 78 |
|
|---|
| 79 | # Get the percentage similarity with the existing benchmark
|
|---|
def checkPatternSimilarity(parentPatternsDict, toCheckDict):
    """Score each program to check against every existing benchmark pattern.

    The score is the longest-common-subsequence length of the two pattern
    strings (from ``pylcs.lcs``) divided by the length of the pattern being
    checked.

    Args:
        parentPatternsDict: Mapping of benchmark file name to a list whose
            first element is that benchmark's pattern string.
        toCheckDict: Mapping of the same shape for the program(s) to check.

    Returns:
        A ``defaultdict(dict)`` of ``{checked name: {benchmark name: score}}``,
        or ``-1`` (after printing a notice) when no program to check has any
        OpenMP pattern text.
    """
    # Programs with no directives have an empty pattern. Scoring them would
    # divide by zero, so drop them before the comparison.
    candidates = {
        name: patterns[0]
        for name, patterns in toCheckDict.items()
        if patterns and patterns[0]
    }
    if not candidates:
        print("The new program doest not contain OpenMP Code")
        return -1

    _res = defaultdict(dict)

    for toCheckKey, pattern in candidates.items():
        for parKey, parValue in parentPatternsDict.items():
            parent_pattern = parValue[0] if parValue else ""
            lcs_len = pylcs.lcs(pattern, parent_pattern)
            _res[toCheckKey][parKey] = lcs_len / len(pattern)

    return _res
|
|---|
| 94 |
|
|---|
| 95 |
|
|---|
| 96 | # Get the top n matching programs
|
|---|
def top_nmatch(dict, num):
    """Keep the highest-scoring benchmark matches for each checked program.

    Args:
        dict: Mapping of ``{checked name: {benchmark name: score}}``.
        num: Number of matches to keep per program; it is used as a slice
            bound, so ``None`` keeps every match.

    Returns:
        A plain dict mapping each checked name to a list of
        ``(benchmark name, score)`` tuples ordered from highest to lowest
        score.
    """
    ranked = {}
    for name in dict:
        pairs = list(dict[name].items())
        # Stable sort: equal scores keep their original relative order.
        pairs.sort(key=lambda pair: pair[1], reverse=True)
        ranked[name] = pairs[:num]
    return ranked
|
|---|
| 104 |
|
|---|
| 105 |
|
|---|
| 106 | # main function starts here
|
|---|
def main(argv=None):
    """Run the command-line comparison and return a process exit status.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        ``0`` when the top matches were printed, ``1`` when the arguments are
        incomplete, the program to check has no OpenMP pattern, or an error
        occurred while reading or comparing files.
    """
    parser = argparse.ArgumentParser(
        # The trailing space keeps the two concatenated sentences apart.
        description="Compare pattern with the existing benchmark files. "
                    "One of these two optional arguments is required")
    parser.add_argument("--dirpath", "-d", help="pass the dir path to compare", type=str, action='store')
    parser.add_argument("--filepath", "-f", help="pass the file path to compare", type=str, action='store')
    parser.add_argument("--nmatch", "-n", help="pass the num of matches to print", type=int, action='store')
    parser.add_argument("--filetype", "-t", help="pass the type of file", type=str, action='store')
    args = parser.parse_args(argv)

    # Nothing to compare: show the help and stop instead of carrying on.
    if not args.dirpath and not args.filepath:
        parser.print_help()
        return 1

    if args.filetype == "c":
        benchmark_dir = DRB_PATH_C
    elif args.filetype == "f":
        benchmark_dir = DRB_PATH_FORTRAN
    else:
        print("Please provide either c/f as type -t")
        return 1

    try:
        _getParentPatternsDir = getPatternsDir(benchmark_dir)

        res = None
        if args.dirpath:
            _getPatternsDir = getPatternsDir(args.dirpath)
            res = checkPatternSimilarity(_getParentPatternsDir, _getPatternsDir)

        # When both -d and -f are given, the file result replaces the
        # directory result, as before.
        if args.filepath:
            _getPatternsFile = getPatternsFile(args.filepath)
            res = checkPatternSimilarity(_getParentPatternsDir, _getPatternsFile)

        if res == -1:
            return 1

        top_n_match = top_nmatch(res, args.nmatch)
        print(top_n_match)
    except Exception as err:
        # Exception (not a bare except) so SystemExit/KeyboardInterrupt are
        # not swallowed; the cause goes to stderr because no log is written.
        print("Error occurred. Check the log for more details.")
        print("{}: {}".format(type(err).__name__, err), file=sys.stderr)
        return 1
    finally:
        gc.collect()

    print("Completed Successfully!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|---|