source: CIVL/examples/omp/dataracebench-1.3.2/scripts/compareWithExistingPatterns.py@ a389857

main test-branch
Last change on this file since a389857 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 5.2 KB
Line 
1#!/usr/bin/env python3
2
3"""
4This is a script to compare a directory or a file with the existing patterns in the DRB.
5It provides flags to pass a directory or a file, to set the maximum number of matches, and to
6select the source type (*.c or *.f95/*.F95 files).
7It extracts the #pragma omp or !$omp statements and compares them using the longest common subsequence.
8It then computes a matching value between 0 and 1 and reports the top n matches.
9
10Usage: python compareWithExistingPatterns.py [-f/-d] [filename_path/directory_path] -n [int] -t [c/f]
11"""
12
13import argparse
14import gc
15import itertools
16import os
17import re
18import sys
19from collections import defaultdict
20
21import pylcs
22
23# This is the path for the benchmark programs
24DRB_PATH_FORTRAN = "../micro-benchmarks-fortran"
25DRB_PATH_C = "../micro-benchmarks"
26
27
28# Function to get patterns in a directory
def getPatternsDir(filePathToCheck):
    """Collect the OpenMP directive pattern of every source file in a directory.

    Only entries directly inside ``filePathToCheck`` are examined (no
    recursion).  Files ending in ``.f95``/``.F95`` are scanned for lines
    matching the ``!$`` sentinel; files ending in ``.c`` are scanned for lines
    containing ``#pragma omp``.  Every other entry is ignored.

    Args:
        filePathToCheck: path of the directory to scan.

    Returns:
        A ``defaultdict(list)`` mapping each scanned file name to a
        one-element list holding its pattern string.  The pattern is the
        empty string when the file has no directive lines.
    """
    sub_str = defaultdict(list)
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # resulting dict (and therefore tie-breaking in later reports) stable.
    for file in sorted(os.listdir(filePathToCheck)):
        path = os.path.join(filePathToCheck, file)
        if file.endswith((".F95", ".f95")):
            pattern = _directivePattern(
                path, "!$", lambda line: re.findall(r'!\$[^\]]+', line))
        elif file.endswith(".c"):
            pattern = _directivePattern(
                path, "#pragma", lambda line: "#pragma omp" in line)
        else:
            continue
        sub_str[file].append(pattern)

    return sub_str


def _directivePattern(path, sentinel, isDirective):
    """Return the space-joined, normalised directive lines of one file.

    A line is kept when ``isDirective(line)`` is truthy.  The ``sentinel``
    text and the substrings ``omp`` and ``end`` are removed from it and
    surrounding whitespace is stripped before it is added to the pattern.
    """
    directives = []
    with open(path, 'r') as _f:
        for line in _f:
            if isDirective(line):
                for token in (sentinel, "omp", "end"):
                    line = line.replace(token, "")
                directives.append(line.strip())
    return " ".join(directives)
54
55
56# Function to get patterns in a file
def getPatternsFile(filename):
    """Extract the OpenMP directive pattern of a single source file.

    A line counts as a directive when it matches the Fortran ``!$`` sentinel
    regex or, failing that, contains ``#pragma omp``.  The matching sentinel
    and the substrings ``omp`` and ``end`` are removed from the line and the
    stripped remainder is added to the pattern.

    Args:
        filename: path of the file to read.

    Returns:
        A ``defaultdict(list)`` with one entry: the file's base name mapped
        to a one-element list holding the space-joined pattern string (empty
        when the file has no directive lines).
    """
    directives = []
    with open(filename, 'r') as _f:
        for line in _f:
            # The Fortran check runs first; once it has removed "omp" the
            # line can no longer contain "#pragma omp", so the two cases are
            # mutually exclusive.
            if re.findall(r'!\$[^\]]+', line):
                sentinel = "!$"
            elif "#pragma omp" in line:
                sentinel = "#pragma"
            else:
                continue
            for token in (sentinel, "omp", "end"):
                line = line.replace(token, "")
            directives.append(line.strip())

    # Always key by base name, so a file without directives is reported the
    # same way as one with directives (previously it kept its full path).
    sub_str = defaultdict(list)
    sub_str[os.path.basename(filename)].append(" ".join(directives))
    return sub_str
77
78
79# Get the percentage similarity with the existing benchmark
def checkPatternSimilarity(parentPatternsDict, toCheckDict):
    """Score every new program against every benchmark program.

    Both arguments map a file name to a list whose first element is that
    file's directive pattern string.  For each pair the score is the value
    returned by ``pylcs.lcs`` (taken to be the longest-common-subsequence
    length -- confirm against the installed pylcs version) divided by the
    length of the new program's pattern.

    Args:
        parentPatternsDict: patterns of the existing benchmark programs.
        toCheckDict: patterns of the program(s) being checked.

    Returns:
        A ``defaultdict(dict)`` of ``{new_file: {benchmark_file: score}}``,
        or ``-1`` (after printing a message) when none of the programs to
        check has any directive text.
    """
    # A file without directives yields an empty pattern; dividing by its
    # length would raise ZeroDivisionError, so such entries are dropped here.
    candidates = {
        name: patterns[0]
        for name, patterns in toCheckDict.items()
        if patterns and patterns[0]
    }
    if not candidates:
        print("The new program doest not contain OpenMP Code")
        return -1

    _res = defaultdict(dict)

    for toCheckKey, pattern in candidates.items():
        for parKey, parValue in parentPatternsDict.items():
            lcs_len = pylcs.lcs(pattern, parValue[0])
            _res[toCheckKey][parKey] = lcs_len / len(pattern)

    return _res
94
95
96# Get the top n matching programs
def top_nmatch(dict, num):
    """Keep the ``num`` best-scoring benchmark matches for every program.

    Args:
        dict: mapping ``{program: {benchmark: score}}``.
        num: how many matches to keep per program; it is used directly as a
            slice bound, so ``None`` keeps all of them.

    Returns:
        A dict ``{program: [(benchmark, score), ...]}`` with each list
        ordered from highest to lowest score.
    """
    def score_of(pair):
        return pair[1]

    return {
        program: sorted(scores.items(), key=score_of, reverse=True)[:num]
        for program, scores in dict.items()
    }
104
105
106# main function starts here
if __name__ == "__main__":
    # Command-line entry point: parse the options, build the benchmark
    # patterns for the requested language, score the given directory and/or
    # file against them and print the best matches.
    parser = argparse.ArgumentParser(
        description="Compare pattern with the existing benchmark files. "
                    "One of these two optional arguments is required")
    parser.add_argument("--dirpath", "-d", help="pass the dir path to compare", type=str, action='store')
    parser.add_argument("--filepath", "-f", help="pass the file path to compare", type=str, action='store')
    parser.add_argument("--nmatch", "-n", help="pass the num of matches to print", type=int, action='store')
    parser.add_argument("--filetype", "-t", help="pass the type of file", type=str, action='store')
    args = parser.parse_args()

    # Nothing to compare: show the usage and stop, instead of carrying on
    # and failing later on an undefined result.
    if not args.dirpath and not args.filepath:
        parser.print_help()
        sys.exit(1)

    # Validated outside the try block so the exit status is not swallowed.
    if args.filetype == "c":
        benchmark_dir = DRB_PATH_C
    elif args.filetype == "f":
        benchmark_dir = DRB_PATH_FORTRAN
    else:
        print("Please provide either c/f as type -t")
        sys.exit(1)

    try:
        _getParentPatternsDir = getPatternsDir(benchmark_dir)

        if args.dirpath:
            _getPatternsDir = getPatternsDir(args.dirpath)
            res = checkPatternSimilarity(_getParentPatternsDir, _getPatternsDir)

        # When both -d and -f are given, the file result replaces the
        # directory result (unchanged from the previous behaviour).
        if args.filepath:
            _getPatternsFile = getPatternsFile(args.filepath)
            res = checkPatternSimilarity(_getParentPatternsDir, _getPatternsFile)

        if res == -1:
            sys.exit(1)

        top_n_match = top_nmatch(res, args.nmatch)
        print(top_n_match)
    except Exception as err:
        # Top-level boundary: report the cause and fail with a non-zero
        # status.  SystemExit is deliberately not caught here.
        print("Error occurred: {}: {}".format(type(err).__name__, err), file=sys.stderr)
        sys.exit(1)
    else:
        print("Completed Successfully!")
    finally:
        gc.collect()
Note: See TracBrowser for help on using the repository browser.