4"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter

FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE, RATING_REGRESSION, RATING_IMPROVEMENT, RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}
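
# Note: the FORMAT_* strings wrap text in ANSI escape codes; for example,
# RATING_TO_COLOR[RATING_REGRESSION].format('+25%') renders '+25%' in bold
# red on terminals that support ANSI colors.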
35 """All conclusions drawn from a comparison.
37 This is initialized empty and then processes pairs of results for each test
38 case, determining the rating for that case, which can be:
39 "failure" if either or both runs for the case failed.
40 "regression" if there is a significant increase in time for the test case.
41 "improvement" if there is a significant decrease in time for the test case.
42 "no_change" if the time for the test case did not change at all.
43 "small_change" if the time for the test case changed but within the threshold.
47 """Initializes an empty ComparisonConclusions.
50 threshold_significant: Float with the tolerance beyond which changes in
51 measurements are considered significant.
53 The change is considered as a multiplication rather than an addition
54 of a fraction of the previous measurement, that is, a
55 threshold_significant of 1.0 will flag test cases that became over
56 100% slower (> 200% of the previous time measured) or over 100% faster
57 (< 50% of the previous time measured).
59 threshold_significant 0.02 -> 98.04% to 102% is not significant
60 threshold_significant 0.1 -> 90.9% to 110% is not significant
61 threshold_significant 0.25 -> 80% to 125% is not significant
62 threshold_significant 1 -> 50% to 200% is not significant
63 threshold_significant 4 -> 20% to 500% is not significant
69 self.
params = {
'threshold': threshold_significant}
74 """Feeds a test case results to the ComparisonConclusions.
77 case_name: String identifying the case.
78 before: Measurement for the "before" version of the code.
79 after: Measurement for the "after" version of the code.
89 if not before
or not after:
91 rating = RATING_FAILURE
93 ratio = (float(after) / before) - 1.0
95 rating = RATING_REGRESSION
97 rating = RATING_IMPROVEMENT
99 rating = RATING_NO_CHANGE
101 rating = RATING_SMALL_CHANGE
103 case_result =
CaseResult(case_name, before, after, ratio, rating)
105 self.
summary.ProcessCaseResult(case_result)
109 """Gets the ComparisonSummary with consolidated totals."""
113 """Gets a dict mapping each test case identifier to its CaseResult."""
117 """Returns a conclusions dict with all the conclusions drawn.
120 A serializable dict with the format illustrated below:
134 "comparison_by_case": {
135 "testing/resources/new_test.pdf": {
141 "testing/resources/test1.pdf": {
145 "rating": "regression"
147 "testing/resources/test2.pdf": {
151 "rating": "regression"
153 "testing/resources/test3.pdf": {
157 "rating": "small_change"
159 "testing/resources/test4.pdf": {
163 "rating": "no_change"
165 "testing/resources/test5.pdf": {
169 "rating": "improvement"
175 output_dict[
'version'] = 1
178 output_dict[
'comparison_by_case'] = {
179 cr.case_name.decode(
'utf-8'): cr.GetOutputDict()
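
  # Note: the dict returned by GetOutputDict() is json-serializable as-is;
  # for example, json.dumps(conclusions.GetOutputDict(), indent=2) (with the
  # standard json module) produces the structure illustrated above, with the
  # None ratios of failed cases serialized as "null".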
186 """Totals computed for a comparison."""
195 """Gets the number of test cases processed."""
199 """Gets the number of test cases processed with a given rating."""
203 """Returns a dict that can be serialized with all the totals."""
205 for rating
in RATINGS:
206 result[rating] = self.
GetCount(rating)
211 """The conclusion for the comparison of a single test case."""
213 def __init__(self, case_name, before, after, ratio, rating):
214 """Initializes an empty ComparisonConclusions.
217 case_name: String identifying the case.
218 before: Measurement for the "before" version of the code.
219 after: Measurement for the "after" version of the code.
220 ratio: Difference between |after| and |before| as a fraction of |before|.
221 rating: Rating for this test case.
230 """Returns a dict with the test case's conclusions."""
240 """Prints a conclusions dict in a human-readable way.
243 conclusions_dict: Dict to print.
244 colored: Whether to color the output to highlight significant changes.
245 key: String with the CaseResult dictionary key to sort the cases.
249 print(
'{0:>11s} {1:>15s} {2}'.format(
'% Change',
'Time after',
'Test case'))
252 color = FORMAT_NORMAL
257 conclusions_dict[
'comparison_by_case'].iteritems(),
258 key=
lambda kv: kv[1][key])
260 case_pairs = sorted(conclusions_dict[
'comparison_by_case'].iteritems())
262 for case_name, case_dict
in case_pairs:
264 color = RATING_TO_COLOR[case_dict[
'rating']]
266 if case_dict[
'rating'] == RATING_FAILURE:
267 print(
u'{} to measure time for {}'.format(
268 color.format(
'Failed'), case_name).
encode(
'utf-8'))
271 print(
u'{0} {1:15,d} {2}'.format(
272 color.format(
'{:+11.4%}'.format(case_dict[
'ratio'])),
273 case_dict[
'after'], case_name).
encode(
'utf-8'))
276 totals = conclusions_dict[
'summary']
278 print(
'Test cases run: %d' % totals[
'total'])
281 color = FORMAT_MAGENTA
if totals[RATING_FAILURE]
else FORMAT_GREEN
282 print(
'Failed to measure: %s' % color.format(totals[RATING_FAILURE]))
285 color = FORMAT_RED
if totals[RATING_REGRESSION]
else FORMAT_GREEN
286 print(
'Regressions: %s' % color.format(totals[RATING_REGRESSION]))
289 color = FORMAT_CYAN
if totals[RATING_IMPROVEMENT]
else FORMAT_GREEN
290 print(
'Improvements: %s' % color.format(totals[RATING_IMPROVEMENT]))
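

# A minimal usage sketch (hypothetical file names and timings, kept out of
# module imports by the __main__ guard): feed before/after measurements per
# case, then print the conclusions.
if __name__ == '__main__':
  conclusions = ComparisonConclusions(threshold_significant=0.02)
  conclusions.ProcessCase('testing/resources/test1.pdf', before=100, after=120)
  conclusions.ProcessCase('testing/resources/test4.pdf', before=100, after=100)
  conclusions.ProcessCase('testing/resources/new_test.pdf', before=0, after=95)
  PrintConclusionsDictHumanReadable(conclusions.GetOutputDict(), colored=True)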