#!/usr/bin/python3
#
# Release estimator for HAProxy
#
# A tool that monitors the HAProxy stable branches and calculates a proposed
# release date for the next minor release based on the bug fixes that are in
# the queue.
#
# Copyright 2020 HAProxy Technologies, Daniel Corbett <dcorbett@haproxy.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version
# 3 of the License, or (at your option) any later version.
#
#
17
18from lxml import html
19import requests
20import traceback
21import smtplib
22import math
23import copy
24import time
25import sys
26import argparse
27from datetime import datetime
28from datetime import timedelta
29from email.mime.text import MIMEText
30
# A version whose first release is at least MAX_VERSION_AGE days old
# is not reported on.
MAX_VERSION_AGE = 1095  # days

# A non-LTS version (odd minor number) is no longer reported on once its
# first release is more than MAX_VERSION_AGE_NONLTS days old.
MAX_VERSION_AGE_NONLTS = 547  # days

# Per severity/issue type thresholds:
#   count - how many fixes of this type must be queued before the type
#           takes part in the release date computation
#   time  - how many days are added to the merge date of the oldest queued
#           fix of this type to obtain a proposed release date
THRESHOLDS = dict(
    BUG=dict(count=1, time=28),
    BUILD=dict(count=1, time=28),
    MINOR=dict(count=1, time=28),
    MEDIUM=dict(count=1, time=30),
    MAJOR=dict(count=1, time=14),
    CRITICAL=dict(count=1, time=2),
)

# Urgency of a release, keyed by level: the level applies when the number of
# days left until the proposed date is at most the given value.  The empty
# value marks the level that applies when no limit matches.
RELEASE_URGENCY = dict(WARNING=7, NOTICE=21, INFO='')
53
def search_set(s, f):
    """Tell whether ``f`` is contained in at least one element of ``s``.

    Containment is tested with the ``in`` operator on each element, so for
    string elements this is a substring match.

    Args:
        s: iterable of elements to search (a set of strings in this script).
        f: value looked for inside each element.

    Returns:
        True if any element contains ``f``, False otherwise (including when
        ``s`` is empty).
    """
    return any(f in t for t in s)
58
def check_for_email(s, parser):
    """Stop the program with the usage text unless ``s`` contains an "@".

    Args:
        s: candidate e-mail address.
        parser: object providing ``print_help()``; it is called before
            exiting when ``s`` has no "@".

    Raises:
        SystemExit: when ``s`` does not contain "@".
    """
    if "@" in s:
        return
    parser.print_help()
    sys.exit()
63
def _find_release_date(changelog_lines, version_string):
    """Look up the release date of ``version_string`` in CHANGELOG lines.

    Lines are scanned in order.  A line is a candidate when it mentions
    ``version_string``, contains "200", "201" or "202" and contains a "/".
    The first space-separated field of a candidate is parsed as
    ``%Y/%m/%d``; candidates whose first field is not such a date are
    skipped instead of aborting the search.

    Args:
        changelog_lines: iterable of CHANGELOG lines (str).
        version_string: version to look for, e.g. "2.2.4".

    Returns:
        ``(date_text, date)`` where ``date_text`` is the first field as found
        in the file and ``date`` the parsed ``datetime``, or ``(None, None)``
        when no candidate line carries a parsable date.
    """
    for line in changelog_lines:
        # the below is a bit of a hack to parse out valid years in the CHANGELOG
        has_year = '201' in line or '202' in line or '200' in line
        if version_string in line and has_year and '/' in line:
            date_text = line.split(' ')[0]
            try:
                return date_text, datetime.strptime(date_text.strip(), '%Y/%m/%d')
            except ValueError:
                # The first field is not a date; keep looking.
                continue
    return None, None


def main():
    """Build the stable-bot report and print and/or e-mail it.

    Command line options: ``--print`` prints the subject and message,
    ``--send-mail`` sends it through the SMTP server on 127.0.0.1 and
    requires both ``--to-email`` and ``--from-email``.

    The list of versions is read from http://www.haproxy.org/bugs/, then for
    each version the matching bugs page and CHANGELOG are fetched, versions
    that are too old are skipped, and a proposed release date is derived
    from the queued fixes and THRESHOLDS.  The message is only printed/sent
    when at least one version has fixes waiting.

    Raises:
        SystemExit: on invalid command line usage (after printing the help),
            and with status 1 when the message cannot be built or the
            e-mail cannot be sent.
    """
    http_timeout = 30  # seconds; without it a stalled server hangs the bot
    versions = []
    issues = {}
    bugqueue = {}
    bugs = []
    num_to_word = {
        1: 'one',
        2: 'two',
        3: 'three',
        4: 'four',
        5: 'five',
        6: 'six',
        7: 'seven',
        8: 'eight',
        9: 'nine',
        10: 'ten',
        11: 'eleven',
        12: 'twelve',
        13: 'thirteen',
    }
    email_message = """Hi,

This is a friendly bot that watches fixes pending for the next haproxy-stable release! One such e-mail is sent periodically once patches are waiting in the last maintenance branch, and an ideal release date is computed based on the severity of these fixes and their merge date. Responses to this mail must be sent to the mailing list.

"""

    parser = argparse.ArgumentParser(description='HAProxy Stable Release Estimator')
    parser.add_argument('--print', action="store_true",
                        help='Print email only')
    parser.add_argument('--to-email', nargs=1, required=False,
                        help='Send email to <email>')
    parser.add_argument('--from-email', nargs=1, required=False,
                        help='Send email from <email>')
    parser.add_argument('--send-mail', action="store_true",
                        help='Send email')
    args = parser.parse_args()

    if not args.print and not args.send_mail and not args.to_email and not args.from_email:
        parser.print_help()
        sys.exit()

    # Sending requires both addresses; this also guarantees that to_email and
    # from_email below are set whenever send_mail is true.
    if args.send_mail and (not args.to_email or not args.from_email):
        parser.print_help()
        sys.exit()

    to_email = None
    from_email = None

    if args.to_email:
        check_for_email(args.to_email[0], parser)
        to_email = args.to_email[0]

    if args.from_email:
        check_for_email(args.from_email[0], parser)
        from_email = args.from_email[0]

    send_mail = bool(args.send_mail)

    #
    # Let's get the list of the current stable versions
    #

    page = requests.get('http://www.haproxy.org/bugs/', timeout=http_timeout)
    tree = html.fromstring(page.content)

    for header in tree.xpath('//th'):
        link_text = header.xpath('./a/text()')
        if link_text:
            versions.append(link_text[0])

    #
    # For each version let's check its age. We'll apply the following logic:
    #  - Skip the release if it's:
    #    * older than MAX_VERSION_AGE days
    #    * older than MAX_VERSION_AGE_NONLTS days and an odd numbered release (1.9,2.1,2.3)
    #
    # For all other valid releases we will then collect the number of bug fixes
    # in queue for each of the defined severity levels:
    #  - BUG
    #  - BUILD
    #  - MINOR
    #  - MEDIUM
    #  - MAJOR
    #  - CRITICAL
    #
    # We'll then begin calculating the proposed release date based on the last
    # release date plus the first commit date of the first bug fix for the defined
    # severity level.
    #
    # By default the proposed release dates use the following padding:
    #  (Can be modified in THRESHOLDS)
    #  - BUG/BUILD/MINOR - 28 days
    #  - MEDIUM - 30 days
    #  - MAJOR - 14 days
    #  - CRITICAL - 2 days
    #
    # After we have a proposed release date we will assign a release urgency
    # to it. As we get closer to the proposed release date the urgency level changes.
    # By default the urgency levels and their times are:
    #  - WARNING - proposed date is 7 days or less
    #  - NOTICE  - proposed date is 21 days or less
    #  - INFO    - proposed date is longer than the above
    #

    for version in versions:
        bugqueue[version] = {"total": 0, "last": ""}
        # Registered before any "continue" so that the per-version lookups
        # done after this loop always find an entry.
        issues[version] = {}
        version_thresholds = copy.deepcopy(THRESHOLDS)
        print("Collecting information on %s" % (version))
        page = requests.get('http://www.haproxy.org/bugs/bugs-%s.html' % (version),
                            timeout=http_timeout)
        tree = html.fromstring(page.content)

        issues_count = {}

        # parse out the CHANGELOG link
        changelog_url = tree.xpath('//a[contains(@href,"CHANGELOG")]/@href')[0]

        last_version = tree.xpath('//td[contains(text(), "last")]/../td/a/text()')[0]
        first_version = "%s.0" % (version)

        # Get CHANGELOG for release
        changelog_page = requests.get(changelog_url, timeout=http_timeout)
        changelog_lines = changelog_page.content.decode('utf-8', errors='replace').split('\n')

        # Both dates are looked up afresh for every version so that a value
        # found for a previous version can never be reused by mistake.
        last_release_date, last_release_datetime = _find_release_date(changelog_lines, last_version)
        _, first_release_datetime = _find_release_date(changelog_lines, first_version)

        if last_release_datetime is None or first_release_datetime is None:
            print("\t - Version: %s has no parsable release date in its CHANGELOG, skipping" % (version))
            continue

        # date on which this version was last released / first released
        bugqueue[version]['last'] = last_release_date
        bugqueue[version]['first'] = first_release_datetime

        # get unix timestamp for today and timestamp of first release date
        today_ts = datetime.today().timestamp()
        first_version_ts = first_release_datetime.timestamp()

        # calculate the age of this version in days and years
        version_age = math.ceil((today_ts - first_version_ts) / 86400)
        version_age_years = math.ceil(version_age / 365)

        # We do not want to monitor versions that are older
        # than MAX_VERSION_AGE or MAX_VERSION_AGE_NONLTS
        if version_age >= MAX_VERSION_AGE:
            print("\t - Version: %s is older than %d days, skipping" % (version, MAX_VERSION_AGE))
            continue

        if version_age > MAX_VERSION_AGE_NONLTS:
            if int(version.split('.')[1]) % 2 > 0:
                print("\t - Version: %s is not LTS and is older than %d days, skipping" % (version, MAX_VERSION_AGE_NONLTS))
                continue

        # If the release is older than 1 year let's increase the time until
        # a release is due. <base time threshold> * <version age years>
        if version_age_years > 1:
            for threshold in version_thresholds.values():
                threshold['time'] *= int(version_age_years)

        # Let's capture the bug table which contains each bug & their severity
        bug_table = tree.xpath('//th[contains(text(), "Severity")]/ancestor::table[last()]')[0]

        # Loop through bug table and parse out the title of each bug
        # found within the links and their merge date.
        # Example is: 2020-10-19 BUG/MINOR: disable dynamic OCSP load with BoringSSL
        for link in bug_table.xpath('.//a[contains(@href,"commitdiff")]'):
            # Capture the bug label
            # Example: BUG/MINOR: disable dynamic OCSP load with BoringSSL
            issue_tmp = link.xpath('./text()')[0]
            # Capture the date
            # Example: 2020-10-19
            date_tmp = link.xpath('../preceding-sibling::td/text()')[0]

            # Split the bug into a severity and a description.  Only the
            # part before the first ':' is inspected for a '/', so a slash
            # appearing in the description cannot break the parsing.
            label, _, description = issue_tmp.partition(':')
            label_parts = label.split('/')
            if len(label_parts) > 1:
                bug_type = label_parts[1].strip()
            else:
                bug_type = label_parts[0].strip()
            bug_text = description.strip()
            issues[version].setdefault(bug_type, set()).add("%s|%s" % (date_tmp, bug_text))

        # For each issue type (severity) (MINOR, MEDIUM, MAJOR, etc.) record
        # the number of queued fixes and the timestamp of the oldest one, as
        # calculations are based on the first time a severity was seen.
        for issue_type, entries in issues[version].items():
            merge_timestamps = [
                int(time.mktime(datetime.strptime(entry.split('|')[0], "%Y-%m-%d").timetuple()))
                for entry in entries
            ]
            issues_count[issue_type] = {
                'timestamp': min(merge_timestamps),
                'count': len(entries),
            }

        release_date = None
        total_count = 0

        # Let's check the count for each severity type and see if they
        # are greater than our thresholds count. This can be used to
        # hold off on calculating release estimates until a certain number of
        # MINOR bugs have accumulated.
        for issue_type, counters in issues_count.items():
            if counters['count'] >= version_thresholds[issue_type]['count']:
                # The threshold for this severity is reached: the candidate
                # release date is the date the first such issue was seen
                # plus the number of days configured for that issue type.
                # The earliest candidate across severities wins.  We also
                # add these fixes to the total for this version.
                total_count += counters['count']
                candidate = datetime.fromtimestamp(int(counters['timestamp'])) + timedelta(days=int(version_thresholds[issue_type]['time']))
                if release_date is None or candidate < release_date:
                    release_date = candidate

        # No severity reached its threshold: nothing to report for this version.
        if release_date is None:
            continue

        time_until_release = release_date - datetime.now()

        # Urgency level matching the time left until the proposed date.
        # NOTE: the level is computed but not included in the message below.
        for level in sorted(RELEASE_URGENCY.keys()):
            if not RELEASE_URGENCY[level]:
                release_urgency_msg = level
            elif time_until_release.days <= RELEASE_URGENCY[level]:
                release_urgency_msg = level

        # Express the time until (or since) the proposed date in whole weeks
        # for the message, i.e. "in two weeks or less".
        rounded_week_time = math.ceil(time_until_release.days / 7.0)
        if abs(rounded_week_time) > 1:
            week_word = 'weeks'
        else:
            week_word = 'week'
        # Spell the number out when a word is known, otherwise use digits so
        # that a long overdue release does not abort the report.
        weeks_in_words = num_to_word.get(abs(rounded_week_time), str(abs(rounded_week_time)))

        try:
            # We now have all of the required information for building
            # the email message.
            # TODO: Fix alignment
            email_message = """%s
 Last release %s was issued on %s. There are currently %d patches in the queue cut down this way:
""" % (email_message, last_version, last_release_datetime.strftime("%Y-%m-%d"), total_count)
            for issue_type in sorted(issues_count.keys()):
                email_message = "%s - %d %s, first one merged on %s\n" % (email_message, issues_count[issue_type]['count'], issue_type, datetime.fromtimestamp(int(issues_count[issue_type]['timestamp'])).strftime("%Y-%m-%d"))
            # The next release is the last one with its final component incremented.
            next_version = ".".join(last_version.split(".")[:-1]) + "." + str(int(last_version.split(".")[-1]) + 1)
            email_message = "%s\nThus the computed ideal release date for %s would be %s, " % (email_message, next_version, release_date.strftime("%Y-%m-%d"))
            if rounded_week_time < 0:
                email_message = "%swhich was %s %s ago.\n" % (email_message, weeks_in_words, week_word)
            elif rounded_week_time == 0:
                email_message = "%swhich was within the last week.\n" % (email_message)
            else:
                email_message = "%swhich is in %s %s or less.\n" % (email_message, weeks_in_words, week_word)
        except Exception:
            print(traceback.format_exc())
            sys.exit(1)

        # Total number of bugs fixed in this version
        # since last release.
        bugqueue[version]['total'] = total_count

    email_subject = "stable-bot: Bugfixes waiting for a release "

    # Add each version & their number of bugs to the subject
    for k in sorted(bugqueue.keys(), reverse=True):
        if bugqueue[k]['total'] > 0:
            email_subject = "%s %s (%d)," % (email_subject, k, bugqueue[k]['total'])

    email_subject = email_subject.rstrip(",")
    email_message = "%s\nThe current list of patches in the queue is:\n" % (email_message)
    uniq_issues = set()

    # Parse out unique issues across all versions so that we can
    # print them once with the list of affected versions.
    # Entries are "<date>|<text>"; split once so a '|' inside the text is kept.
    for k in bugqueue:
        for issue_type, entries in issues[k].items():
            for issue in entries:
                uniq_issues.add("%s|%s" % (issue_type, issue.split('|', 1)[1]))

    # Loop through the unique issues and determine which versions
    # are affected.  Iterating in sorted order keeps the output stable
    # from one run to the next.
    for uniq in sorted(uniq_issues):
        severity, bug_text = uniq.split('|', 1)
        affected_versions = sorted(
            k for k in bugqueue
            if search_set(issues[k].get(severity, ()), bug_text)
        )
        if affected_versions:
            bugs.append({"affected_versions": affected_versions, "bug": bug_text, "severity": severity})

    bugs.sort(key=lambda bug: bug['severity'])

    # Add each issue with affected versions to email message
    # Example:
    # - 1.8, 2.0, 2.1, 2.2 - MINOR : stats: fix validity of the json schema
    for bug in bugs:
        email_message = "%s - %s %s %s : %s\n" % (email_message, ", ".join(bug["affected_versions"]).ljust(14), "-".rjust(12), bug["severity"].ljust(7), bug["bug"])

    email_message = "%s\n-- \nThe haproxy stable-bot is freely provided by HAProxy Technologies to help improve the quality of each HAProxy release. If you have any issue with these emails or if you want to suggest some improvements, please post them on the list so that the solutions suiting the most users can be found.\n" % (email_message)

    # If a message with actual issues exists let's either print it or send out
    # an email.
    if "first one merged on" not in email_message:
        return

    if args.print:
        print(email_subject)
        print(email_message)

    if send_mail:
        print('Send email to:%s from:%s' % (to_email, from_email), end="")
        msg = MIMEText(email_message)
        msg['to'] = to_email
        msg['from'] = from_email
        msg['subject'] = email_subject
        msg.add_header('reply-to', to_email)
        try:
            # The context manager closes the connection once the message
            # has been handed over, or when sending fails.
            with smtplib.SMTP('127.0.0.1', timeout=10) as server:
                server.sendmail(msg['from'], [msg['to']], msg.as_string())
                print(" - Email sent")
        except (ConnectionRefusedError, smtplib.SMTPConnectError):
            print("- Error: SMTP Connection Error")
            sys.exit(1)
        except smtplib.SMTPServerDisconnected:
            print('- Error: SMTP Server Disconnect (possible timeout)')
            sys.exit(1)
        except (smtplib.SMTPRecipientsRefused, smtplib.SMTPSenderRefused):
            print('- Error: Recipients or Sender Refused')
            sys.exit(1)
        except (smtplib.SMTPHeloError, smtplib.SMTPAuthenticationError):
            print('- Error: SMTP rejected HELO or requires Authentication')
            sys.exit(1)
        except Exception:
            print(traceback.format_exc())
            sys.exit(1)
424
425
# Run the estimator only when executed as a script.  The final exit is kept
# inside the guard so that merely importing this module does not terminate
# the importing interpreter.
if __name__ == "__main__":
    main()
    sys.exit()