Blame - scripts/mailmapper - filogic/uboot

blob: 2e2d7faff573d215361b97e89588a4524f347cb1 [file] [log] [blame]

Masahiro Yamada	c05e791	2018-01-21 18:34:57 +0900	[diff] [blame]	1	#!/usr/bin/env python2
Heinrich Schuchardt	c57590b	2018-06-03 18:59:13 +0200	[diff] [blame^]	2	# SPDX-License-Identifier: GPL-2.0+
Masahiro Yamada	030bfe8	2014-07-16 17:49:45 +0900	[diff] [blame]	3	#
				4	# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
Masahiro Yamada	030bfe8	2014-07-16 17:49:45 +0900	[diff] [blame]	5
				6	'''
				7	A tool to create/update the mailmap file
				8
				9	The command 'git shortlog' summarizes git log output in a format suitable
				10	for inclusion in release announcements. Each commit will be grouped by
				11	author and title.
				12
				13	One problem is that the authors' name and/or email address is sometimes
				14	spelled differently. The .mailmap feature can be used to coalesce together
				15	commits by the same person.
				16	(See 'man git-shortlog' for further information on this feature.)
				17
				18	This tool helps to create/update the mailmap file.
				19
				20	It runs 'git shortlog' internally and searches differently spelled author
				21	names which share the same email address. The author name with the most
				22	commits is assumed to be a canonical real name. If the number of commits
				23	from the canonical name is equal to or greater than 'MIN_COMMITS',
				24	the entry for the canonical name will be output. ('MIN_COMMITS' is used
				25	here because we do not want to create a fat mailmap by adding every author
				26	with only a few commits.)
				27
				28	If there exists a mailmap file specified by the mailmap.file configuration
				29	options or '.mailmap' at the toplevel of the repository, it is used as
				30	a base file. (The mailmap.file configuration takes precedence over the
				31	'.mailmap' file if both exist.)
				32
				33	The base file and the newly added entries are merged together and sorted
				34	alphabetically (but the comment block is kept untouched), and then printed
				35	to standard output.
				36
				37	Usage
				38	-----
				39
				40	scripts/mailmapper
				41
				42	prints the mailmapping to standard output.
				43
				44	scripts/mailmapper > tmp; mv tmp .mailmap
				45
				46	will be useful for updating '.mailmap' file.
				47	'''
				48
				49	import sys
				50	import os
				51	import subprocess
				52
				53	# The entries only for the canonical names with MIN_COMMITS or more commits.
				54	# This limitation is used so as not to create a too big mailmap file.
				55	MIN_COMMITS = 50
				56
				57	try:
				58	toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
				59	except subprocess.CalledProcessError:
Masahiro Yamada	880828d	2014-08-16 00:59:26 +0900	[diff] [blame]	60	sys.exit('Please run in a git repository.')
Masahiro Yamada	030bfe8	2014-07-16 17:49:45 +0900	[diff] [blame]	61
				62	# strip '\n'
				63	toplevel = toplevel.rstrip()
				64
				65	# Change the current working directory to the toplevel of the respository
				66	# for our easier life.
				67	os.chdir(toplevel)
				68
				69	# First, create 'auther name' vs 'number of commits' database.
				70	# We assume the name with the most commits as the canonical real name.
				71	shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n'])
				72
				73	commits_per_name = {}
				74
				75	for line in shortlog.splitlines():
				76	try:
				77	commits, name = line.split(None, 1)
				78	except ValueError:
				79	# ignore lines with an empty author name
				80	pass
				81	commits_per_name[name] = int(commits)
				82
				83	# Next, coalesce the auther names with the same email address
				84	shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e'])
				85
				86	mail_vs_name = {}
				87	output = {}
				88
				89	for line in shortlog.splitlines():
				90	# tmp, mail = line.rsplit(None, 1) is not safe
				91	# because weird email addresses might include whitespaces
				92	tmp, mail = line.split('<')
				93	mail = '<' + mail.rstrip()
				94	try:
				95	_, name = tmp.rstrip().split(None, 1)
				96	except ValueError:
				97	# author name is empty
				98	name = ''
				99	if mail in mail_vs_name:
				100	# another name for the same email address
				101	prev_name = mail_vs_name[mail]
				102	# Take the name with more commits
				103	major_name = sorted([prev_name, name],
				104	key=lambda x: commits_per_name[x] if x else 0)[1]
				105	mail_vs_name[mail] = major_name
				106	if commits_per_name[major_name] > MIN_COMMITS:
				107	output[mail] = major_name
				108	else:
				109	mail_vs_name[mail] = name
				110
				111	# [1] If there exists a mailmap file at the location pointed to
				112	# by the mailmap.file configuration option, update it.
				113	# [2] If the file .mailmap exists at the toplevel of the repository, update it.
				114	# [3] Otherwise, create a new mailmap file.
				115	mailmap_files = []
				116
				117	try:
				118	config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
				119	except subprocess.CalledProcessError:
				120	config_mailmap = ''
				121
				122	config_mailmap = config_mailmap.rstrip()
				123	if config_mailmap:
				124	mailmap_files.append(config_mailmap)
				125
				126	mailmap_files.append('.mailmap')
				127
				128	infile = None
				129
				130	for map_file in mailmap_files:
				131	try:
				132	infile = open(map_file)
				133	except:
				134	# Failed to open. Try next.
				135	continue
				136	break
				137
				138	comment_block = []
				139	output_lines = []
				140
				141	if infile:
				142	for line in infile:
				143	if line[0] == '#' or line[0] == '\n':
				144	comment_block.append(line)
				145	else:
				146	output_lines.append(line)
				147	break
				148	for line in infile:
				149	output_lines.append(line)
				150	infile.close()
				151
				152	for mail, name in output.items():
				153	output_lines.append(name + ' ' + mail + '\n')
				154
				155	output_lines.sort()
				156
				157	sys.stdout.write(''.join(comment_block + output_lines))