Viewing: branch_comm
#!/usr/bin/env python3
import re
import subprocess
import sys
class Change(object):
    """One commit read from ``git log`` plus values parsed from its body.

    Every instance is also a node of a disjoint-set forest (``_parent`` and
    ``_rank``), so that commits judged to be the same change can be merged
    into a single equivalence class.
    """

    def __init__(self):
        # Filled directly from the fields of a git log record.
        self.commit = self.author_name = self.author_email = ''
        self.author_date = 0
        self.subject = self.body = ''
        # Filled from 'Key: value' lines found in the commit body.
        self.number = 0
        self.change_id = self.reviewed_on = ''
        self.lustre_commit = self.lustre_change = ''
        self.lustre_change_number = 0
        self.cray_bug_id = self.hpe_bug_id = ''
        # Disjoint-set bookkeeping: a new change is its own representative.
        self._parent = self
        self._rank = 0

    def _find(self):
        """Return the representative of this change's equivalence class.

        Every node visited on the way up is re-pointed directly at the
        representative.
        """
        root = self
        while root._parent is not root:
            root = root._parent
        node = self
        while node._parent is not node:
            node._parent, node = root, node._parent
        return root

    def _union(self, c2):
        """Merge the equivalence classes of this change and *c2*.

        The representative with the lower rank is attached below the other
        one; on equal ranks this change's representative stays on top and
        its rank grows by one.
        """
        mine = self._find()
        theirs = c2._find()
        if mine is theirs:
            return
        if mine._rank < theirs._rank:
            mine._parent = theirs
            return
        if mine._rank == theirs._rank:
            mine._rank += 1
        theirs._parent = mine
# (Change attribute, git pretty-format placeholder) for every field requested
# from ``git log``, in output order.
_GIT_LOG_COLUMNS = (
    ('commit', '%H'),
    ('author_name', '%an'),
    ('author_email', '%ae'),
    ('author_date', '%at'),
    ('subject', '%s'),
    ('body', '%b'),
)
GIT_LOG_FIELDS = [field for field, _ in _GIT_LOG_COLUMNS]
GIT_LOG_KEYS = [key for _, key in _GIT_LOG_COLUMNS]
# Fields are separated by the byte 0x1f and each record ends with 0x1e.
GIT_LOG_FORMAT = '%x1f'.join(GIT_LOG_KEYS) + '%x1e'
# 'Key: value' lines of the commit body that are copied onto a Change; the
# line 'Some-Key: v' is stored in the attribute 'some_key'.
_TRAILER_KEYS = frozenset((
    'Change-Id',
    'Reviewed-on',
    'Lustre-commit',
    'Lustre-change',
    'Cray-bug-id',
    'HPE-bug-id',
))

# A review URL that ends in the change number.  Accepts the short form
# 'scheme://host/NNN' and the form 'scheme://host/c/<project>/+/NNN'; host
# names may contain hyphens.
_REVIEW_URL_RE = re.compile(r'[A-Za-z]+://[\w.\-]+(?:/c/[\w.\-/]+/\+)?/(\d+)$')


def _review_number(url):
    """Return the change number that ends review URL *url*, or 0 if none."""
    found = _REVIEW_URL_RE.match(url)
    return int(found.group(1)) if found else 0


def _change_from_record(rec):
    """Build a Change from one record of ``git log`` output.

    *rec* holds the GIT_LOG_FIELDS values, in that order, separated by the
    byte 0x1f.  Recognised 'Key: value' lines of the body are copied onto
    the change, and ``number`` / ``lustre_change_number`` are taken from the
    trailing number of the Reviewed-on / Lustre-change URLs (0 when the
    value is missing or is not such a URL).

    Raises ValueError if the author date field is not an integer.
    """
    change = Change()
    vars(change).update(zip(GIT_LOG_FIELDS, rec.split('\x1f')))
    change.author_date = int(change.author_date)

    for line in change.body.splitlines():
        # Sometimes we have 'key : value' so we strip both sides.
        key, colon, val = line.partition(':')
        key = key.strip()
        if colon and key in _TRAILER_KEYS:
            setattr(change, key.replace('-', '_').lower(), val.strip())

    change.number = _review_number(change.reviewed_on)
    change.lustre_change_number = _review_number(change.lustre_change)
    return change
def _head(lis):
if lis:
return lis[0]
else:
return None
class Branch(object):
    """Commit history of one git revision, indexed for looking up ports.

    ``load()`` reads the history with ``git log`` (restricted to ``paths``
    when any are given) and fills ``log`` and the ``by_*`` dictionaries.
    """

    def __init__(self, name, paths):
        """Remember revision *name* and the list *paths*; nothing is loaded yet."""
        self.name = name
        self.paths = paths
        self._reset()

    def _reset(self):
        """Empty the commit list and every index."""
        self.log = []  # Oldest commit is first.
        self.by_commit = {}  # commit hash (str) -> change
        self.by_subject = {}  # subject (str) -> list of changes
        self.by_change_id = {}  # Change-Id (str) -> list of changes
        self.by_number = {}  # review number (int) -> list of changes

    def _add_change_from_record(self, rec):
        """Parse *rec* into a Change, append it to the log and index it.

        Changes of this branch that share a Change-Id or a review number
        are merged into one equivalence class.
        """
        # TODO Handle reverted commits.
        change = _change_from_record(rec)
        self.log.append(change)

        assert change.commit
        assert change.commit not in self.by_commit
        self.by_commit[change.commit] = change

        assert change.subject
        # XXX Do we want to _union() changes that share a subject?
        self.by_subject.setdefault(change.subject, []).append(change)

        # Also index the change under its subject with the leading issue
        # replaced by each external bug id.
        for bug_id in (change.cray_bug_id, change.hpe_bug_id):
            if not bug_id or ' ' not in change.subject:
                continue
            words = change.subject.split(None, 1)
            if len(words) != 2:
                # The only blanks are leading/trailing: nothing follows
                # the issue, so there is no alias to build.
                continue
            alias = ' '.join((bug_id, words[1]))
            lis = self.by_subject.setdefault(alias, [])
            # The alias can equal the subject itself or the alias built
            # from the other bug id; do not list the change twice.
            if not lis or lis[-1] is not change:
                lis.append(change)

        # Equivalate by change_id.
        if change.change_id:
            lis = self.by_change_id.setdefault(change.change_id, [])
            if lis:
                lis[0]._union(change)
            lis.append(change)

        # Equivalate by number (from reviewed_on).
        if change.number:
            lis = self.by_number.setdefault(change.number, [])
            if lis:
                lis[0]._union(change)
            lis.append(change)

    def load(self):
        """(Re)load the history of ``self.name`` from git, oldest commit first.

        git is run in the current working directory.

        Raises subprocess.CalledProcessError if ``git log`` exits non-zero.
        """
        self._reset()

        cmd = ['git', 'log',
               # Ask git for UTF-8 so decoding does not depend on the locale.
               '--encoding=UTF-8',
               '--format=' + GIT_LOG_FORMAT,
               '--reverse',
               self.name,
               # Everything after '--' is a path, never a revision/option.
               '--'] + list(self.paths)
        proc = subprocess.run(cmd,
                              stdout=subprocess.PIPE,
                              encoding='utf-8',
                              # A stray undecodable byte in an old commit
                              # message must not abort the whole run.
                              errors='replace',
                              check=True)

        for rec in proc.stdout.split('\x1e\n'):
            if rec:
                self._add_change_from_record(rec)

    def find_port(self, change):
        """Return this branch's port of *change*, or None if there is none.

        *change* may or may not belong to this branch.  The lookups are
        tried in order and the first hit wins: commit hash, Lustre-commit,
        Change-Id, review number and finally subject.  The representative
        of the hit's equivalence class is returned.
        """
        # TODO Return oldest member of equivalence class.
        port = (self.by_commit.get(change.commit) or
                self.by_commit.get(change.lustre_commit) or
                self.by_commit.get(change.lustre_change) or  # Handle misuse.
                _head(self.by_change_id.get(change.change_id)) or
                _head(self.by_change_id.get(change.lustre_commit)) or  # ...
                _head(self.by_change_id.get(change.lustre_change)) or
                _head(self.by_number.get(change.number)) or  # Do we need this?
                _head(self.by_number.get(change.lustre_change_number)) or
                _head(self.by_subject.get(change.subject)))  # Do we want this?
        return port._find() if port else None
def branch_comm(b1, b2):
    """Print a comm(1)-like comparison of the commits of *b1* and *b2*.

    One tab-separated line is printed per commit: the hash goes in column 1
    when the commit is unique to *b1*, column 2 when unique to *b2* and
    column 3 when it is common to both; the subject is always column 4.
    The leading run of commits with identical hashes is not printed.

    Both branches must already be loaded.
    """
    log1, log2 = b1.log, b2.log
    n1, n2 = len(log1), len(log2)
    i1 = i2 = 0
    printed = set()  # commit hashes already accounted for

    def is_done(c):
        return c.commit in printed or c.lustre_commit in printed

    def mark_done(*changes):
        for c in changes:
            printed.add(c.commit)
            if c.lustre_commit:
                printed.add(c.lustre_commit)

    def show_only_b1(c):
        print('%s\t\t\t%s' % (c.commit, c.subject))

    def show_only_b2(c):
        print('\t%s\t\t%s' % (c.commit, c.subject))

    def show_common(c):
        # TODO Add a '*' if subjects different...
        print('\t\t%s\t%s' % (c.commit, c.subject))

    # Suppress initial common commits.
    # XXX Should we use _find() on the changes here, or find_port()?
    while i1 < n1 and i2 < n2 and log1[i1].commit == log2[i2].commit:
        i1 += 1
        i2 += 1

    while i1 < n1 and i2 < n2:
        c1 = log1[i1]
        if is_done(c1):
            i1 += 1
            continue

        c2 = log2[i2]
        if is_done(c2):
            i2 += 1
            continue

        # A change whose port on the other side is already accounted for
        # is silently absorbed.
        p1 = b1.find_port(c2)
        if p1 and is_done(p1):
            mark_done(c2)
            i2 += 1
            continue

        p2 = b2.find_port(c1)
        if p2 and is_done(p2):
            mark_done(c1)
            i1 += 1
            continue

        # Neither c1 nor c2 has been printed, nor has any port of either.
        # XXX Do we need c1._find() here?
        if c1 == p1 or c2 == p2:
            # c1 and c2 are ports of the same change: common.
            mark_done(c1, c2)
            if p1:
                mark_done(p1)
            if p2:
                mark_done(p2)
            i1 += 1
            i2 += 1
            show_common(c1)
        elif p2 and not p1:
            # b2 has c1 but b1 lacks c2 (the port of c1 comes after c2),
            # so c2 is unique to b2.
            mark_done(c2)
            i2 += 1
            show_only_b2(c2)
        elif p2:
            # Both are ported but in a different order: c1 is common.
            mark_done(c1, p2)
            i1 += 1
            show_common(c1)
        else:
            # b2 has no port of c1 (whether or not b1 has c2): c1 is
            # unique to b1.
            mark_done(c1)
            i1 += 1
            show_only_b1(c1)

    # At most one branch still has commits that were not looked at; what is
    # not accounted for yet is printed as unique to that branch.
    for c1 in log1[i1:]:
        if not is_done(c1):
            assert i2 == n2
            mark_done(c1)
            show_only_b1(c1)

    for c2 in log2[i2:]:
        if not is_done(c2):
            assert i1 == n1
            mark_done(c2)
            show_only_b2(c2)
# Help text printed by main(), which replaces _PROGNAME_ with sys.argv[0].
USAGE = """usage: '_PROGNAME_ BRANCH1 BRANCH2 [PATH]...'
Compare commits to Lustre branches.
Prints commits unique to BRANCH1 in column 1.
Prints commits unique to BRANCH2 in column 2.
Prints commits common to both branches in column 3.
Prints commit subject in column 4.
Skips initial common commits.
The output format is inspired by comm(1). To filter commits by branch,
pipe the output to awk. For example:
$ ... | awk -F'\\t' '$1 != ""' # only commits unique to BRANCH1
$ ... | awk -F'\\t' '$2 != ""' # only commits unique to BRANCH2
$ ... | awk -F'\\t' '$3 != ""' # only common commits
$ ... | awk -F'\\t' '$3 == ""' # exclude common commits
This assumes that both branches are in the repository that contains
the current directory. To compare branches from different upstream
repositories (for example 'origin/master' and 'other/b_post_cmd3') do:
$ cd fs/lustre-release
$ git fetch origin
$ git remote add other ...
$ git fetch other
$ _PROGNAME_ origin/master other/b_post_cmd3"""
def main():
    """Command-line entry point: ``BRANCH1 BRANCH2 [PATH]...``.

    With fewer than two branch arguments the usage text is written to
    stderr and the process exits with status 1.  Otherwise both branches
    are loaded (restricted to the given paths) and compared.
    """
    argv = sys.argv
    if len(argv) < 3:
        print(USAGE.replace('_PROGNAME_', argv[0]), file=sys.stderr)
        sys.exit(1)

    paths = argv[3:]
    loaded = []
    for name in argv[1:3]:
        branch = Branch(name, paths)
        branch.load()
        loaded.append(branch)

    branch_comm(*loaded)


if __name__ == '__main__':
    main()