allura
修訂 | cbc17a7b8f378ef4c144142740d38f7c1815375d (tree) |
---|---|
時間 | 2011-10-06 02:14:52 |
作者 | Rick Copeland <rcopeland@geek...> |
Commiter | Rick Copeland |
[#1540] Cache linear runs of single-parent commits
Signed-off-by: Rick Copeland <rcopeland@geek.net>
@@ -9,7 +9,9 @@ class Commit(Document): | ||
9 | 9 | class __mongometa__: |
10 | 10 | name = 'repo_ci' |
11 | 11 | session = main_doc_session |
12 | - indexes = ('parent_ids') | |
12 | + indexes = [ | |
13 | + ('parent_ids',), | |
14 | + ('child_ids',) ] | |
13 | 15 | User = dict(name=str, email=str, date=datetime) |
14 | 16 | |
15 | 17 | _id = Field(str) |
@@ -20,6 +22,21 @@ class Commit(Document): | ||
20 | 22 | parent_ids = Field([str]) |
21 | 23 | child_ids = Field([str]) |
22 | 24 | |
25 | + def __repr__(self): | |
26 | + return '%s %s' % ( | |
27 | + self._id[:7], self.summary) | |
28 | + | |
29 | + @property | |
30 | + def summary(self): | |
31 | + if self.message: | |
32 | + summary = [] | |
33 | + for line in self.message.splitlines(): | |
34 | + line = line.rstrip() | |
35 | + if line: summary.append(line) | |
36 | + else: return ' '.join(summary) | |
37 | + return ' '.join(summary) | |
38 | + return '' | |
39 | + | |
23 | 40 | class Tree(Document): |
24 | 41 | class __mongometa__: |
25 | 42 | name = 'repo_tree' |
@@ -46,3 +63,25 @@ class DiffInfo(Document): | ||
46 | 63 | |
47 | 64 | _id = Field(str) |
48 | 65 | differences = Field([dict(name=str, lhs_id=str, rhs_id=str)]) |
66 | + | |
67 | +class BasicBlock(Document): | |
68 | + class __mongometa__: | |
69 | + name = 'repo_basic_block' | |
70 | + session = main_doc_session | |
71 | + indexes = [ | |
72 | + ('commit_ids',), | |
73 | + ('score') ] | |
74 | + | |
75 | + _id = Field(str) | |
76 | + parent_commit_ids = Field([str]) | |
77 | + commit_ids = Field([str]) | |
78 | + commit_times = Field([datetime]) | |
79 | + score = Field(int) | |
80 | + | |
81 | + def __repr__(self): | |
82 | + return '%s: (P %s, T %s..%s (%d commits))' % ( | |
83 | + self._id[:6], | |
84 | + [ oid[:6] for oid in self.parent_commit_ids ], | |
85 | + self.commit_ids[0][:6], | |
86 | + self.commit_ids[-1][:6], | |
87 | + len(self.commit_ids)) |
@@ -1,6 +1,7 @@ | ||
1 | 1 | import sys |
2 | 2 | import logging |
3 | -from itertools import chain | |
3 | +from collections import defaultdict | |
4 | +from itertools import chain, izip | |
4 | 5 | from datetime import datetime |
5 | 6 | |
6 | 7 | from pylons import c |
@@ -13,6 +14,17 @@ from allura.lib import utils | ||
13 | 14 | |
14 | 15 | log = logging.getLogger(__name__) |
15 | 16 | |
17 | +QSIZE=100 | |
18 | + | |
19 | +def dolog(): | |
20 | + h.set_context('test', 'code') | |
21 | + repo = c.app.repo._impl._git | |
22 | + oid = repo.commit(repo.heads[0]).hexsha | |
23 | + log.info('start') | |
24 | + for i, ci in enumerate(commitlog(oid)): | |
25 | + print repr(ci) | |
26 | + log.info('done') | |
27 | + | |
16 | 28 | def main(): |
17 | 29 | if len(sys.argv) > 1: |
18 | 30 | h.set_context('test') |
@@ -22,6 +34,7 @@ def main(): | ||
22 | 34 | M.repo.Tree.m.remove({}) |
23 | 35 | M.repo.Trees.m.remove({}) |
24 | 36 | M.repo.DiffInfo.m.remove({}) |
37 | + M.repo.BasicBlock.m.remove({}) | |
25 | 38 | repo = c.app.repo._impl._git |
26 | 39 | |
27 | 40 | # Get all commits |
@@ -35,6 +48,7 @@ def main(): | ||
35 | 48 | |
36 | 49 | # Skip commits that are already in the DB |
37 | 50 | commit_ids = unknown_commit_ids(all_commit_ids) |
51 | + # commit_ids = commit_ids[:500] | |
38 | 52 | log.info('Refreshing %d commits', len(commit_ids)) |
39 | 53 | |
40 | 54 | # Refresh commits |
@@ -47,6 +61,7 @@ def main(): | ||
47 | 61 | # Refresh child references |
48 | 62 | seen = set() |
49 | 63 | parents = set() |
64 | + | |
50 | 65 | for i, oid in enumerate(commit_ids): |
51 | 66 | ci = M.repo.Commit.m.get(_id=oid) |
52 | 67 | refresh_children(ci) |
@@ -56,11 +71,22 @@ def main(): | ||
56 | 71 | log.info('Refresh child (a) info %d: %s', (i+1), ci._id) |
57 | 72 | for j, oid in enumerate(parents-seen): |
58 | 73 | ci = M.repo.Commit.m.get(_id=oid) |
59 | - parents.update(ci.parent_ids) | |
74 | + if ci is None: continue | |
75 | + refresh_children(ci) | |
60 | 76 | if (i + j + 1) % 100 == 0: |
61 | 77 | log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id) |
62 | 78 | |
63 | 79 | # Refresh basic blocks |
80 | + bbb = BasicBlockBuilder(commit_ids) | |
81 | + bbb.run() | |
82 | + | |
83 | + # Verify the log | |
84 | + log.info('Logging via basic blocks') | |
85 | + with open('log.txt', 'w') as fp: | |
86 | + for i, ci in enumerate(commitlog(commit_ids[0])): | |
87 | + print >> fp, repr(ci) | |
88 | + log.info('%r', ci) | |
89 | + log.info('... done (%d commits from %s)', i, commit_ids[0]) | |
64 | 90 | |
65 | 91 | # Refresh trees |
66 | 92 | cache = {} |
@@ -110,10 +136,71 @@ def refresh_commit_info(ci, seen): | ||
110 | 136 | return True |
111 | 137 | |
112 | 138 | def refresh_children(ci): |
139 | + ''' | |
140 | + TODO: make sure we remove basic blocks created by previous refreshes when | |
141 | + there are extra children added. | |
142 | + ''' | |
113 | 143 | M.repo.Commit.m.update_partial( |
114 | 144 | dict(_id={'$in': ci.parent_ids}), |
115 | 145 | {'$addToSet': dict(child_ids=ci._id)}) |
116 | 146 | |
147 | +class BasicBlockBuilder(object): | |
148 | + | |
149 | + def __init__(self, commit_ids): | |
150 | + self.commit_ids = commit_ids | |
151 | + self.block_index = {} # by commit ID | |
152 | + self.blocks = {} # by block ID | |
153 | + self.reasons = {} # reasons to stop merging blocks | |
154 | + | |
155 | + def run(self): | |
156 | + for oids in utils.chunked_iter(self.commit_ids, QSIZE): | |
157 | + oids = list(oids) | |
158 | + commits = list(M.repo.Commit.m.find(dict(_id={'$in':oids}))) | |
159 | + for ci in commits: | |
160 | + if ci._id in self.block_index: continue | |
161 | + self.block_index[ci._id] = ci._id | |
162 | + self.blocks[ci._id] = M.repo.BasicBlock(dict( | |
163 | + _id=ci._id, | |
164 | + parent_commit_ids=ci.parent_ids, | |
165 | + commit_ids=[ci._id], | |
166 | + commit_times=[ci.authored.date])) | |
167 | + self.merge_blocks() | |
168 | + log.info('%d basic blocks', len(self.blocks)) | |
169 | + for bid, bb in sorted(self.blocks.items()): | |
170 | + log.info('%32s: %r', self.reasons.get(bid, 'none'), bb) | |
171 | + for bb in self.blocks.itervalues(): | |
172 | + bb.score = len(bb.commit_ids) | |
173 | + bb.m.save() | |
174 | + return self.blocks | |
175 | + | |
176 | + def merge_blocks(self): | |
177 | + while True: | |
178 | + for bid, bb in self.blocks.iteritems(): | |
179 | + if len(bb.parent_commit_ids) != 1: | |
180 | + self.reasons[bid] = '%d parents' % len(bb.parent_commit_ids) | |
181 | + continue | |
182 | + p_oid = bb.parent_commit_ids[0] | |
183 | + p_bid = self.block_index.get(p_oid) | |
184 | + if p_bid is None: | |
185 | + self.reasons[bid] = 'parent commit not found' | |
186 | + continue | |
187 | + p_bb = self.blocks.get(p_bid) | |
188 | + if p_bb is None: | |
189 | + self.reasons[bid] = 'parent block not found' | |
190 | + continue | |
191 | + if p_bb.commit_ids[0] != p_oid: | |
192 | + self.reasons[bid] = 'parent does not start with parent commit' | |
193 | + continue | |
194 | + bb.commit_ids += p_bb.commit_ids | |
195 | + bb.commit_times += p_bb.commit_times | |
196 | + bb.parent_commit_ids = p_bb.parent_commit_ids | |
197 | + for oid in p_bb.commit_ids: | |
198 | + self.block_index[oid] = bid | |
199 | + break | |
200 | + else: | |
201 | + break | |
202 | + del self.blocks[p_bid] | |
203 | + | |
117 | 204 | def refresh_tree(t, seen): |
118 | 205 | if t.binsha in seen: return |
119 | 206 | seen.add(t.binsha) |
@@ -148,7 +235,6 @@ def trees(id, cache): | ||
148 | 235 | yield x |
149 | 236 | |
150 | 237 | def unknown_commit_ids(all_commit_ids): |
151 | - QSIZE=100 | |
152 | 238 | result = [] |
153 | 239 | for chunk in utils.chunked_iter(all_commit_ids, QSIZE): |
154 | 240 | q = M.repo.Commit.m.find(_id={'$in':chunk}) |
@@ -188,6 +274,73 @@ def compute_diffs(tree_cache, rhs_ci, lhs_ci=None): | ||
188 | 274 | di.m.save() |
189 | 275 | return tree_cache |
190 | 276 | |
277 | +def commitlog(commit_id, skip=0, limit=sys.maxint): | |
278 | + | |
279 | + seen = set() | |
280 | + def _visit(commit_id): | |
281 | + if commit_id in seen: return | |
282 | + bb = M.repo.BasicBlock.m.find( | |
283 | + dict(commit_ids=commit_id)).sort('score', -1).first() | |
284 | + if bb is None: return | |
285 | + index = False | |
286 | + for pos, (oid, time) in enumerate(izip(bb.commit_ids, bb.commit_times)): | |
287 | + if oid == commit_id: index = True | |
288 | + elif not index: continue | |
289 | + seen.add(oid) | |
290 | + ci_times[oid] = time | |
291 | + if pos+1 < len(bb.commit_ids): | |
292 | + ci_parents[oid] = [ bb.commit_ids[pos+1] ] | |
293 | + else: | |
294 | + ci_parents[oid] = bb.parent_commit_ids | |
295 | + for oid in bb.parent_commit_ids: | |
296 | + _visit(oid) | |
297 | + | |
298 | + def _gen_ids(commit_id, skip, limit): | |
299 | + # Traverse the graph in topo order, yielding commit IDs | |
300 | + commits = set([commit_id]) | |
301 | + new_parent = None | |
302 | + while commits and limit: | |
303 | + # next commit is latest commit that's valid to log | |
304 | + if new_parent in commits: | |
305 | + ci = new_parent | |
306 | + else: | |
307 | + ci = max(commits, key=lambda ci:ci_times[ci]) | |
308 | + commits.remove(ci) | |
309 | + if skip: | |
310 | + skip -= 1 | |
311 | + continue | |
312 | + else: | |
313 | + limit -= 1 | |
314 | + yield ci | |
315 | + # remove this commit from its parents children and add any childless | |
316 | + # parents to the 'ready set' | |
317 | + new_parent = None | |
318 | + for oid in ci_parents[ci]: | |
319 | + children = ci_children[oid] | |
320 | + children.discard(ci) | |
321 | + if not children: | |
322 | + commits.add(oid) | |
323 | + new_parent = oid | |
324 | + | |
325 | + # Load all the blocks to build a commit graph | |
326 | + ci_times = {} | |
327 | + ci_parents = {} | |
328 | + ci_children = defaultdict(set) | |
329 | + log.info('Build commit graph') | |
330 | + _visit(commit_id) | |
331 | + for oid, parents in ci_parents.iteritems(): | |
332 | + for ci_parent in parents: | |
333 | + ci_children[ci_parent].add(oid) | |
334 | + | |
335 | + # Convert oids to commit objects | |
336 | + log.info('Traverse commit graph') | |
337 | + for oids in utils.chunked_iter(_gen_ids(commit_id, skip, limit), QSIZE): | |
338 | + oids = list(oids) | |
339 | + index = dict( | |
340 | + (ci._id, ci) for ci in M.repo.Commit.m.find(dict(_id={'$in': oids}))) | |
341 | + for oid in oids: | |
342 | + yield index[oid] | |
343 | + | |
191 | 344 | def _diff_trees(lhs, rhs, index, *path): |
192 | 345 | def _fq(name): |
193 | 346 | return '/'.join(reversed( |
@@ -226,3 +379,4 @@ def _diff_trees(lhs, rhs, index, *path): | ||
226 | 379 | |
227 | 380 | if __name__ == '__main__': |
228 | 381 | main() |
382 | + # dolog() |