• R/O
  • HTTP
  • SSH
  • HTTPS

提交

標籤
無標籤

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

allura


Commit MetaInfo

修訂cbc17a7b8f378ef4c144142740d38f7c1815375d (tree)
時間2011-10-06 02:14:52
作者Rick Copeland <rcopeland@geek...>
CommiterRick Copeland

Log Message

[#1540] Cache linear runs of single-parent commits

Signed-off-by: Rick Copeland <rcopeland@geek.net>

Change Summary

差異

--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -9,7 +9,9 @@ class Commit(Document):
99 class __mongometa__:
1010 name = 'repo_ci'
1111 session = main_doc_session
12- indexes = ('parent_ids')
12+ indexes = [
13+ ('parent_ids',),
14+ ('child_ids',) ]
1315 User = dict(name=str, email=str, date=datetime)
1416
1517 _id = Field(str)
@@ -20,6 +22,21 @@ class Commit(Document):
2022 parent_ids = Field([str])
2123 child_ids = Field([str])
2224
25+ def __repr__(self):
26+ return '%s %s' % (
27+ self._id[:7], self.summary)
28+
29+ @property
30+ def summary(self):
31+ if self.message:
32+ summary = []
33+ for line in self.message.splitlines():
34+ line = line.rstrip()
35+ if line: summary.append(line)
36+ else: return ' '.join(summary)
37+ return ' '.join(summary)
38+ return ''
39+
2340 class Tree(Document):
2441 class __mongometa__:
2542 name = 'repo_tree'
@@ -46,3 +63,25 @@ class DiffInfo(Document):
4663
4764 _id = Field(str)
4865 differences = Field([dict(name=str, lhs_id=str, rhs_id=str)])
66+
67+class BasicBlock(Document):
68+ class __mongometa__:
69+ name = 'repo_basic_block'
70+ session = main_doc_session
71+ indexes = [
72+ ('commit_ids',),
73+ ('score') ]
74+
75+ _id = Field(str)
76+ parent_commit_ids = Field([str])
77+ commit_ids = Field([str])
78+ commit_times = Field([datetime])
79+ score = Field(int)
80+
81+ def __repr__(self):
82+ return '%s: (P %s, T %s..%s (%d commits))' % (
83+ self._id[:6],
84+ [ oid[:6] for oid in self.parent_commit_ids ],
85+ self.commit_ids[0][:6],
86+ self.commit_ids[-1][:6],
87+ len(self.commit_ids))
--- a/Allura/test-light.py
+++ b/Allura/test-light.py
@@ -1,6 +1,7 @@
11 import sys
22 import logging
3-from itertools import chain
3+from collections import defaultdict
4+from itertools import chain, izip
45 from datetime import datetime
56
67 from pylons import c
@@ -13,6 +14,17 @@ from allura.lib import utils
1314
1415 log = logging.getLogger(__name__)
1516
17+QSIZE=100
18+
19+def dolog():
20+ h.set_context('test', 'code')
21+ repo = c.app.repo._impl._git
22+ oid = repo.commit(repo.heads[0]).hexsha
23+ log.info('start')
24+ for i, ci in enumerate(commitlog(oid)):
25+ print repr(ci)
26+ log.info('done')
27+
1628 def main():
1729 if len(sys.argv) > 1:
1830 h.set_context('test')
@@ -22,6 +34,7 @@ def main():
2234 M.repo.Tree.m.remove({})
2335 M.repo.Trees.m.remove({})
2436 M.repo.DiffInfo.m.remove({})
37+ M.repo.BasicBlock.m.remove({})
2538 repo = c.app.repo._impl._git
2639
2740 # Get all commits
@@ -35,6 +48,7 @@ def main():
3548
3649 # Skip commits that are already in the DB
3750 commit_ids = unknown_commit_ids(all_commit_ids)
51+ # commit_ids = commit_ids[:500]
3852 log.info('Refreshing %d commits', len(commit_ids))
3953
4054 # Refresh commits
@@ -47,6 +61,7 @@ def main():
4761 # Refresh child references
4862 seen = set()
4963 parents = set()
64+
5065 for i, oid in enumerate(commit_ids):
5166 ci = M.repo.Commit.m.get(_id=oid)
5267 refresh_children(ci)
@@ -56,11 +71,22 @@ def main():
5671 log.info('Refresh child (a) info %d: %s', (i+1), ci._id)
5772 for j, oid in enumerate(parents-seen):
5873 ci = M.repo.Commit.m.get(_id=oid)
59- parents.update(ci.parent_ids)
74+ if ci is None: continue
75+ refresh_children(ci)
6076 if (i + j + 1) % 100 == 0:
6177 log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id)
6278
6379 # Refresh basic blocks
80+ bbb = BasicBlockBuilder(commit_ids)
81+ bbb.run()
82+
83+ # Verify the log
84+ log.info('Logging via basic blocks')
85+ with open('log.txt', 'w') as fp:
86+ for i, ci in enumerate(commitlog(commit_ids[0])):
87+ print >> fp, repr(ci)
88+ log.info('%r', ci)
89+ log.info('... done (%d commits from %s)', i, commit_ids[0])
6490
6591 # Refresh trees
6692 cache = {}
@@ -110,10 +136,71 @@ def refresh_commit_info(ci, seen):
110136 return True
111137
112138 def refresh_children(ci):
139+ '''
140+ TODO: make sure we remove basic blocks created by previous refreshes when
141+ there are extra children added.
142+ '''
113143 M.repo.Commit.m.update_partial(
114144 dict(_id={'$in': ci.parent_ids}),
115145 {'$addToSet': dict(child_ids=ci._id)})
116146
147+class BasicBlockBuilder(object):
148+
149+ def __init__(self, commit_ids):
150+ self.commit_ids = commit_ids
151+ self.block_index = {} # by commit ID
152+ self.blocks = {} # by block ID
153+ self.reasons = {} # reasons to stop merging blocks
154+
155+ def run(self):
156+ for oids in utils.chunked_iter(self.commit_ids, QSIZE):
157+ oids = list(oids)
158+ commits = list(M.repo.Commit.m.find(dict(_id={'$in':oids})))
159+ for ci in commits:
160+ if ci._id in self.block_index: continue
161+ self.block_index[ci._id] = ci._id
162+ self.blocks[ci._id] = M.repo.BasicBlock(dict(
163+ _id=ci._id,
164+ parent_commit_ids=ci.parent_ids,
165+ commit_ids=[ci._id],
166+ commit_times=[ci.authored.date]))
167+ self.merge_blocks()
168+ log.info('%d basic blocks', len(self.blocks))
169+ for bid, bb in sorted(self.blocks.items()):
170+ log.info('%32s: %r', self.reasons.get(bid, 'none'), bb)
171+ for bb in self.blocks.itervalues():
172+ bb.score = len(bb.commit_ids)
173+ bb.m.save()
174+ return self.blocks
175+
176+ def merge_blocks(self):
177+ while True:
178+ for bid, bb in self.blocks.iteritems():
179+ if len(bb.parent_commit_ids) != 1:
180+ self.reasons[bid] = '%d parents' % len(bb.parent_commit_ids)
181+ continue
182+ p_oid = bb.parent_commit_ids[0]
183+ p_bid = self.block_index.get(p_oid)
184+ if p_bid is None:
185+ self.reasons[bid] = 'parent commit not found'
186+ continue
187+ p_bb = self.blocks.get(p_bid)
188+ if p_bb is None:
189+ self.reasons[bid] = 'parent block not found'
190+ continue
191+ if p_bb.commit_ids[0] != p_oid:
192+ self.reasons[bid] = 'parent does not start with parent commit'
193+ continue
194+ bb.commit_ids += p_bb.commit_ids
195+ bb.commit_times += p_bb.commit_times
196+ bb.parent_commit_ids = p_bb.parent_commit_ids
197+ for oid in p_bb.commit_ids:
198+ self.block_index[oid] = bid
199+ break
200+ else:
201+ break
202+ del self.blocks[p_bid]
203+
117204 def refresh_tree(t, seen):
118205 if t.binsha in seen: return
119206 seen.add(t.binsha)
@@ -148,7 +235,6 @@ def trees(id, cache):
148235 yield x
149236
150237 def unknown_commit_ids(all_commit_ids):
151- QSIZE=100
152238 result = []
153239 for chunk in utils.chunked_iter(all_commit_ids, QSIZE):
154240 q = M.repo.Commit.m.find(_id={'$in':chunk})
@@ -188,6 +274,73 @@ def compute_diffs(tree_cache, rhs_ci, lhs_ci=None):
188274 di.m.save()
189275 return tree_cache
190276
277+def commitlog(commit_id, skip=0, limit=sys.maxint):
278+
279+ seen = set()
280+ def _visit(commit_id):
281+ if commit_id in seen: return
282+ bb = M.repo.BasicBlock.m.find(
283+ dict(commit_ids=commit_id)).sort('score', -1).first()
284+ if bb is None: return
285+ index = False
286+ for pos, (oid, time) in enumerate(izip(bb.commit_ids, bb.commit_times)):
287+ if oid == commit_id: index = True
288+ elif not index: continue
289+ seen.add(oid)
290+ ci_times[oid] = time
291+ if pos+1 < len(bb.commit_ids):
292+ ci_parents[oid] = [ bb.commit_ids[pos+1] ]
293+ else:
294+ ci_parents[oid] = bb.parent_commit_ids
295+ for oid in bb.parent_commit_ids:
296+ _visit(oid)
297+
298+ def _gen_ids(commit_id, skip, limit):
299+ # Traverse the graph in topo order, yielding commit IDs
300+ commits = set([commit_id])
301+ new_parent = None
302+ while commits and limit:
303+ # next commit is latest commit that's valid to log
304+ if new_parent in commits:
305+ ci = new_parent
306+ else:
307+ ci = max(commits, key=lambda ci:ci_times[ci])
308+ commits.remove(ci)
309+ if skip:
310+ skip -= 1
311+ continue
312+ else:
313+ limit -= 1
314+ yield ci
315+ # remove this commit from its parents children and add any childless
316+ # parents to the 'ready set'
317+ new_parent = None
318+ for oid in ci_parents[ci]:
319+ children = ci_children[oid]
320+ children.discard(ci)
321+ if not children:
322+ commits.add(oid)
323+ new_parent = oid
324+
325+ # Load all the blocks to build a commit graph
326+ ci_times = {}
327+ ci_parents = {}
328+ ci_children = defaultdict(set)
329+ log.info('Build commit graph')
330+ _visit(commit_id)
331+ for oid, parents in ci_parents.iteritems():
332+ for ci_parent in parents:
333+ ci_children[ci_parent].add(oid)
334+
335+ # Convert oids to commit objects
336+ log.info('Traverse commit graph')
337+ for oids in utils.chunked_iter(_gen_ids(commit_id, skip, limit), QSIZE):
338+ oids = list(oids)
339+ index = dict(
340+ (ci._id, ci) for ci in M.repo.Commit.m.find(dict(_id={'$in': oids})))
341+ for oid in oids:
342+ yield index[oid]
343+
191344 def _diff_trees(lhs, rhs, index, *path):
192345 def _fq(name):
193346 return '/'.join(reversed(
@@ -226,3 +379,4 @@ def _diff_trees(lhs, rhs, index, *path):
226379
227380 if __name__ == '__main__':
228381 main()
382+ # dolog()