allura
修訂 | 875b6f5f4a97e3c6b0baa71b2e6c1c98755a946b (tree) |
---|---|
時間 | 2011-10-06 02:14:52 |
作者 | Rick Copeland <rcopeland@geek...> |
Committer | Rick Copeland |
[#1540] Using cached ancestor stuff
Signed-off-by: Rick Copeland <rcopeland@geek.net>
@@ -11,7 +11,7 @@ from .auth import AuthGlobals, User, ProjectRole, OpenId, EmailAddress, ApiToken | ||
11 | 11 | from .openid_model import OpenIdStore, OpenIdAssociation, OpenIdNonce |
12 | 12 | from .filesystem import File |
13 | 13 | from .notification import Notification, Mailbox |
14 | -from .repository import Repository, RepositoryImplementation, RepoObject, Commit, Tree, Blob | |
14 | +from .repository import Repository, RepositoryImplementation, RepoObject, Commit, Tree, Blob, CommitAncestor | |
15 | 15 | from .repository import LogCache, LastCommitFor, MergeRequest, GitLikeTree |
16 | 16 | from .stats import Stats |
17 | 17 | from .oauth import OAuthToken, OAuthConsumerToken, OAuthRequestToken, OAuthAccessToken |
@@ -18,14 +18,13 @@ from ming.utils import LazyProperty | ||
18 | 18 | from ming.orm import FieldProperty, session, Mapper |
19 | 19 | from ming.orm.declarative import MappedClass |
20 | 20 | |
21 | - | |
22 | 21 | from allura.lib.patience import SequenceMatcher |
23 | 22 | from allura.lib import helpers as h |
24 | 23 | from allura.lib import utils |
25 | 24 | |
26 | 25 | from .artifact import Artifact, VersionedArtifact, Feed |
27 | 26 | from .auth import User |
28 | -from .session import repository_orm_session, project_orm_session | |
27 | +from .session import repository_orm_session, project_orm_session, main_doc_session | |
29 | 28 | from .notification import Notification |
30 | 29 | |
31 | 30 | log = logging.getLogger(__name__) |
@@ -49,9 +48,17 @@ class RepositoryImplementation(object): | ||
49 | 48 | raise NotImplementedError, 'commit' |
50 | 49 | |
51 | 50 | def new_commits(self, all_commits=False): # pragma no cover |
52 | - '''Return any commit object_ids in the native repo that are not (yet) stored | |
53 | - in the database in topological order (parents first)''' | |
54 | - raise NotImplementedError, 'commit' | |
51 | + '''Return a list of (oid, commit) in topological order (heads first). | |
52 | + | |
53 | + "commit" is a repo-native object, NOT a Commit object. | |
54 | + If all_commits is False, only return commits not already indexed. | |
55 | + ''' | |
56 | + raise NotImplementedError, 'new_commits' | |
57 | + | |
58 | + def commit_parents(self, commit): | |
59 | + '''Return a list of (oid, commit) for the parents of the given (native) | |
60 | + commit''' | |
61 | + raise NotImplementedError, 'commit_parents' | |
55 | 62 | |
56 | 63 | def commit_context(self, object_id): # pragma no cover |
57 | 64 | '''Returns {'prev':Commit, 'next':Commit}''' |
@@ -281,6 +288,42 @@ class Repository(Artifact): | ||
281 | 288 | content_type, encoding = 'application/octet-stream', None |
282 | 289 | return content_type, encoding |
283 | 290 | |
291 | + def refresh_ancestor_graph(self, commits): | |
292 | + '''Make sure the CommitAncestor collection is up-to-date based on | |
293 | + the given list of (oid, native_commit) commits | |
294 | + ''' | |
295 | + PAGESIZE = 1024 | |
296 | + ca_doc = mapper(CommitAncestor).doc_cls | |
297 | + sess = main_doc_session | |
298 | + ancestor_cache = {} # ancestor_cache[oid] = [ a_oid0, a_oid1...] | |
299 | + def _ancestors(oid, ci, indent=''): | |
300 | + if oid in ancestor_cache: | |
301 | + return ancestor_cache[oid] | |
302 | + stored_ancestors = [] | |
303 | + for ca in sess.find(ca_doc, dict(object_id=oid)): | |
304 | + stored_ancestors.extend(ca.ancestor_ids) | |
305 | + if stored_ancestors: | |
306 | + # Ancestors already stored in MongoDB | |
307 | + ancestor_cache[oid] = stored_ancestors | |
308 | + return stored_ancestors | |
309 | + ancestor_ids = set() | |
310 | + for p_oid, p_ci in self._impl.commit_parents(ci): | |
311 | + ancestor_ids.add(p_oid) | |
312 | + ancestor_ids.update(_ancestors(p_oid, p_ci, indent + ' ')) | |
313 | + result = ancestor_cache[oid] = list(ancestor_ids) | |
314 | + for i in xrange(0, len(result), PAGESIZE): | |
315 | + sess.insert(ca_doc( | |
316 | + dict( | |
317 | + object_id=oid, | |
318 | + ancestor_ids=result[i:i+PAGESIZE]))) | |
319 | + | |
320 | + # Compute graph in chunks to save memory | |
321 | + for i, (oid, ci) in enumerate(reversed(commits)): | |
322 | + _ancestors(oid, ci) | |
323 | + if i and i % PAGESIZE == 0: | |
324 | + log.info('=== Clear ancestor cache === ') | |
325 | + ancestor_cache = {} | |
326 | + | |
284 | 327 | def refresh(self, all_commits=False, notify=True): |
285 | 328 | '''Find any new commits in the repository and update''' |
286 | 329 | self._impl.refresh_heads() |
@@ -288,8 +331,12 @@ class Repository(Artifact): | ||
288 | 331 | session(self).flush() |
289 | 332 | sess = session(Commit) |
290 | 333 | log.info('Refreshing repository %s', self) |
291 | - commit_ids = self._impl.new_commits(all_commits) | |
292 | - log.info('... %d new commits', len(commit_ids)) | |
334 | + commits = self._impl.new_commits(all_commits) | |
335 | + log.info('... %d new commits', len(commits)) | |
336 | + self.refresh_ancestor_graph(commits) | |
337 | + | |
338 | + | |
339 | + return | |
293 | 340 | # Refresh history |
294 | 341 | i=0 |
295 | 342 | seen_object_ids = set() |
@@ -623,6 +670,23 @@ class LogCache(RepoObject): | ||
623 | 670 | lc.object_ids, lc.candidates = repo._impl.log(object_id, 0, 50) |
624 | 671 | return lc |
625 | 672 | |
673 | +class CommitAncestor(MappedClass): | |
674 | + class __mongometa__: | |
675 | + session = repository_orm_session | |
676 | + name='commit_ancestor' | |
677 | + indexes = [ | |
678 | + ('object_id'), ('ancestor_id') ] | |
679 | + | |
680 | + _id = FieldProperty(S.ObjectId) | |
681 | + object_id = FieldProperty(str) | |
682 | + ancestor_ids = FieldProperty([str]) | |
683 | + | |
684 | + @LazyProperty | |
685 | + def ancestor(self): | |
686 | + ci = Commit.query.get(object_id=self.ancestor_id) | |
687 | + if ci is None: return ci | |
688 | + ci.set_context(self.repo) | |
689 | + | |
626 | 690 | class Commit(RepoObject): |
627 | 691 | class __mongometa__: |
628 | 692 | polymorphic_identity='commit' |
@@ -8,7 +8,7 @@ import tg | ||
8 | 8 | import git |
9 | 9 | |
10 | 10 | from ming.base import Object |
11 | -from ming.orm import Mapper, session | |
11 | +from ming.orm import Mapper, session, mapper | |
12 | 12 | from ming.utils import LazyProperty |
13 | 13 | |
14 | 14 | from allura.lib import helpers as h |
@@ -115,20 +115,23 @@ class GitImplementation(M.RepositoryImplementation): | ||
115 | 115 | return result |
116 | 116 | |
117 | 117 | def new_commits(self, all_commits=False): |
118 | - graph = {} | |
119 | - | |
120 | - to_visit = [ self._git.commit(rev=hd.object_id) for hd in self._repo.heads ] | |
121 | - while to_visit: | |
122 | - obj = to_visit.pop() | |
123 | - if obj.hexsha in graph: continue | |
124 | - if not all_commits: | |
125 | - # Look up the object | |
126 | - if M.Commit.query.find(dict(object_id=obj.hexsha)).count(): | |
127 | - graph[obj.hexsha] = set() # mark as parentless | |
128 | - continue | |
129 | - graph[obj.hexsha] = set(p.hexsha for p in obj.parents) | |
130 | - to_visit += obj.parents | |
131 | - return list(topological_sort(graph)) | |
118 | + PAGESIZE=1024 | |
119 | + commit_ids = [ (ci.hexsha, ci) for ci in self._git.iter_commits(topo_order=True) ] | |
120 | + if all_commits: return commit_ids | |
121 | + result = [] | |
122 | + commit_doc = mapper(M.Commit).doc_cls | |
123 | + sess = M.main_doc_session | |
124 | + for i in xrange(0, len(commit_ids), PAGESIZE): | |
125 | + chunk = commit_ids[i:i+PAGESIZE] | |
126 | + found_commit_ids = set( | |
127 | + ci.object_id for ci in sess.find( | |
128 | + commit_doc, object_id={'$in': chunk}, | |
129 | + fields=['_id', 'object_id'])) | |
130 | + result += [ (oid, ci) for (oid, ci) in chunk if oid not in found_commit_ids ] | |
131 | + return result | |
132 | + | |
133 | + def commit_parents(self, ci): | |
134 | + return [ (p_ci.hexsha, p_ci) for p_ci in ci.parents ] | |
132 | 135 | |
133 | 136 | def commit_context(self, commit): |
134 | 137 | prev_ids = commit.parent_ids |