[PATCH 2/2] Resolve stratum and chunk refs in batches

Jannis Pohlmann jannis.pohlmann at codethink.co.uk
Mon Jan 7 18:40:37 GMT 2013


This uses the functionality introduced in the previous commit to
reduce the number of HTTP requests made during builds and generally
while creating source pools.

I noticed a weird thing in resolve_ref() while I wrote the
corresponding batch function resolve_refs(): unless I'm misreading
the code, the repository caching at the end of resolve_ref() clones
repositories from the remote repo cache that already failed to
resolve the needed ref remotely. How would they be useful after
cloning? Anyway, resolve_refs() is implemented in the same way,
but I added a FIXME note.
---
 morphlib/app.py             | 97 ++++++++++++++++++++++++++++++++++++++++++---
 morphlib/remoterepocache.py |  8 +++-
 2 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/morphlib/app.py b/morphlib/app.py
index 9239bf3..b194911 100755
--- a/morphlib/app.py
+++ b/morphlib/app.py
@@ -280,6 +280,66 @@ class Morph(cliapp.Application):
             absref, tree = repo.resolve_ref(ref)
         return absref, tree
 
+    def resolve_refs(self, refs, lrc, rrc, update=True):
+        """Batch-resolve (reponame, ref) pairs to sha1/tree info dicts."""
+        resolved = {}
+        # First resolve refs in all repositories that are already cached.
+        local_references = [x for x in refs if lrc.has_repo(x[0])]
+        for reponame, ref in local_references:
+            repo = lrc.get_repo(reponame)
+            if update:
+                self.status(msg='Updating cached git repository %(reponame)s',
+                            reponame=reponame)
+                repo.update()
+            absref, tree = repo.resolve_ref(ref)
+            resolved[(reponame, ref)] = {
+                    'repo': reponame,
+                    'repo-url': repo.url,
+                    'ref': ref,
+                    'sha1': absref,
+                    'tree': tree
+            }
+
+        # Then, if we have a remote repo cache, resolve refs in all
+        # repositories that we haven't cached locally yet.
+        if rrc:
+            remote_references = [x for x in refs if not x in local_references]
+            if remote_references:
+                self.status(msg='Resolving %(count)i references via '
+                                'remote repository cache',
+                            count=len(remote_references))
+                resolved_remote_refs = rrc.resolve_refs(remote_references)
+                # Keep only successes; refs with an 'error' stay unresolved.
+                for reference, info in resolved_remote_refs.items():
+                    if 'error' not in info:
+                        resolved[reference] = info
+
+        # Lastly, attempt to cache repositories for any ref that has not
+        # been resolved successfully so far.
+        #
+        # FIXME Doesn't this only ever cache repositories from the remote
+        # repo cache that don't have the ref anyway? It is the same that
+        # the resolve_ref() method does though...
+        uncached_references = [x for x in refs if not x in resolved]
+        for reponame, ref in uncached_references:
+            if update:
+                self.status(msg='Caching git repository %(reponame)s',
+                            reponame=reponame)
+                repo = lrc.cache_repo(reponame)
+                repo.update()
+            else:
+                repo = lrc.get_repo(reponame)
+            absref, tree = repo.resolve_ref(ref)
+            resolved[(reponame, ref)] = {
+                    'repo': reponame,
+                    'repo-url': repo.url,
+                    'ref': ref,
+                    'sha1': absref,
+                    'tree': tree
+            }
+
+        return resolved
+
     def traverse_morphs(self, triplets, lrc, rrc, update=True,
                         visit=lambda rn, rf, fn, arf, m: None):
         morph_factory = morphlib.morphologyfactory.MorphologyFactory(lrc, rrc,
@@ -289,6 +349,32 @@ class Morph(cliapp.Application):
         resolved_refs = {}
         resolved_morphologies = {}
 
+        def resolve_refs(morphology, *fields):
+            # Resolve the references used in morphology at once.
+            refs = []
+            for field in fields:
+                if field in morphology and morphology[field]:
+                    refs.extend([(s['repo'], s['ref'])
+                                 for s in morphology[field]])
+            sha1s = self.resolve_refs(refs, lrc, rrc, update)
+
+            # Mark them all as resolved so they are not resolved twice.
+            for info in sha1s.itervalues():
+                if 'error' in info:
+                    raise cliapp.AppException(
+                            'Failed to resolve reference "%s" '
+                            'in repository %s' % (info['ref'], info['repo']))
+                else:
+                    reference = (info['repo'], info['ref'])
+                    resolved_refs[reference] = (info['sha1'], info['tree'])
+
+        def load_morphology(reponame, absref, filename):
+            reference = (reponame, absref, filename)
+            if not reference in resolved_morphologies:
+                resolved_morphologies[reference] = \
+                    morph_factory.get_morphology(*reference)
+            return resolved_morphologies[reference]
+
         while queue:
             reponame, ref, filename = queue.popleft()
             update_repo = update and reponame not in updated_repos
@@ -303,17 +389,18 @@ class Morph(cliapp.Application):
             updated_repos.add(reponame)
 
             # Fetch the (repo, ref, filename) morphology, cache result.
-            reference = (reponame, absref, filename)
-            if not reference in resolved_morphologies:
-                resolved_morphologies[reference] = \
-                    morph_factory.get_morphology(reponame, absref, filename)
-            morphology = resolved_morphologies[reference]
+            morphology = load_morphology(reponame, absref, filename)
 
             visit(reponame, ref, filename, absref, tree, morphology)
+
+            # Resolve the refs of all strata and/or chunks in the
+            # morphology at once.
             if morphology['kind'] == 'system':
+                resolve_refs(morphology, 'strata')
                 queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph'])
                              for s in morphology['strata'])
             elif morphology['kind'] == 'stratum':
+                resolve_refs(morphology, 'build-depends', 'chunks')
                 if morphology['build-depends']:
                     queue.extend((s['repo'], s['ref'], '%s.morph' % s['morph'])
                                  for s in morphology['build-depends'])
diff --git a/morphlib/remoterepocache.py b/morphlib/remoterepocache.py
index 4a73518..2d2d3f4 100644
--- a/morphlib/remoterepocache.py
+++ b/morphlib/remoterepocache.py
@@ -94,7 +94,9 @@ class RemoteRepoCache(object):
         request_data = []
         for n in xrange(0, len(tuples)):
             request_data.append({'repo': urls[n], 'ref': tuples[n][1]})
+        request_data = json.dumps(request_data)
         response_data = self._make_post_request('sha1s', request_data)
+        response_data = json.loads(response_data)
         data = {}
         for n in xrange(0, len(tuples)):
             data[tuples[n]] = {
@@ -102,7 +104,7 @@ class RemoteRepoCache(object):
                 'repo-url': response_data[n]['repo'],
                 'ref': response_data[n]['ref'],
             }
-            if 'error' in resonse_data[n]:
+            if 'error' in response_data[n]:
                 data[tuples[n]]['error'] = response_data[n]['error']
             else:
                 data[tuples[n]]['sha1'] = response_data[n]['sha1']
@@ -130,5 +132,7 @@ class RemoteRepoCache(object):
         if not server_url.endswith('/'):
             server_url += '/'
         url = urlparse.urljoin(server_url, '/1.0/%s' % path)
-        handle = urllib2.urlopen(url, data)
+        request = urllib2.Request(
+                url, data, {'Content-Type': 'application/json'})
+        handle = urllib2.urlopen(request, data)
         return handle.read()
-- 
1.7.11.4





More information about the baserock-dev mailing list