Implement git ref reading purely in Python

It's much faster to read the refs from 114 projects when the reader
is pure Python and just doing file IO than forking 114 git commands
and parsing their output.

The reader caches refs based upon file mtimes.  If any single ref
file has been modified since the last read, we re-read the entire
repository's ref namespace.  This simplifies the code as we don't
need to worry about shooting down symbolic-refs, but it may cause
more IO than is necessary if only one ref gets updated.

This change drops `repo branches` in Android from 1.658s to 0.206s.
`repo sync` improves dramatically as well.

Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/git_refs.py b/git_refs.py
new file mode 100644
index 0000000..9851e78
--- /dev/null
+++ b/git_refs.py
@@ -0,0 +1,133 @@
+#
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+HEAD    = 'HEAD'             # read from <gitdir>/HEAD by GitRefs._LoadAll
+R_HEADS = 'refs/heads/'      # R_*: ref-name prefixes; unused in this module,
+R_TAGS  = 'refs/tags/'       #      imported from here by project.py
+R_PUB   = 'refs/published/'
+R_M     = 'refs/remotes/m/'
+
+
+class GitRefs(object):
+  """Pure-Python, mtime-cached reader of the refs under one git directory."""
+
+  def __init__(self, gitdir):
+    self._gitdir = gitdir
+    self._phyref = None   # ref name -> object id; None until first load
+    self._symref = None   # ref name -> target ref name ('ref: ' files)
+    self._mtime = {}      # gitdir-relative path -> mtime seen at load
+
+  @property
+  def all(self):
+    """Dict of ref name -> object id, reloaded first if the cache is stale."""
+    if self._phyref is None or self._NeedUpdate():
+      self._LoadAll()
+    return self._phyref
+
+  def get(self, name):
+    """Return the object id of ref `name`, or '' when it does not exist."""
+    return self.all.get(name, '')
+
+  def _NeedUpdate(self):
+    """True if any tracked path vanished or changed mtime since the load."""
+    for name, mtime in self._mtime.items():  # items(): Python 2 and 3
+      try:
+        if mtime != os.path.getmtime(os.path.join(self._gitdir, name)):
+          return True
+      except OSError:
+        return True
+    return False
+
+  def _LoadAll(self):
+    """Rebuild the cache; loose refs override packed-refs entries."""
+    self._phyref = {}
+    self._symref = {}
+    self._mtime = {}
+
+    self._ReadPackedRefs()
+    self._ReadLoose('refs/')
+    self._ReadLoose1(os.path.join(self._gitdir, HEAD), HEAD)
+
+    # Resolve symbolic refs in at most 5 passes (bounds chains and cycles);
+    # symrefs still unresolved after that are left out of _phyref.
+    scan = self._symref
+    attempts = 0
+    while scan and attempts < 5:
+      scan_next = {}
+      for name, dest in scan.items():
+        if dest in self._phyref:
+          self._phyref[name] = self._phyref[dest]
+        else:
+          scan_next[name] = dest
+      scan = scan_next
+      attempts += 1
+
+  def _ReadPackedRefs(self):
+    """Load `<id> <refname>` entries from packed-refs, if the file exists."""
+    path = os.path.join(self._gitdir, 'packed-refs')
+    try:
+      mtime = os.path.getmtime(path)  # stat first: no fd to leak on failure
+      fd = open(path, 'r')
+    except (IOError, OSError):
+      return
+    try:
+      for line in fd:
+        # Skip '#' header lines and '^' lines (peeled ids in git's format).
+        if line[0] in '#^':
+          continue
+        # Strip just the newline; skip blank/malformed lines (no IndexError).
+        p = line.rstrip('\n').split(' ', 1)
+        if len(p) == 2 and p[0] and p[1]:
+          self._phyref[p[1]] = p[0]
+    finally:
+      fd.close()
+    self._mtime['packed-refs'] = mtime
+
+  def _ReadLoose(self, prefix):
+    """Recursively read the loose ref files under gitdir/`prefix`."""
+    base = os.path.join(self._gitdir, prefix)
+    # Track every scanned directory, not only those with subdirectories,
+    # so a ref newly created in a leaf directory invalidates the cache.
+    self._mtime[prefix] = os.path.getmtime(base)
+    for name in os.listdir(base):
+      p = os.path.join(base, name)
+      if os.path.isdir(p):
+        self._ReadLoose(prefix + name + '/')
+      elif not name.endswith('.lock'):
+        self._ReadLoose1(p, prefix + name)
+
+  def _ReadLoose1(self, path, name):
+    """Read one loose ref file and record it as a physical or symbolic ref."""
+    try:
+      mtime = os.path.getmtime(path)  # stat first: no fd to leak on failure
+      fd = open(path, 'r')
+    except (IOError, OSError):
+      return
+    try:
+      value = fd.readline()
+    finally:
+      fd.close()
+
+    # Strip just the newline: [:-1] would eat data when none is present.
+    value = value.rstrip('\n')
+    if not value:
+      return
+    if value.startswith('ref: '):
+      self._symref[name] = value[5:]
+    else:
+      self._phyref[name] = value
+    self._mtime[name] = mtime
diff --git a/project.py b/project.py
index 311379c..086f0d7 100644
--- a/project.py
+++ b/project.py
@@ -28,11 +28,7 @@
 from error import ManifestInvalidRevisionError
 from remote import Remote
 
-HEAD    = 'HEAD'
-R_HEADS = 'refs/heads/'
-R_TAGS  = 'refs/tags/'
-R_PUB   = 'refs/published/'
-R_M     = 'refs/remotes/m/'
+from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
 
 def _error(fmt, *args):
   msg = fmt % args
@@ -226,6 +222,7 @@
     else:
       self.work_git = None
     self.bare_git = self._GitGetByExec(self, bare=True)
+    self.bare_ref = GitRefs(gitdir)
 
   @property
   def Exists(self):
@@ -301,7 +298,7 @@
     """Get all existing local branches.
     """
     current = self.CurrentBranch
-    all = self.bare_git.ListRefs()
+    all = self._allrefs
     heads = {}
     pubd = {}
 
@@ -1030,32 +1027,13 @@
 
   @property
   def _allrefs(self):
-    return self.bare_git.ListRefs()
+    return self.bare_ref.all
 
   class _GitGetByExec(object):
     def __init__(self, project, bare):
       self._project = project
       self._bare = bare
 
-    def ListRefs(self, *args):
-      cmdv = ['for-each-ref', '--format=%(objectname) %(refname)']
-      cmdv.extend(args)
-      p = GitCommand(self._project,
-                     cmdv,
-                     bare = self._bare,
-                     capture_stdout = True,
-                     capture_stderr = True)
-      r = {}
-      for line in p.process.stdout:
-        id, name = line[:-1].split(' ', 2)
-        r[name] = id
-      if p.Wait() != 0:
-        raise GitError('%s for-each-ref %s: %s' % (
-                       self._project.name,
-                       str(args),
-                       p.stderr))
-      return r
-
     def LsOthers(self):
       p = GitCommand(self._project,
                      ['ls-files',