sync: Run gc --auto in parallel
We can't simply run git gc unthrottled with a high (or even low) -j,
since that would overload the system. Instead, limit the total number
of threads across all git gc subprocesses to the number of CPUs reported
by the multiprocessing module (available in Python 2.6 and above).
Change-Id: Icca0161a1e6116ffa5f7cfc6f5faecda510a7fb9
diff --git a/subcmds/sync.py b/subcmds/sync.py
index b83f2d4..9e4a975 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -39,6 +39,11 @@
def _rlimit_nofile():
return (256, 256)
+try:
+ import multiprocessing
+except ImportError:
+ multiprocessing = None
+
from git_command import GIT
from git_refs import R_HEADS, HEAD
from project import Project
@@ -299,10 +304,56 @@
pm.end()
self._fetch_times.Save()
- for project in projects:
- project.bare_git.gc('--auto')
+
+ self._GCProjects(projects)
return fetched
+  def _GCProjects(self, projects):
+    """Run `git gc --auto` on every project, in parallel when possible.
+
+    The number of concurrent gc runs is min(self.jobs, CPU count); the CPU
+    count comes from the multiprocessing module and is taken to be 1 when
+    that module could not be imported.  With fewer than two jobs the
+    projects are collected one after another in the calling thread.
+
+    Args:
+      projects: iterable of projects; each must expose bare_git.gc().
+
+    Exits the process with status 1 (via sys.exit) if any parallel gc run
+    fails.
+    """
+    if multiprocessing:
+      cpu_count = multiprocessing.cpu_count()
+    else:
+      cpu_count = 1
+    jobs = min(self.jobs, cpu_count)
+
+    if jobs < 2:
+      # Serial path: no threads and no pack.threads override.
+      for project in projects:
+        project.bare_git.gc('--auto')
+      return
+
+    # Split the CPUs evenly between the concurrent gc runs so the total
+    # thread count stays at or below cpu_count.  Floor division keeps the
+    # value an integer even where `/` is true division (Python 3, or
+    # `from __future__ import division`).
+    config = {'pack.threads': cpu_count // jobs if cpu_count > jobs else 1}
+
+    threads = set()
+    # Each running worker holds one semaphore slot, capping concurrency at
+    # `jobs`.
+    sem = _threading.Semaphore(jobs)
+    err_event = _threading.Event()
+
+    def GC(project):
+      """Worker body: gc one project, record failure, free the slot."""
+      try:
+        try:
+          project.bare_git.gc('--auto', config=config)
+        except GitError:
+          err_event.set()
+        except:
+          # Anything unexpected also counts as a failure, but is re-raised
+          # so it is not silently swallowed inside the worker thread.
+          err_event.set()
+          raise
+      finally:
+        # Always hand the slot back, otherwise the launcher loop below
+        # would block forever in sem.acquire().
+        sem.release()
+
+    for project in projects:
+      # Stop launching new gc runs as soon as any worker reported a failure.
+      if err_event.isSet():
+        break
+      sem.acquire()
+      t = _threading.Thread(target=GC, args=(project,))
+      t.daemon = True
+      threads.add(t)
+      t.start()
+
+    # Wait for every worker that was started, including after a failure.
+    for t in threads:
+      t.join()
+
+    if err_event.isSet():
+      print >>sys.stderr, '\nerror: Exited sync due to gc errors'
+      sys.exit(1)
+
def UpdateProjectList(self):
new_project_paths = []
for project in self.GetProjects(None, missing_ok=True):