Merge "sync: Run gc --auto in parallel"
diff --git a/.pydevproject b/.pydevproject
index b74568a..880abd6 100644
--- a/.pydevproject
+++ b/.pydevproject
@@ -5,6 +5,6 @@
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 <path>/repo</path>
 </pydev_pathproperty>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.4</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
 </pydev_project>
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..9f81ee1
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,301 @@
+# lint Python modules using external checkers.
+#
+# This is the main checker controlling the other ones and the reports
+# generation. It is itself both a raw checker and an astng checker in order
+# to:
+# * handle message activation / deactivation at the module level
+# * handle some basic but necessary stats data (number of classes, methods...)
+#
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=SVN
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Set the cache size for astng objects.
+cache-size=500
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable only checker(s) with the given id(s). This option conflicts with the
+# disable-checker option
+#enable-checker=
+
+# Enable all checker(s) except those with the given id(s). This option
+# conflicts with the enable-checker option
+#disable-checker=
+
+# Enable all messages in the listed categories.
+#enable-msg-cat=
+
+# Disable all messages in the listed categories.
+#disable-msg-cat=
+
+# Enable the message(s) with the given id(s).
+enable=RP0004
+
+# Disable the message(s) with the given id(s).
+disable=R0903,R0912,R0913,R0914,R0915,W0141,C0111,C0103,C0323,C0322,C0324,W0603,W0703,R0911,C0301,C0302,R0902,R0904,W0142,W0212,E1101,E1103,R0201,W0201,W0122,W0232,W0311,RP0001,RP0003,RP0101,RP0002,RP0401,RP0701,RP0801
+
+[REPORTS]
+
+# set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html
+output-format=text
+
+# Include message's id in output
+include-ids=yes
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file named "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables error, warning, statement which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (R0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (R0004).
+comment=no
+
+# checks for
+# * unused variables / imports
+# * undefined variables
+# * redefinition of variable from builtins or from an outer scope
+# * use of variable before assignment
+#
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching names used for dummy variables (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+
+# try to find bugs in the code using type inference
+#
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of classes names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=SQLObject
+
+# When zope mode is activated, consider the acquired-members option to ignore
+# access to some undefined attributes.
+zope=no
+
+# List of members usually obtained through zope's acquisition mechanism and
+# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
+acquired-members=REQUEST,acl_users,aq_parent
+
+
+# checks for :
+# * doc strings
+# * modules / classes / functions / methods / arguments / variables name
+# * number of arguments, local variables, branches, returns and statements in
+# functions, methods
+# * required module attributes
+# * dangerous default values as arguments
+# * redefinition of function / method / class
+# * uses of the global statement
+#
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=_main|__.*__
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))|(log)$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_,e,d1,d2,v,f,l,d
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=map,filter,apply,input
+
+
+# checks for signs of poor/misdesign:
+# * number of methods, attributes, local variables...
+# * size, complexity of functions, methods
+#
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branches for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=20
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=30
+
+
+# checks for
+# * external modules dependencies
+# * relative / wildcard imports
+# * cyclic imports
+# * uses of deprecated modules
+#
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report R0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report R0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report R0402 must
+# not be disabled)
+int-import-graph=
+
+
+# checks for :
+# * methods without self as first argument
+# * overridden methods signature
+# * access only to existent members via self
+# * attributes not defined in the __init__ method
+# * supported interfaces implementation
+# * unreachable code
+#
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defined in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+# checks for similarities and duplicated code. This computation may be
+# memory / CPU intensive, so you should disable it if you experience some
+# problems.
+#
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+# checks for:
+# * warning notes in the code like FIXME, XXX
+# * PEP 263: source code with non ascii character but no encoding declaration
+#
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+# checks for :
+# * unauthorized constructions
+# * strict indentation
+# * line length
+# * use of <> instead of !=
+#
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).  In repo it is 2 spaces.
+indent-string='  '
diff --git a/SUBMITTING_PATCHES b/SUBMITTING_PATCHES
index cba6741..50e2cf7 100644
--- a/SUBMITTING_PATCHES
+++ b/SUBMITTING_PATCHES
@@ -2,6 +2,7 @@
 
  - Make small logical changes.
  - Provide a meaningful commit message.
+ - Check for coding errors with pylint.
  - Make sure all code is under the Apache License, 2.0.
  - Publish your changes for review:
 
@@ -33,7 +34,14 @@
 probably need to split up your commit to finer grained pieces.
 
 
-(2) Check the license
+(2) Check for coding errors with pylint
+
+Run pylint on changed modules using the provided configuration:
+
+  pylint --rcfile=.pylintrc file.py
+
+
+(3) Check the license
 
 repo is licensed under the Apache License, 2.0.
 
@@ -49,7 +57,7 @@
 has been applied and pushed out.
 
 
-(3) Sending your patches.
+(4) Sending your patches.
 
 Do not email your patches to anyone.
 
diff --git a/command.py b/command.py
index 5a5f468..d543e3a 100644
--- a/command.py
+++ b/command.py
@@ -60,6 +60,32 @@
     """
     raise NotImplementedError
 
+  def _ResetPathToProjectMap(self, projects):
+    self._by_path = dict((p.worktree, p) for p in projects)
+
+  def _UpdatePathToProjectMap(self, project):
+    self._by_path[project.worktree] = project
+
+  def _GetProjectByPath(self, path):
+    project = None
+    if os.path.exists(path):
+      oldpath = None
+      while path \
+        and path != oldpath \
+        and path != self.manifest.topdir:
+        try:
+          project = self._by_path[path]
+          break
+        except KeyError:
+          oldpath = path
+          path = os.path.dirname(path)
+    else:
+      try:
+        project = self._by_path[path]
+      except KeyError:
+        pass
+    return project
+
   def GetProjects(self, args, missing_ok=False):
     """A list of projects that match the arguments.
     """
@@ -74,40 +100,38 @@
     groups = [x for x in re.split('[,\s]+', groups) if x]
 
     if not args:
-      for project in all_projects.values():
+      all_projects_list = all_projects.values()
+      derived_projects = []
+      for project in all_projects_list:
+        if project.Registered:
+          # Do not search registered subproject for derived projects
+          # since its parent has been searched already
+          continue
+        derived_projects.extend(project.GetDerivedSubprojects())
+      all_projects_list.extend(derived_projects)
+      for project in all_projects_list:
         if ((missing_ok or project.Exists) and
             project.MatchesGroups(groups)):
           result.append(project)
     else:
-      by_path = None
+      self._ResetPathToProjectMap(all_projects.values())
 
       for arg in args:
         project = all_projects.get(arg)
 
         if not project:
           path = os.path.abspath(arg).replace('\\', '/')
+          project = self._GetProjectByPath(path)
 
-          if not by_path:
-            by_path = dict()
-            for p in all_projects.values():
-              by_path[p.worktree] = p
-
-          if os.path.exists(path):
-            oldpath = None
-            while path \
-              and path != oldpath \
-              and path != self.manifest.topdir:
-              try:
-                project = by_path[path]
-                break
-              except KeyError:
-                oldpath = path
-                path = os.path.dirname(path)
-          else:
-            try:
-              project = by_path[path]
-            except KeyError:
-              pass
+          # If it's not a derived project, update path->project mapping and
+          # search again, as arg might actually point to a derived subproject.
+          if project and not project.Derived:
+            search_again = False
+            for subproject in project.GetDerivedSubprojects():
+              self._UpdatePathToProjectMap(subproject)
+              search_again = True
+            if search_again:
+              project = self._GetProjectByPath(path) or project
 
         if not project:
           raise NoSuchProjectError(arg)
@@ -123,7 +147,7 @@
     result.sort(key=_getpath)
     return result
 
-# pylint: disable-msg=W0223
+# pylint: disable=W0223
 # Pylint warns that the `InteractiveCommand` and `PagedCommand` classes do not
 # override method `Execute` which is abstract in `Command`.  Since that method
 # is always implemented in classes derived from `InteractiveCommand` and
@@ -142,7 +166,7 @@
   def WantPager(self, opt):
     return True
 
-# pylint: enable-msg=W0223
+# pylint: enable=W0223
 
 class MirrorSafeCommand(object):
   """Command permits itself to run within a mirror,
diff --git a/docs/manifest-format.txt b/docs/manifest-format.txt
index f499868..a36af67 100644
--- a/docs/manifest-format.txt
+++ b/docs/manifest-format.txt
@@ -45,7 +45,8 @@
     <!ELEMENT manifest-server (EMPTY)>
     <!ATTLIST url              CDATA #REQUIRED>
   
-    <!ELEMENT project (annotation?)>
+    <!ELEMENT project (annotation?,
+                       project*)>
     <!ATTLIST project name     CDATA #REQUIRED>
     <!ATTLIST project path     CDATA #IMPLIED>
     <!ATTLIST project remote   IDREF #IMPLIED>
@@ -152,7 +153,10 @@
 
 One or more project elements may be specified.  Each element
 describes a single Git repository to be cloned into the repo
-client workspace.
+client workspace.  You may specify Git-submodules by creating a
+nested project.  Git-submodules will be automatically
+recognized and inherit their parent's attributes, but those
+may be overridden by an explicitly specified project element.
 
 Attribute `name`: A unique name for this project.  The project's
 name is appended onto its remote's fetch URL to generate the actual
@@ -163,7 +167,8 @@
 where ${remote_fetch} is the remote's fetch attribute and
 ${project_name} is the project's name attribute.  The suffix ".git"
 is always appended as repo assumes the upstream is a forest of
-bare Git repositories.
+bare Git repositories.  If the project has a parent element, its
+name will be prefixed by the parent's.
 
 The project name must match the name Gerrit knows, if Gerrit is
 being used for code reviews.
@@ -171,6 +176,8 @@
 Attribute `path`: An optional path relative to the top directory
 of the repo client where the Git working directory for this project
 should be placed.  If not supplied the project name is used.
+If the project has a parent element, its path will be prefixed
+by the parent's.
 
 Attribute `remote`: Name of a previously defined remote element.
 If not supplied the remote given by the default element is used.
@@ -190,6 +197,8 @@
 definition is implicitly in the following manifest groups:
 default, name:monkeys, and path:barrel-of.  If you place a project in the
 group "notdefault", it will not be automatically downloaded by repo.
+If the project has a parent element, the `name` and `path` here
+are the prefixed ones.
 
 Element annotation
 ------------------
diff --git a/editor.py b/editor.py
index 62afbb9..489c6cd 100644
--- a/editor.py
+++ b/editor.py
@@ -91,7 +91,7 @@
 
       try:
         rc = subprocess.Popen(args, shell=shell).wait()
-      except OSError, e:
+      except OSError as e:
         raise EditorError('editor failed, %s: %s %s'
           % (str(e), editor, path))
       if rc != 0:
diff --git a/git_command.py b/git_command.py
index 82709b9..a40e6c0 100644
--- a/git_command.py
+++ b/git_command.py
@@ -217,7 +217,7 @@
                            stdin = stdin,
                            stdout = stdout,
                            stderr = stderr)
-    except Exception, e:
+    except Exception as e:
       raise GitError('%s: %s' % (command[1], e))
 
     if ssh_proxy:
diff --git a/git_config.py b/git_config.py
index afaa6f1..ae28855 100644
--- a/git_config.py
+++ b/git_config.py
@@ -449,7 +449,7 @@
     try:
       Trace(': %s', ' '.join(command))
       p = subprocess.Popen(command)
-    except Exception, e:
+    except Exception as e:
       _ssh_master = False
       print >>sys.stderr, \
         '\nwarn: cannot enable ssh control master for %s:%s\n%s' \
@@ -592,9 +592,9 @@
           else:
             host, port = info.split()
             self._review_url = self._SshReviewUrl(userEmail, host, port)
-        except urllib2.HTTPError, e:
+        except urllib2.HTTPError as e:
           raise UploadError('%s: %s' % (self.review, str(e)))
-        except urllib2.URLError, e:
+        except urllib2.URLError as e:
           raise UploadError('%s: %s' % (self.review, str(e)))
 
         REVIEW_CACHE[u] = self._review_url
diff --git a/main.py b/main.py
index 278fd36..d993ee4 100755
--- a/main.py
+++ b/main.py
@@ -146,13 +146,13 @@
           else:
             print >>sys.stderr, 'real\t%dh%dm%.3fs' \
               % (hours, minutes, seconds)
-    except DownloadError, e:
+    except DownloadError as e:
       print >>sys.stderr, 'error: %s' % str(e)
       return 1
-    except ManifestInvalidRevisionError, e:
+    except ManifestInvalidRevisionError as e:
       print >>sys.stderr, 'error: %s' % str(e)
       return 1
-    except NoSuchProjectError, e:
+    except NoSuchProjectError as e:
       if e.name:
         print >>sys.stderr, 'error: project %s not found' % e.name
       else:
@@ -390,14 +390,14 @@
       close_ssh()
   except KeyboardInterrupt:
     result = 1
-  except RepoChangedException, rce:
+  except RepoChangedException as rce:
     # If repo changed, re-exec ourselves.
     #
     argv = list(sys.argv)
     argv.extend(rce.extra_args)
     try:
       os.execv(__file__, argv)
-    except OSError, e:
+    except OSError as e:
       print >>sys.stderr, 'fatal: cannot restart repo after upgrade'
       print >>sys.stderr, 'fatal: %s' % e
       result = 128
diff --git a/manifest_xml.py b/manifest_xml.py
index 04cabaa..11e4ee5 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -180,20 +180,25 @@
       root.appendChild(e)
       root.appendChild(doc.createTextNode(''))
 
-    sort_projects = list(self.projects.keys())
-    sort_projects.sort()
-
-    for p in sort_projects:
-      p = self.projects[p]
+    def output_projects(parent, parent_node, projects):
+      for p in projects:
+        output_project(parent, parent_node, self.projects[p])
 
+    def output_project(parent, parent_node, p):
       if not p.MatchesGroups(groups):
-        continue
+        return
+
+      name = p.name
+      relpath = p.relpath
+      if parent:
+        name = self._UnjoinName(parent.name, name)
+        relpath = self._UnjoinRelpath(parent.relpath, relpath)
 
       e = doc.createElement('project')
-      root.appendChild(e)
-      e.setAttribute('name', p.name)
-      if p.relpath != p.name:
-        e.setAttribute('path', p.relpath)
+      parent_node.appendChild(e)
+      e.setAttribute('name', name)
+      if relpath != name:
+        e.setAttribute('path', relpath)
       if not d.remote or p.remote.name != d.remote.name:
         e.setAttribute('remote', p.remote.name)
       if peg_rev:
@@ -231,6 +236,16 @@
       if p.sync_c:
         e.setAttribute('sync-c', 'true')
 
+      if p.subprojects:
+        sort_projects = [subp.name for subp in p.subprojects]
+        sort_projects.sort()
+        output_projects(p, e, sort_projects)
+
+    sort_projects = [key for key in self.projects.keys()
+                     if not self.projects[key].parent]
+    sort_projects.sort()
+    output_projects(None, root, sort_projects)
+
     if self._repo_hooks_project:
       root.appendChild(doc.createTextNode(''))
       e = doc.createElement('repo-hooks')
@@ -321,7 +336,7 @@
       raise ManifestParseError("no <manifest> in %s" % (path,))
 
     nodes = []
-    for node in manifest.childNodes:  # pylint:disable-msg=W0631
+    for node in manifest.childNodes:  # pylint:disable=W0631
                                       # We only get here if manifest is initialised
         if node.nodeName == 'include':
             name = self._reqatt(node, 'name')
@@ -336,7 +351,7 @@
             # tricky.  actual parsing implementation may vary.
             except (KeyboardInterrupt, RuntimeError, SystemExit):
                 raise
-            except Exception, e:
+            except Exception as e:
                 raise ManifestParseError(
                     "failed parsing included manifest %s: %s", (name, e))
         else:
@@ -383,11 +398,15 @@
     for node in itertools.chain(*node_list):
       if node.nodeName == 'project':
         project = self._ParseProject(node)
-        if self._projects.get(project.name):
-          raise ManifestParseError(
-              'duplicate project %s in %s' %
-              (project.name, self.manifestFile))
-        self._projects[project.name] = project
+        def recursively_add_projects(project):
+          if self._projects.get(project.name):
+            raise ManifestParseError(
+                'duplicate project %s in %s' %
+                (project.name, self.manifestFile))
+          self._projects[project.name] = project
+          for subproject in project.subprojects:
+            recursively_add_projects(subproject)
+        recursively_add_projects(project)
       if node.nodeName == 'repo-hooks':
         # Get the name of the project and the (space-separated) list of enabled.
         repo_hooks_project = self._reqatt(node, 'in-project')
@@ -537,11 +556,19 @@
 
     return '\n'.join(cleanLines)
 
+  def _JoinName(self, parent_name, name):
+    return os.path.join(parent_name, name)
+
-  def _ParseProject(self, node):
+  def _UnjoinName(self, parent_name, name):
+    return os.path.relpath(name, parent_name)
+
+  def _ParseProject(self, node, parent = None):
     """
     reads a <project> element from the manifest file
     """
     name = self._reqatt(node, 'name')
+    if parent:
+      name = self._JoinName(parent.name, name)
 
     remote = self._get_remote(node)
     if remote is None:
@@ -586,37 +613,66 @@
       groups = node.getAttribute('groups')
     groups = [x for x in re.split('[,\s]+', groups) if x]
 
-    default_groups = ['all', 'name:%s' % name, 'path:%s' % path]
-    groups.extend(set(default_groups).difference(groups))
-
-    if self.IsMirror:
-      worktree = None
-      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    if parent is None:
+      relpath, worktree, gitdir = self.GetProjectPaths(name, path)
     else:
-      worktree = os.path.join(self.topdir, path).replace('\\', '/')
-      gitdir = os.path.join(self.repodir, 'projects/%s.git' % path)
+      relpath, worktree, gitdir = self.GetSubprojectPaths(parent, path)
+
+    default_groups = ['all', 'name:%s' % name, 'path:%s' % relpath]
+    groups.extend(set(default_groups).difference(groups))
 
     project = Project(manifest = self,
                       name = name,
                       remote = remote.ToRemoteSpec(name),
                       gitdir = gitdir,
                       worktree = worktree,
-                      relpath = path,
+                      relpath = relpath,
                       revisionExpr = revisionExpr,
                       revisionId = None,
                       rebase = rebase,
                       groups = groups,
                       sync_c = sync_c,
-                      upstream = upstream)
+                      upstream = upstream,
+                      parent = parent)
 
     for n in node.childNodes:
       if n.nodeName == 'copyfile':
         self._ParseCopyFile(project, n)
       if n.nodeName == 'annotation':
         self._ParseAnnotation(project, n)
+      if n.nodeName == 'project':
+        project.subprojects.append(self._ParseProject(n, parent = project))
 
     return project
 
+  def GetProjectPaths(self, name, path):
+    relpath = path
+    if self.IsMirror:
+      worktree = None
+      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    else:
+      worktree = os.path.join(self.topdir, path).replace('\\', '/')
+      gitdir = os.path.join(self.repodir, 'projects', '%s.git' % path)
+    return relpath, worktree, gitdir
+
+  def GetSubprojectName(self, parent, submodule_path):
+    return os.path.join(parent.name, submodule_path)
+
+  def _JoinRelpath(self, parent_relpath, relpath):
+    return os.path.join(parent_relpath, relpath)
+
+  def _UnjoinRelpath(self, parent_relpath, relpath):
+    return os.path.relpath(relpath, parent_relpath)
+
+  def GetSubprojectPaths(self, parent, path):
+    relpath = self._JoinRelpath(parent.relpath, path)
+    gitdir = os.path.join(parent.gitdir, 'subprojects', '%s.git' % path)
+    if self.IsMirror:
+      worktree = None
+    else:
+      worktree = os.path.join(parent.worktree, path).replace('\\', '/')
+    return relpath, worktree, gitdir
+
   def _ParseCopyFile(self, project, node):
     src = self._reqatt(node, 'src')
     dest = self._reqatt(node, 'dest')
diff --git a/project.py b/project.py
index 099d0b5..c5ee50f 100644
--- a/project.py
+++ b/project.py
@@ -22,6 +22,7 @@
 import stat
 import subprocess
 import sys
+import tempfile
 import time
 
 from color import Coloring
@@ -484,7 +485,28 @@
                rebase = True,
                groups = None,
                sync_c = False,
-               upstream = None):
+               upstream = None,
+               parent = None,
+               is_derived = False):
+    """Init a Project object.
+
+    Args:
+      manifest: The XmlManifest object.
+      name: The `name` attribute of manifest.xml's project element.
+      remote: RemoteSpec object specifying its remote's properties.
+      gitdir: Absolute path of git directory.
+      worktree: Absolute path of git working tree.
+      relpath: Relative path of git working tree to repo's top directory.
+      revisionExpr: The `revision` attribute of manifest.xml's project element.
+      revisionId: git commit id for checking out.
+      rebase: The `rebase` attribute of manifest.xml's project element.
+      groups: The `groups` attribute of manifest.xml's project element.
+      sync_c: The `sync-c` attribute of manifest.xml's project element.
+      upstream: The `upstream` attribute of manifest.xml's project element.
+      parent: The parent Project object.
+      is_derived: False if the project was explicitly defined in the manifest;
+                  True if the project is a discovered submodule.
+    """
     self.manifest = manifest
     self.name = name
     self.remote = remote
@@ -507,6 +529,9 @@
     self.groups = groups
     self.sync_c = sync_c
     self.upstream = upstream
+    self.parent = parent
+    self.is_derived = is_derived
+    self.subprojects = []
 
     self.snapshots = {}
     self.copyfiles = []
@@ -527,6 +552,14 @@
     self.enabled_repo_hooks = []
 
   @property
+  def Registered(self):
+    return self.parent and not self.is_derived
+
+  @property
+  def Derived(self):
+    return self.is_derived
+
+  @property
   def Exists(self):
     return os.path.isdir(self.gitdir)
 
@@ -1044,7 +1077,7 @@
 
       try:
         self._Checkout(revid, quiet=True)
-      except GitError, e:
+      except GitError as e:
         syncbuf.fail(self, e)
         return
       self._CopyFiles()
@@ -1066,7 +1099,7 @@
                    branch.name)
       try:
         self._Checkout(revid, quiet=True)
-      except GitError, e:
+      except GitError as e:
         syncbuf.fail(self, e)
         return
       self._CopyFiles()
@@ -1151,7 +1184,7 @@
       try:
         self._ResetHard(revid)
         self._CopyFiles()
-      except GitError, e:
+      except GitError as e:
         syncbuf.fail(self, e)
         return
     else:
@@ -1370,6 +1403,150 @@
     return kept
 
 
+## Submodule Management ##
+
+  def GetRegisteredSubprojects(self):
+    """Return self.subprojects followed, recursively, by each entry's own."""
+    collected = []
+    def walk(children):
+      if children:
+        collected.extend(children)
+        for child in children:
+          walk(child.subprojects)
+    walk(self.subprojects)
+    return collected
+
+  def _GetSubmodules(self):
+    """Return (rev, path, url) tuples for submodules at the current revision.
+
+    .gitmodules and `git ls-tree` are read straight from the object store:
+    `git submodule status --recursive` cannot be used because it needs a
+    working tree, which might not exist yet.  Returns [] if any query fails.
+    """
+
+    def get_submodules(gitdir, rev):
+      # Parse .gitmodules for submodule sub_paths and sub_urls
+      sub_paths, sub_urls = parse_gitmodules(gitdir, rev)
+      if not sub_paths:
+        return []
+      # Run `git ls-tree` to read the SHAs of the submodule objects, which
+      # are the revisions of the submodule repositories
+      sub_revs = git_ls_tree(gitdir, rev, sub_paths)
+      submodules = []
+      for sub_path, sub_url in zip(sub_paths, sub_urls):
+        try:
+          sub_rev = sub_revs[sub_path]
+        except KeyError:
+          # Ignore submodules that are not present in the tree
+          continue
+        submodules.append((sub_rev, sub_path, sub_url))
+      return submodules
+
+    re_path = re.compile(r'submodule\.(.+)\.path$')
+    re_url = re.compile(r'submodule\.(.+)\.url$')
+    def parse_gitmodules(gitdir, rev):
+      cmd = ['cat-file', 'blob', '%s:.gitmodules' % rev]
+      try:
+        p = GitCommand(None, cmd, capture_stdout = True, capture_stderr = True,
+                       bare = True, gitdir = gitdir)
+      except GitError:
+        return [], []
+      if p.Wait() != 0:
+        return [], []
+
+      gitmodules_lines = []
+      fd, temp_gitmodules_path = tempfile.mkstemp()
+      try:
+        os.write(fd, p.stdout)
+        os.close(fd)
+        cmd = ['config', '--file', temp_gitmodules_path, '--list']
+        p = GitCommand(None, cmd, capture_stdout = True, capture_stderr = True,
+                       bare = True, gitdir = gitdir)
+        if p.Wait() != 0:
+          return [], []
+        gitmodules_lines = p.stdout.split('\n')
+      except GitError:
+        return [], []
+      finally:
+        os.remove(temp_gitmodules_path)
+
+      paths = {}
+      urls = {}
+      for line in gitmodules_lines:
+        if not line:
+          continue
+        key, _, value = line.partition('=')  # the value may itself contain '='
+        m = re_path.match(key)
+        if m:
+          paths[m.group(1)] = value
+          continue
+        m = re_url.match(key)
+        if m:
+          urls[m.group(1)] = value
+      # Only submodules that declare both a path and a url are usable.
+      names = sorted(set(paths) & set(urls))
+      return [paths[name] for name in names], [urls[name] for name in names]
+
+    def git_ls_tree(gitdir, rev, paths):
+      cmd = ['ls-tree', rev, '--']
+      cmd.extend(paths)
+      try:
+        p = GitCommand(None, cmd, capture_stdout = True, capture_stderr = True,
+                       bare = True, gitdir = gitdir)
+      except GitError:
+        return {}
+      if p.Wait() != 0:
+        return {}
+      objects = {}
+      for line in p.stdout.split('\n'):
+        if not line.strip():
+          continue
+        object_rev, object_path = line.split(None, 3)[2:4]  # path may hold spaces
+        objects[object_path] = object_rev
+      return objects
+
+    try:
+      rev = self.GetRevisionId()
+    except GitError:
+      return []
+    return get_submodules(self.gitdir, rev)
+
+  def GetDerivedSubprojects(self):
+    """Return Project objects for unregistered submodules, found recursively."""
+    derived = []
+    if not self.Exists:
+      # Without a git repo on disk, querying its submodules would
+      # disturb its state, so report nothing.
+      return derived
+    for revision, path, url in self._GetSubmodules():
+      name = self.manifest.GetSubprojectName(self, path)
+      known = self.manifest.projects.get(name)
+      if known and known.Registered:
+        # A registered project is not itself derived, but submodules
+        # nested below it may be, so descend into it.
+        derived.extend(known.GetDerivedSubprojects())
+        continue
+      relpath, worktree, gitdir = self.manifest.GetSubprojectPaths(self, path)
+      remote = RemoteSpec(self.remote.name, url = url,
+                          review = self.remote.review)
+      subproject = Project(manifest = self.manifest,
+                           name = name,
+                           remote = remote,
+                           gitdir = gitdir,
+                           worktree = worktree,
+                           relpath = relpath,
+                           revisionExpr = self.revisionExpr,
+                           revisionId = revision,
+                           rebase = self.rebase,
+                           groups = self.groups,
+                           sync_c = self.sync_c,
+                           parent = self,
+                           is_derived = True)
+      derived.append(subproject)
+      derived.extend(subproject.GetDerivedSubprojects())
+    return derived
+
+
 ## Direct Git Commands ##
 
   def _RemoteFetch(self, name=None,
@@ -1723,7 +1900,7 @@
           continue
       try:
         os.symlink(os.path.relpath(stock_hook, os.path.dirname(dst)), dst)
-      except OSError, e:
+      except OSError as e:
         if e.errno == errno.EPERM:
           raise GitError('filesystem must support symlinks')
         else:
@@ -1786,7 +1963,7 @@
             os.symlink(os.path.relpath(src, os.path.dirname(dst)), dst)
           else:
             raise GitError('cannot overwrite a local work tree')
-        except OSError, e:
+        except OSError as e:
           if e.errno == errno.EPERM:
             raise GitError('filesystem must support symlinks')
           else:
diff --git a/repo b/repo
index 32cd178..7942851 100755
--- a/repo
+++ b/repo
@@ -185,7 +185,7 @@
   if not os.path.isdir(repodir):
     try:
       os.mkdir(repodir)
-    except OSError, e:
+    except OSError as e:
       print >>sys.stderr, \
             'fatal: cannot make %s directory: %s' % (
             repodir, e.strerror)
@@ -221,7 +221,7 @@
   cmd = [GIT, '--version']
   try:
     proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
-  except OSError, e:
+  except OSError as e:
     print >>sys.stderr
     print >>sys.stderr, "fatal: '%s' is not available" % GIT
     print >>sys.stderr, 'fatal: %s' % e
@@ -268,7 +268,7 @@
   if not os.path.isdir(home_dot_repo):
     try:
       os.mkdir(home_dot_repo)
-    except OSError, e:
+    except OSError as e:
       print >>sys.stderr, \
             'fatal: cannot make %s directory: %s' % (
             home_dot_repo, e.strerror)
@@ -277,7 +277,7 @@
   if not os.path.isdir(gpg_dir):
     try:
       os.mkdir(gpg_dir, 0700)
-    except OSError, e:
+    except OSError as e:
       print >>sys.stderr, \
             'fatal: cannot make %s directory: %s' % (
             gpg_dir, e.strerror)
@@ -291,7 +291,7 @@
     proc = subprocess.Popen(cmd,
                             env = env,
                             stdin = subprocess.PIPE)
-  except OSError, e:
+  except OSError as e:
     if not quiet:
       print >>sys.stderr, 'warning: gpg (GnuPG) is not available.'
       print >>sys.stderr, 'warning: Installing it is strongly encouraged.'
@@ -392,13 +392,13 @@
   try:
     try:
       r = urllib2.urlopen(url)
-    except urllib2.HTTPError, e:
+    except urllib2.HTTPError as e:
       if e.code == 404:
         return False
       print >>sys.stderr, 'fatal: Cannot get %s' % url
       print >>sys.stderr, 'fatal: HTTP error %s' % e.code
       raise CloneFailure()
-    except urllib2.URLError, e:
+    except urllib2.URLError as e:
       print >>sys.stderr, 'fatal: Cannot get %s' % url
       print >>sys.stderr, 'fatal: error %s' % e.reason
       raise CloneFailure()
@@ -427,7 +427,7 @@
   """
   try:
     os.mkdir(local)
-  except OSError, e:
+  except OSError as e:
     print >>sys.stderr, \
           'fatal: cannot make %s directory: %s' \
           % (local, e.strerror)
@@ -436,7 +436,7 @@
   cmd = [GIT, 'init', '--quiet']
   try:
     proc = subprocess.Popen(cmd, cwd = local)
-  except OSError, e:
+  except OSError as e:
     print >>sys.stderr
     print >>sys.stderr, "fatal: '%s' is not available" % GIT
     print >>sys.stderr, 'fatal: %s' % e
@@ -699,7 +699,7 @@
   me.extend(extra_args)
   try:
     os.execv(repo_main, me)
-  except OSError, e:
+  except OSError as e:
     print >>sys.stderr, "fatal: unable to start %s" % repo_main
     print >>sys.stderr, "fatal: %s" % e
     sys.exit(148)
diff --git a/subcmds/forall.py b/subcmds/forall.py
index 2ece95e..b633b7d 100644
--- a/subcmds/forall.py
+++ b/subcmds/forall.py
@@ -143,14 +143,14 @@
           break
       else:
         cn = None
-      # pylint: disable-msg=W0631
+      # pylint: disable=W0631
       if cn and cn in _CAN_COLOR:
         class ColorCmd(Coloring):
           def __init__(self, config, cmd):
             Coloring.__init__(self, config, cmd)
         if ColorCmd(self.manifest.manifestProject.config, cn).is_on:
           cmd.insert(cmd.index(cn) + 1, '--color')
-      # pylint: enable-msg=W0631
+      # pylint: enable=W0631
 
     mirror = self.manifest.IsMirror
     out = ForallColoring(self.manifest.manifestProject.config)
diff --git a/subcmds/init.py b/subcmds/init.py
index 007667e..b6b9807 100644
--- a/subcmds/init.py
+++ b/subcmds/init.py
@@ -207,7 +207,7 @@
 
     try:
       self.manifest.Link(name)
-    except ManifestParseError, e:
+    except ManifestParseError as e:
       print >>sys.stderr, "fatal: manifest '%s' not available" % name
       print >>sys.stderr, 'fatal: %s' % str(e)
       sys.exit(1)
diff --git a/subcmds/list.py b/subcmds/list.py
index 2be8257..6058a75 100644
--- a/subcmds/list.py
+++ b/subcmds/list.py
@@ -13,13 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
+
 from command import Command, MirrorSafeCommand
 
 class List(Command, MirrorSafeCommand):
   common = True
   helpSummary = "List projects and their associated directories"
   helpUsage = """
-%prog [<project>...]
+%prog [-f] [<project>...]
+%prog [-f] -r str1 [str2]...
 """
   helpDescription = """
 List all projects; pass '.' to list the project for the cwd.
@@ -27,6 +30,14 @@
 This is similar to running: repo forall -c 'echo "$REPO_PATH : $REPO_PROJECT"'.
 """
 
+  def _Options(self, p, show_smart=True):
+    """Register the -r/--regex and -f/--fullpath flags on option parser p."""
+    # show_smart is not consulted by this command.
+    p.add_option('-r', '--regex', dest='regex', action='store_true',
+                 help="Filter the project list based on regex or wildcard matching of strings")
+    p.add_option('-f', '--fullpath', dest='fullpath', action='store_true',
+                 help="Display the full work tree path instead of the relative path")
+
   def Execute(self, opt, args):
     """List all projects and the associated directories.
 
@@ -35,14 +46,33 @@
     discoverable.
 
     Args:
-      opt: The options.  We don't take any.
+      opt: The options.
       args: Positional args.  Can be a list of projects to list, or empty.
     """
-    projects = self.GetProjects(args)
+    if not opt.regex:
+      projects = self.GetProjects(args)
+    else:
+      projects = self.FindProjects(args)
+
+    def _getpath(x):
+      if opt.fullpath:
+        return x.worktree
+      return x.relpath
 
     lines = []
     for project in projects:
-      lines.append("%s : %s" % (project.relpath, project.name))
+      lines.append("%s : %s" % (_getpath(project), project.name))
 
     lines.sort()
     print '\n'.join(lines)
+
+  def FindProjects(self, args):
+    """Return projects, sorted by relpath, matching any regex in args."""
+    # Compile each pattern once rather than once per project.
+    patterns = [re.compile(arg, re.IGNORECASE) for arg in args]
+    def _matches(proj):
+      return any(pat.search(proj.name) or pat.search(proj.relpath)
+                 for pat in patterns)
+    result = [proj for proj in self.GetProjects('') if _matches(proj)]
+    result.sort(key=lambda project: project.relpath)
+    return result
diff --git a/subcmds/sync.py b/subcmds/sync.py
index 9e4a975..a4ca344 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -474,7 +474,7 @@
               # in the .netrc file.
               print >>sys.stderr, 'No credentials found for %s in .netrc' % \
                                   parse_result.hostname
-            except netrc.NetrcParseError, e:
+            except netrc.NetrcParseError as e:
               print >>sys.stderr, 'Error parsing .netrc file: %s' % e
 
         if (username and password):
@@ -521,11 +521,11 @@
         else:
           print >>sys.stderr, 'error: %s' % manifest_str
           sys.exit(1)
-      except (socket.error, IOError, xmlrpclib.Fault), e:
+      except (socket.error, IOError, xmlrpclib.Fault) as e:
         print >>sys.stderr, 'error: cannot connect to manifest server %s:\n%s' % (
             self.manifest.manifest_server, e)
         sys.exit(1)
-      except xmlrpclib.ProtocolError, e:
+      except xmlrpclib.ProtocolError as e:
         print >>sys.stderr, 'error: cannot connect to manifest server %s:\n%d %s' % (
             self.manifest.manifest_server, e.errcode, e.errmsg)
         sys.exit(1)
@@ -562,12 +562,31 @@
       to_fetch.extend(all_projects)
       to_fetch.sort(key=self._fetch_times.Get, reverse=True)
 
-      self._Fetch(to_fetch, opt)
+      fetched = self._Fetch(to_fetch, opt)
       _PostRepoFetch(rp, opt.no_repo_verify)
       if opt.network_only:
         # bail out now; the rest touches the working tree
         return
 
+      # Iteratively fetch missing and/or nested unregistered submodules
+      previously_missing_set = set()
+      while True:
+        self.manifest._Unload()
+        all_projects = self.GetProjects(args, missing_ok=True)
+        missing = []
+        for project in all_projects:
+          if project.gitdir not in fetched:
+            missing.append(project)
+        if not missing:
+          break
+        # Avoid looping forever on repos that are genuinely missing: if the set
+        # of missing repos is unchanged since the last fetch, stop.
+        missing_set = set(p.name for p in missing)
+        if previously_missing_set == missing_set:
+          break
+        previously_missing_set = missing_set
+        fetched.update(self._Fetch(missing, opt))
+
     if self.manifest.IsMirror:
       # bail out now, we have no working tree
       return
diff --git a/subcmds/upload.py b/subcmds/upload.py
index 685e342..84a5e44 100644
--- a/subcmds/upload.py
+++ b/subcmds/upload.py
@@ -329,7 +329,7 @@
 
         branch.UploadForReview(people, auto_topic=opt.auto_topic, draft=opt.draft)
         branch.uploaded = True
-      except UploadError, e:
+      except UploadError as e:
         branch.error = e
         branch.uploaded = False
         have_errors = True
@@ -384,7 +384,7 @@
       pending_proj_names = [project.name for (project, avail) in pending]
       try:
         hook.Run(opt.allow_all_hooks, project_list=pending_proj_names)
-      except HookError, e:
+      except HookError as e:
         print >>sys.stderr, "ERROR: %s" % str(e)
         return