Represent git-submodule as nested projects

We need a representation of git-submodule in repo; otherwise repo will
not sync submodules and will leave the workspace in a broken state.  Of
course, this would not be a problem if all projects were owned by the
owner of the manifest file, who could simply choose not to use
git-submodule in any project.  However, this is not possible in practice
because the manifest file owner is unlikely to own all upstream
projects.

As git submodules are simply git repositories, it is natural to treat
them as plain repo projects that live inside a repo project.  That is,
we can use recursively declared projects to denote the is-submodule-of
relation between git repositories.

The behavior of repo remains the same for projects that do not have a
sub-project within.  As for parent projects, repo fetches them and their
sub-projects as normal projects, and then checks out each sub-project at
the commit specified in the parent's commit object.  A sub-project is
fetched at a path relative to the parent project's working directory, so
the path specified in the manifest file should match the one in the
.gitmodules file.

If a submodule is not registered in repo manifest, repo will derive its
properties from itself and its parent project, which might not always be
correct.  In such cases, the subproject is called a derived subproject.

To a user, a sub-project is merely a git-submodule, so all the usual
advice for working with a git-submodule applies here, too.  For example,
you should not run `repo sync` in a parent repository if its submodule
is dirty.

Change-Id: I541e9e2ac1a70304272dbe09724572aa1004eb5c
diff --git a/manifest_xml.py b/manifest_xml.py
index 04cabaa..a2a56e9 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -180,20 +180,25 @@
       root.appendChild(e)
       root.appendChild(doc.createTextNode(''))
 
-    sort_projects = list(self.projects.keys())
-    sort_projects.sort()
-
-    for p in sort_projects:
-      p = self.projects[p]
+    def output_projects(parent, parent_node, projects):
+      for p in projects:
+        output_project(parent, parent_node, self.projects[p])
 
+    def output_project(parent, parent_node, p):
       if not p.MatchesGroups(groups):
-        continue
+        return
+
+      name = p.name
+      relpath = p.relpath
+      if parent:
+        name = self._UnjoinName(parent.name, name)
+        relpath = self._UnjoinRelpath(parent.relpath, relpath)
 
       e = doc.createElement('project')
-      root.appendChild(e)
-      e.setAttribute('name', p.name)
-      if p.relpath != p.name:
-        e.setAttribute('path', p.relpath)
+      parent_node.appendChild(e)
+      e.setAttribute('name', name)
+      if relpath != name:
+        e.setAttribute('path', relpath)
       if not d.remote or p.remote.name != d.remote.name:
         e.setAttribute('remote', p.remote.name)
       if peg_rev:
@@ -231,6 +236,16 @@
       if p.sync_c:
         e.setAttribute('sync-c', 'true')
 
+      if p.subprojects:
+        sort_projects = [subp.name for subp in p.subprojects]
+        sort_projects.sort()
+        output_projects(p, e, sort_projects)
+
+    sort_projects = [key for key in self.projects.keys()
+                     if not self.projects[key].parent]
+    sort_projects.sort()
+    output_projects(None, root, sort_projects)
+
     if self._repo_hooks_project:
       root.appendChild(doc.createTextNode(''))
       e = doc.createElement('repo-hooks')
@@ -383,11 +398,15 @@
     for node in itertools.chain(*node_list):
       if node.nodeName == 'project':
         project = self._ParseProject(node)
-        if self._projects.get(project.name):
-          raise ManifestParseError(
-              'duplicate project %s in %s' %
-              (project.name, self.manifestFile))
-        self._projects[project.name] = project
+        def recursively_add_projects(project):
+          if self._projects.get(project.name):
+            raise ManifestParseError(
+                'duplicate project %s in %s' %
+                (project.name, self.manifestFile))
+          self._projects[project.name] = project
+          for subproject in project.subprojects:
+            recursively_add_projects(subproject)
+        recursively_add_projects(project)
       if node.nodeName == 'repo-hooks':
         # Get the name of the project and the (space-separated) list of enabled.
         repo_hooks_project = self._reqatt(node, 'in-project')
@@ -537,11 +556,19 @@
 
     return '\n'.join(cleanLines)
 
+  def _JoinName(self, parent_name, name):
+    return os.path.join(parent_name, name)
+
-  def _ParseProject(self, node):
+  def _UnjoinName(self, parent_name, name):
+    return os.path.relpath(name, parent_name)
+
+  def _ParseProject(self, node, parent = None):
     """
     reads a <project> element from the manifest file
     """
     name = self._reqatt(node, 'name')
+    if parent:
+      name = self._JoinName(parent.name, name)
 
     remote = self._get_remote(node)
     if remote is None:
@@ -586,37 +613,66 @@
       groups = node.getAttribute('groups')
     groups = [x for x in re.split('[,\s]+', groups) if x]
 
-    default_groups = ['all', 'name:%s' % name, 'path:%s' % path]
-    groups.extend(set(default_groups).difference(groups))
-
-    if self.IsMirror:
-      worktree = None
-      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    if parent is None:
+      relpath, worktree, gitdir = self.GetProjectPaths(name, path)
     else:
-      worktree = os.path.join(self.topdir, path).replace('\\', '/')
-      gitdir = os.path.join(self.repodir, 'projects/%s.git' % path)
+      relpath, worktree, gitdir = self.GetSubprojectPaths(parent, path)
+
+    default_groups = ['all', 'name:%s' % name, 'path:%s' % relpath]
+    groups.extend(set(default_groups).difference(groups))
 
     project = Project(manifest = self,
                       name = name,
                       remote = remote.ToRemoteSpec(name),
                       gitdir = gitdir,
                       worktree = worktree,
-                      relpath = path,
+                      relpath = relpath,
                       revisionExpr = revisionExpr,
                       revisionId = None,
                       rebase = rebase,
                       groups = groups,
                       sync_c = sync_c,
-                      upstream = upstream)
+                      upstream = upstream,
+                      parent = parent)
 
     for n in node.childNodes:
       if n.nodeName == 'copyfile':
         self._ParseCopyFile(project, n)
       if n.nodeName == 'annotation':
         self._ParseAnnotation(project, n)
+      if n.nodeName == 'project':
+        project.subprojects.append(self._ParseProject(n, parent = project))
 
     return project
 
+  def GetProjectPaths(self, name, path):
+    relpath = path
+    if self.IsMirror:
+      worktree = None
+      gitdir = os.path.join(self.topdir, '%s.git' % name)
+    else:
+      worktree = os.path.join(self.topdir, path).replace('\\', '/')
+      gitdir = os.path.join(self.repodir, 'projects', '%s.git' % path)
+    return relpath, worktree, gitdir
+
+  def GetSubprojectName(self, parent, submodule_path):
+    return os.path.join(parent.name, submodule_path)
+
+  def _JoinRelpath(self, parent_relpath, relpath):
+    return os.path.join(parent_relpath, relpath)
+
+  def _UnjoinRelpath(self, parent_relpath, relpath):
+    return os.path.relpath(relpath, parent_relpath)
+
+  def GetSubprojectPaths(self, parent, path):
+    relpath = self._JoinRelpath(parent.relpath, path)
+    gitdir = os.path.join(parent.gitdir, 'subprojects', '%s.git' % path)
+    if self.IsMirror:
+      worktree = None
+    else:
+      worktree = os.path.join(parent.worktree, path).replace('\\', '/')
+    return relpath, worktree, gitdir
+
   def _ParseCopyFile(self, project, node):
     src = self._reqatt(node, 'src')
     dest = self._reqatt(node, 'dest')