Support repo-level pre-upload hook and prep for future hooks.

All repo-level hooks are expected to live in a single project at the
top level of that project.  The name of the hooks project is provided
in the manifest.xml.  The manifest also lists which hooks are enabled
to make it obvious if a file somehow failed to sync down (or got
deleted).

Before running any hook, we will prompt the user to make sure that it
is OK.  A user can deny running the hook, allow once, or allow
"forever" (until hooks change).  This tries to keep with the git
spirit of not automatically running anything on the user's computer
that got synced down.  Note that individual repo commands can add
always options to avoid these prompts as they see fit (see below for
the 'upload' options).

When hooks are run, they are loaded into the current interpreter (the
one running repo) and their main() function is run.  This mechanism is
used (instead of using subprocess) to make it easier to expand to a
richer hook interface in the future.  During loading, the
interpreter's sys.path is updated to contain the directory containing
the hooks so that hooks can be split into multiple files.

The upload command has two options that control hook behavior:
  - no-verify=False, verify=False (DEFAULT):
    If stdout is a tty, can prompt about running upload hooks if needed.
    If user denies running hooks, the upload is cancelled.  If stdout is
    not a tty and we would need to prompt about upload hooks, upload is
    cancelled.
  - no-verify=False, verify=True:
    Always run upload hooks with no prompt.
  - no-verify=True, verify=False:
    Never run upload hooks, but upload anyway (AKA bypass hooks).
  - no-verify=True, verify=True:
    Invalid

Sample bit of manifest.xml code for enabling hooks (assumes you have a
project named 'hooks' where hooks are stored):
  <repo-hooks in-project="hooks" enabled-list="pre-upload" />

Sample main() function in pre-upload.py in hooks directory:
  def main(project_list, **kwargs):
    print ('These projects will be uploaded: %s' %
           ', '.join(project_list))
    print ('I am being a good boy and ignoring anything in kwargs\n'
           'that I don\'t understand.')
    print 'I fail 50% of the time.  How flaky.'
    if random.random() <= .5:
      raise Exception('Pre-upload hook failed.  Have a nice day.')

Change-Id: I5cefa2cd5865c72589263cf8e2f152a43c122f70
diff --git a/docs/manifest-format.txt b/docs/manifest-format.txt
index 2e1c8c3..c76df80 100644
--- a/docs/manifest-format.txt
+++ b/docs/manifest-format.txt
@@ -25,7 +25,8 @@
                         default?,
                         manifest-server?,
                         remove-project*,
-                        project*)>
+                        project*,
+                        repo-hooks?)>
   
     <!ELEMENT notice (#PCDATA)>
   
@@ -49,6 +50,10 @@
   
     <!ELEMENT remove-project (EMPTY)>
     <!ATTLIST remove-project name  CDATA #REQUIRED>
+
+    <!ELEMENT repo-hooks (EMPTY)>
+    <!ATTLIST repo-hooks in-project CDATA #REQUIRED>
+    <!ATTLIST repo-hooks enabled-list CDATA #REQUIRED>
   ]>
 
 A description of the elements and their attributes follows.
diff --git a/error.py b/error.py
index cb3b725..5238158 100644
--- a/error.py
+++ b/error.py
@@ -75,3 +75,10 @@
   """
   def __init__(self, extra_args=[]):
     self.extra_args = extra_args
+
+class HookError(Exception):
+  """Thrown if a 'repo-hook' could not be run.
+
+  The common case is that the file wasn't present when we tried to run it.
+  """
+  pass
diff --git a/manifest_xml.py b/manifest_xml.py
index 0103cf5..0e6421f 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -171,6 +171,14 @@
         ce.setAttribute('dest', c.dest)
         e.appendChild(ce)
 
+    if self._repo_hooks_project:
+      root.appendChild(doc.createTextNode(''))
+      e = doc.createElement('repo-hooks')
+      e.setAttribute('in-project', self._repo_hooks_project.name)
+      e.setAttribute('enabled-list',
+                     ' '.join(self._repo_hooks_project.enabled_repo_hooks))
+      root.appendChild(e)
+
     doc.writexml(fd, '', '  ', '\n', 'UTF-8')
 
   @property
@@ -189,6 +197,11 @@
     return self._default
 
   @property
+  def repo_hooks_project(self):
+    self._Load()
+    return self._repo_hooks_project
+
+  @property
   def notice(self):
     self._Load()
     return self._notice
@@ -207,6 +220,7 @@
     self._projects = {}
     self._remotes = {}
     self._default = None
+    self._repo_hooks_project = None
     self._notice = None
     self.branch = None
     self._manifest_server = None
@@ -239,15 +253,15 @@
   def _ParseManifest(self, is_root_file):
     root = xml.dom.minidom.parse(self.manifestFile)
     if not root or not root.childNodes:
-      raise ManifestParseError, \
-            "no root node in %s" % \
-            self.manifestFile
+      raise ManifestParseError(
+          "no root node in %s" %
+          self.manifestFile)
 
     config = root.childNodes[0]
     if config.nodeName != 'manifest':
-      raise ManifestParseError, \
-            "no <manifest> in %s" % \
-            self.manifestFile
+      raise ManifestParseError(
+          "no <manifest> in %s" %
+          self.manifestFile)
 
     for node in config.childNodes:
       if node.nodeName == 'remove-project':
@@ -255,25 +269,30 @@
         try:
           del self._projects[name]
         except KeyError:
-          raise ManifestParseError, \
-                'project %s not found' % \
-                (name)
+          raise ManifestParseError(
+              'project %s not found' %
+              (name))
+
+        # If the manifest removes the hooks project, treat it as if it deleted
+        # the repo-hooks element too.
+        if self._repo_hooks_project and (self._repo_hooks_project.name == name):
+          self._repo_hooks_project = None
 
     for node in config.childNodes:
       if node.nodeName == 'remote':
         remote = self._ParseRemote(node)
         if self._remotes.get(remote.name):
-          raise ManifestParseError, \
-                'duplicate remote %s in %s' % \
-                (remote.name, self.manifestFile)
+          raise ManifestParseError(
+              'duplicate remote %s in %s' %
+              (remote.name, self.manifestFile))
         self._remotes[remote.name] = remote
 
     for node in config.childNodes:
       if node.nodeName == 'default':
         if self._default is not None:
-          raise ManifestParseError, \
-                'duplicate default in %s' % \
-                (self.manifestFile)
+          raise ManifestParseError(
+              'duplicate default in %s' %
+              (self.manifestFile))
         self._default = self._ParseDefault(node)
     if self._default is None:
       self._default = _Default()
@@ -281,29 +300,52 @@
     for node in config.childNodes:
       if node.nodeName == 'notice':
         if self._notice is not None:
-          raise ManifestParseError, \
-                'duplicate notice in %s' % \
-                (self.manifestFile)
+          raise ManifestParseError(
+              'duplicate notice in %s' %
+              (self.manifestFile))
         self._notice = self._ParseNotice(node)
 
     for node in config.childNodes:
       if node.nodeName == 'manifest-server':
         url = self._reqatt(node, 'url')
         if self._manifest_server is not None:
-            raise ManifestParseError, \
-                'duplicate manifest-server in %s' % \
-                (self.manifestFile)
+            raise ManifestParseError(
+                'duplicate manifest-server in %s' %
+                (self.manifestFile))
         self._manifest_server = url
 
     for node in config.childNodes:
       if node.nodeName == 'project':
         project = self._ParseProject(node)
         if self._projects.get(project.name):
-          raise ManifestParseError, \
-                'duplicate project %s in %s' % \
-                (project.name, self.manifestFile)
+          raise ManifestParseError(
+              'duplicate project %s in %s' %
+              (project.name, self.manifestFile))
         self._projects[project.name] = project
 
+    for node in config.childNodes:
+      if node.nodeName == 'repo-hooks':
+        # Get the name of the project and the (space-separated) list of enabled.
+        repo_hooks_project = self._reqatt(node, 'in-project')
+        enabled_repo_hooks = self._reqatt(node, 'enabled-list').split()
+
+        # Only one project can be the hooks project
+        if self._repo_hooks_project is not None:
+          raise ManifestParseError(
+              'duplicate repo-hooks in %s' %
+              (self.manifestFile))
+
+        # Store a reference to the Project.
+        try:
+          self._repo_hooks_project = self._projects[repo_hooks_project]
+        except KeyError:
+          raise ManifestParseError(
+              'project %s not found for repo-hooks' %
+              (repo_hooks_project))
+
+        # Store the enabled hooks in the Project object.
+        self._repo_hooks_project.enabled_repo_hooks = enabled_repo_hooks
+
   def _AddMetaProjectMirror(self, m):
     name = None
     m_url = m.GetRemote(m.remote.name).url
diff --git a/project.py b/project.py
index 37f6d36..49633f7 100644
--- a/project.py
+++ b/project.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import traceback
 import errno
 import filecmp
 import os
@@ -24,7 +25,7 @@
 from color import Coloring
 from git_command import GitCommand
 from git_config import GitConfig, IsId
-from error import GitError, ImportError, UploadError
+from error import GitError, HookError, ImportError, UploadError
 from error import ManifestInvalidRevisionError
 
 from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
@@ -234,6 +235,249 @@
     self.url = url
     self.review = review
 
+class RepoHook(object):
+  """A RepoHook contains information about a script to run as a hook.
+
+  Hooks are used to run a python script before running an upload (for instance,
+  to run presubmit checks).  Eventually, we may have hooks for other actions.
+
+  This shouldn't be confused with files in the 'repo/hooks' directory.  Those
+  files are copied into each '.git/hooks' folder for each project.  Repo-level
+  hooks are associated instead with repo actions.
+
+  Hooks are always python.  When a hook is run, we will load the hook into the
+  interpreter and execute its main() function.
+  """
+  def __init__(self,
+               hook_type,
+               hooks_project,
+               topdir,
+               abort_if_user_denies=False):
+    """RepoHook constructor.
+
+    Params:
+      hook_type: A string representing the type of hook.  This is also used
+          to figure out the name of the file containing the hook.  For
+          example: 'pre-upload'.
+      hooks_project: The project containing the repo hooks.  If you have a
+          manifest, this is manifest.repo_hooks_project.  OK if this is None,
+          which will make the hook a no-op.
+      topdir: Repo's top directory (the one containing the .repo directory).
+          Scripts will run with CWD as this directory.  If you have a manifest,
+          this is manifest.topdir
+      abort_if_user_denies: If True, we'll throw a HookError() if the user
+          doesn't allow us to run the hook.
+    """
+    self._hook_type = hook_type
+    self._hooks_project = hooks_project
+    self._topdir = topdir
+    self._abort_if_user_denies = abort_if_user_denies
+
+    # Store the full path to the script for convenience.
+    if self._hooks_project:
+      self._script_fullpath = os.path.join(self._hooks_project.worktree,
+                                           self._hook_type + '.py')
+    else:
+      self._script_fullpath = None
+
+  def _GetHash(self):
+    """Return a hash of the contents of the hooks directory.
+
+    We'll just use git to do this.  This hash has the property that if anything
+    changes in the directory we will return a different has.
+
+    SECURITY CONSIDERATION:
+      This hash only represents the contents of files in the hook directory, not
+      any other files imported or called by hooks.  Changes to imported files
+      can change the script behavior without affecting the hash.
+
+    Returns:
+      A string representing the hash.  This will always be ASCII so that it can
+      be printed to the user easily.
+    """
+    assert self._hooks_project, "Must have hooks to calculate their hash."
+
+    # We will use the work_git object rather than just calling GetRevisionId().
+    # That gives us a hash of the latest checked in version of the files that
+    # the user will actually be executing.  Specifically, GetRevisionId()
+    # doesn't appear to change even if a user checks out a different version
+    # of the hooks repo (via git checkout) nor if a user commits their own revs.
+    #
+    # NOTE: Local (non-committed) changes will not be factored into this hash.
+    # I think this is OK, since we're really only worried about warning the user
+    # about upstream changes.
+    return self._hooks_project.work_git.rev_parse('HEAD')
+
+  def _GetMustVerb(self):
+    """Return 'must' if the hook is required; 'should' if not."""
+    if self._abort_if_user_denies:
+      return 'must'
+    else:
+      return 'should'
+
+  def _CheckForHookApproval(self):
+    """Check to see whether this hook has been approved.
+
+    We'll look at the hash of all of the hooks.  If this matches the hash that
+    the user last approved, we're done.  If it doesn't, we'll ask the user
+    about approval.
+
+    Note that we ask permission for each individual hook even though we use
+    the hash of all hooks when detecting changes.  We'd like the user to be
+    able to approve / deny each hook individually.  We only use the hash of all
+    hooks because there is no other easy way to detect changes to local imports.
+
+    Returns:
+      True if this hook is approved to run; False otherwise.
+
+    Raises:
+      HookError: Raised if the user doesn't approve and abort_if_user_denies
+          was passed to the consturctor.
+    """
+    hooks_dir = self._hooks_project.worktree
+    hooks_config = self._hooks_project.config
+    git_approval_key = 'repo.hooks.%s.approvedhash' % self._hook_type
+
+    # Get the last hash that the user approved for this hook; may be None.
+    old_hash = hooks_config.GetString(git_approval_key)
+
+    # Get the current hash so we can tell if scripts changed since approval.
+    new_hash = self._GetHash()
+
+    if old_hash is not None:
+      # User previously approved hook and asked not to be prompted again.
+      if new_hash == old_hash:
+        # Approval matched.  We're done.
+        return True
+      else:
+        # Give the user a reason why we're prompting, since they last told
+        # us to "never ask again".
+        prompt = 'WARNING: Scripts have changed since %s was allowed.\n\n' % (
+            self._hook_type)
+    else:
+      prompt = ''
+
+    # Prompt the user if we're not on a tty; on a tty we'll assume "no".
+    if sys.stdout.isatty():
+      prompt += ('Repo %s run the script:\n'
+                 '  %s\n'
+                 '\n'
+                 'Do you want to allow this script to run '
+                 '(yes/yes-never-ask-again/NO)? ') % (
+                 self._GetMustVerb(), self._script_fullpath)
+      response = raw_input(prompt).lower()
+      print
+
+      # User is doing a one-time approval.
+      if response in ('y', 'yes'):
+        return True
+      elif response == 'yes-never-ask-again':
+        hooks_config.SetString(git_approval_key, new_hash)
+        return True
+
+    # For anything else, we'll assume no approval.
+    if self._abort_if_user_denies:
+      raise HookError('You must allow the %s hook or use --no-verify.' %
+                      self._hook_type)
+
+    return False
+
+  def _ExecuteHook(self, **kwargs):
+    """Actually execute the given hook.
+
+    This will run the hook's 'main' function in our python interpreter.
+
+    Args:
+      kwargs: Keyword arguments to pass to the hook.  These are often specific
+          to the hook type.  For instance, pre-upload hooks will contain
+          a project_list.
+    """
+    # Keep sys.path and CWD stashed away so that we can always restore them
+    # upon function exit.
+    orig_path = os.getcwd()
+    orig_syspath = sys.path
+
+    try:
+      # Always run hooks with CWD as topdir.
+      os.chdir(self._topdir)
+
+      # Put the hook dir as the first item of sys.path so hooks can do
+      # relative imports.  We want to replace the repo dir as [0] so
+      # hooks can't import repo files.
+      sys.path = [os.path.dirname(self._script_fullpath)] + sys.path[1:]
+
+      # Exec, storing global context in the context dict.  We catch exceptions
+      # and  convert to a HookError w/ just the failing traceback.
+      context = {}
+      try:
+        execfile(self._script_fullpath, context)
+      except Exception:
+        raise HookError('%s\nFailed to import %s hook; see traceback above.' % (
+                        traceback.format_exc(), self._hook_type))
+
+      # Running the script should have defined a main() function.
+      if 'main' not in context:
+        raise HookError('Missing main() in: "%s"' % self._script_fullpath)
+
+
+      # Add 'hook_should_take_kwargs' to the arguments to be passed to main.
+      # We don't actually want hooks to define their main with this argument--
+      # it's there to remind them that their hook should always take **kwargs.
+      # For instance, a pre-upload hook should be defined like:
+      #   def main(project_list, **kwargs):
+      #
+      # This allows us to later expand the API without breaking old hooks.
+      kwargs = kwargs.copy()
+      kwargs['hook_should_take_kwargs'] = True
+
+      # Call the main function in the hook.  If the hook should cause the
+      # build to fail, it will raise an Exception.  We'll catch that convert
+      # to a HookError w/ just the failing traceback.
+      try:
+        context['main'](**kwargs)
+      except Exception:
+        raise HookError('%s\nFailed to run main() for %s hook; see traceback '
+                        'above.' % (
+                        traceback.format_exc(), self._hook_type))
+    finally:
+      # Restore sys.path and CWD.
+      sys.path = orig_syspath
+      os.chdir(orig_path)
+
+  def Run(self, user_allows_all_hooks, **kwargs):
+    """Run the hook.
+
+    If the hook doesn't exist (because there is no hooks project or because
+    this particular hook is not enabled), this is a no-op.
+
+    Args:
+      user_allows_all_hooks: If True, we will never prompt about running the
+          hook--we'll just assume it's OK to run it.
+      kwargs: Keyword arguments to pass to the hook.  These are often specific
+          to the hook type.  For instance, pre-upload hooks will contain
+          a project_list.
+
+    Raises:
+      HookError: If there was a problem finding the hook or the user declined
+          to run a required hook (from _CheckForHookApproval).
+    """
+    # No-op if there is no hooks project or if hook is disabled.
+    if ((not self._hooks_project) or
+        (self._hook_type not in self._hooks_project.enabled_repo_hooks)):
+      return
+
+    # Bail with a nice error if we can't find the hook.
+    if not os.path.isfile(self._script_fullpath):
+      raise HookError('Couldn\'t find repo hook: "%s"' % self._script_fullpath)
+
+    # Make sure the user is OK with running the hook.
+    if (not user_allows_all_hooks) and (not self._CheckForHookApproval()):
+      return
+
+    # Run the hook with the same version of python we're using.
+    self._ExecuteHook(**kwargs)
+
+
 class Project(object):
   def __init__(self,
                manifest,
@@ -275,6 +519,10 @@
     self.bare_git = self._GitGetByExec(self, bare=True)
     self.bare_ref = GitRefs(gitdir)
 
+    # This will be filled in if a project is later identified to be the
+    # project containing repo hooks.
+    self.enabled_repo_hooks = []
+
   @property
   def Exists(self):
     return os.path.isdir(self.gitdir)
@@ -1457,6 +1705,22 @@
       return r
 
     def __getattr__(self, name):
+      """Allow arbitrary git commands using pythonic syntax.
+
+      This allows you to do things like:
+        git_obj.rev_parse('HEAD')
+
+      Since we don't have a 'rev_parse' method defined, the __getattr__ will
+      run.  We'll replace the '_' with a '-' and try to run a git command.
+      Any other arguments will be passed to the git command.
+
+      Args:
+        name: The name of the git command to call.  Any '_' characters will
+            be replaced with '-'.
+
+      Returns:
+        A callable object that will try to call git with the named command.
+      """
       name = name.replace('_', '-')
       def runner(*args):
         cmdv = [name]
diff --git a/subcmds/upload.py b/subcmds/upload.py
index 2082209..c561b8a 100644
--- a/subcmds/upload.py
+++ b/subcmds/upload.py
@@ -19,7 +19,8 @@
 
 from command import InteractiveCommand
 from editor import Editor
-from error import UploadError
+from error import HookError, UploadError
+from project import RepoHook
 
 UNUSUAL_COMMIT_THRESHOLD = 5
 
@@ -120,6 +121,29 @@
                  type='string',  action='append', dest='cc',
                  help='Also send email to these email addresses.')
 
+    # Options relating to upload hook.  Note that verify and no-verify are NOT
+    # opposites of each other, which is why they store to different locations.
+    # We are using them to match 'git commit' syntax.
+    #
+    # Combinations:
+    # - no-verify=False, verify=False (DEFAULT):
+    #   If stdout is a tty, can prompt about running upload hooks if needed.
+    #   If user denies running hooks, the upload is cancelled.  If stdout is
+    #   not a tty and we would need to prompt about upload hooks, upload is
+    #   cancelled.
+    # - no-verify=False, verify=True:
+    #   Always run upload hooks with no prompt.
+    # - no-verify=True, verify=False:
+    #   Never run upload hooks, but upload anyway (AKA bypass hooks).
+    # - no-verify=True, verify=True:
+    #   Invalid
+    p.add_option('--no-verify',
+                 dest='bypass_hooks', action='store_true',
+                 help='Do not run the upload hook.')
+    p.add_option('--verify',
+                 dest='allow_all_hooks', action='store_true',
+                 help='Run the upload hook without prompting.')
+
   def _SingleBranch(self, opt, branch, people):
     project = branch.project
     name = branch.name
@@ -313,17 +337,27 @@
     reviewers = []
     cc = []
 
+    for project in project_list:
+      avail = project.GetUploadableBranches()
+      if avail:
+        pending.append((project, avail))
+
+    if pending and (not opt.bypass_hooks):
+      hook = RepoHook('pre-upload', self.manifest.repo_hooks_project,
+                      self.manifest.topdir, abort_if_user_denies=True)
+      pending_proj_names = [project.name for (project, avail) in pending]
+      try:
+        hook.Run(opt.allow_all_hooks, project_list=pending_proj_names)
+      except HookError, e:
+        print >>sys.stderr, "ERROR: %s" % str(e)
+        return
+
     if opt.reviewers:
       reviewers = _SplitEmails(opt.reviewers)
     if opt.cc:
       cc = _SplitEmails(opt.cc)
     people = (reviewers,cc)
 
-    for project in project_list:
-      avail = project.GetUploadableBranches()
-      if avail:
-        pending.append((project, avail))
-
     if not pending:
       print >>sys.stdout, "no branches ready for upload"
     elif len(pending) == 1 and len(pending[0][1]) == 1: