Initial Contribution

commit: cf31fe9b4fb650b27e19f5d7ee7297e383660caf [log] [tgz]
author: The Android Open Source Project <initial-contribution@android.com> Tue Oct 21 07:00:00 2008 -0700
committer: The Android Open Source Project <initial-contribution@android.com> Tue Oct 21 07:00:00 2008 -0700
tree: d04ca6a45d579dca5e5469606c48c405aee68f4b
diff --git a/import_zip.py b/import_zip.py
new file mode 100644
index 0000000..08aff32
--- /dev/null
+++ b/import_zip.py

@@ -0,0 +1,345 @@
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import stat
+import struct
+import zlib
+import cStringIO
+
+from import_ext import ImportExternal
+from error import ImportError
+
+class ImportZip(ImportExternal):
+  """Importer that reads a zip (or jar) archive as a stream from
+     its URL and unpacks it into a Project's Git repository.
+  """
+  @classmethod
+  def CanAccept(cls, url):
+    """Report whether url names something this importer can unpack.
+
+       Only the end of the url is examined; it must be '.zip'
+       or '.jar' (the comparison is case sensitive).
+    """
+    return url.endswith('.zip') or url.endswith('.jar')
+
+  def _UnpackFiles(self):
+    """Unpack every entry of the archive, then apply file modes.
+
+       The archive is consumed front to back in a single pass:
+       first each local file record (content is handed to
+       _UnpackOneFile), then the central directory (modes are
+       handed to _SetFileMode), then the end-of-archive marker.
+       The url stream is always closed, even on failure.
+
+       Raises ImportError if the url does not look like a zip.
+    """
+    url_fd, url = self._OpenUrl()
+    try:
+      if not self.__class__.CanAccept(url):
+        raise ImportError('non-zip file extension: %s' % url)
+
+      archive = _ZipFile(url_fd)
+      for entry in archive.FileRecords():
+        content = archive.Open(entry).read()
+
+        # Text-like content is normalized to LF line endings.
+        if content and _SafeCRLF(content):
+          content = content.replace('\r\n', '\n')
+
+        content_fd = cStringIO.StringIO(content)
+        self._UnpackOneFile(entry.mode, len(content), entry.name, content_fd)
+        # Skip over any data descriptor that trails the entry.
+        archive.Close(entry)
+
+      # The local headers only yield default modes; the central
+      # directory records are used to set the mode of each path.
+      for entry in archive.CentralDirectory():
+        self._SetFileMode(entry.name, entry.mode)
+
+      archive.CheckTail()
+    finally:
+      url_fd.close()
+
+
+def _SafeCRLF(data):
+  """Decide whether a CRLF->LF conversion of data is reasonably safe.
+
+     Returns False when data holds a NUL byte anywhere other than
+     in its very last position, since such a stream is likely
+     binary and a conversion may damage it.  A single trailing NUL
+     is tolerated because at least one source ZIP file has text
+     files of that shape.
+
+     Otherwise returns True only if data holds at least one LF and
+     every LF is immediately preceded by a CR.  In that case each
+     CRLF can be replaced by LF and later restored by replacing
+     each LF with CRLF, so the conversion loses nothing.
+  """
+  first_nul = data.find('\0')
+  if 0 <= first_nul < len(data) - 1:
+    return False
+
+  lf_total = data.count('\n')
+  if lf_total == 0:
+    return False
+
+  # A CRLF pair cannot overlap another one, so the two counts are
+  # equal exactly when no LF stands without a CR in front of it.
+  return data.count('\r\n') == lf_total
+
+class _ZipFile(object):
+  """Streaming, forward-only parser for a zip file.
+
+     The caller is expected to consume the archive in order:
+     iterate FileRecords() (calling Open() and Close() for each
+     entry), then iterate CentralDirectory(), then CheckTail().
+     All of them share one push-back stream wrapped around fd.
+  """
+  def __init__(self, fd):
+    self._fd = _UngetStream(fd)
+
+  def FileRecords(self):
+    """Return an iterator over the local file header records."""
+    return _FileIter(self._fd)
+
+  def CentralDirectory(self):
+    """Return an iterator over the central directory records."""
+    return _CentIter(self._fd)
+
+  def CheckTail(self):
+    """Verify the next record is the end of central directory.
+
+       Raises ImportError if any other record type follows, or if
+       the stream ends before a full record signature is read.
+    """
+    sig = self._ReadSignature()
+    if sig != 0x06054b50:  # end of central directory
+      raise ImportError('zip record %x unsupported' % sig)
+
+  def Open(self, entry):
+    """Return a file-like object yielding the content of entry.
+
+       Deflated entries are inflated on the fly.  Stored entries
+       are read by length, which is unknown when the sizes were
+       deferred to a trailing data descriptor; that combination
+       raises ImportError.
+    """
+    if entry.is_compressed:
+      return _InflateStream(self._fd)
+    else:
+      if entry.has_trailer:
+        raise ImportError('unable to extract streamed zip')
+      return _FixedLengthStream(self._fd, entry.uncompressed_size)
+
+  def Close(self, entry):
+    """Skip the data descriptor following the content of entry.
+
+       Does nothing for entries without a data descriptor.
+       Raises ImportError if the stream ends inside the descriptor.
+    """
+    if not entry.has_trailer:
+      return
+
+    if self._ReadSignature() == 0x08074b50:
+      # Not a formal type marker, but commonly seen in zips
+      # as the data descriptor signature.  The three 4 byte
+      # descriptor fields follow it.
+      #
+      remaining = 12
+    else:
+      # No signature for the data descriptor; the 4 bytes just
+      # read were its first field, so only two fields remain.
+      #
+      remaining = 8
+
+    if len(self._fd.read(remaining)) < remaining:
+      raise ImportError('truncated zip file')
+
+  def _ReadSignature(self):
+    """Read a 4 byte little-endian record signature.
+
+       A short read is reported as ImportError, like every other
+       malformed-archive condition, rather than as struct.error.
+    """
+    sig_buf = self._fd.read(4)
+    if len(sig_buf) < 4:
+      raise ImportError('truncated zip file')
+    return struct.unpack('<I', sig_buf)[0]
+
+
+class _FileIter(object):
+  """Iterator over the local file header records of a zip stream.
+
+     Each step parses one local file header and returns it as a
+     _FileHeader with its name attribute filled in, leaving the
+     stream positioned at the start of that entry's content.
+     fd must provide read(size) and unread(buf).
+  """
+  def __init__(self, fd):
+    self._fd = fd
+
+  def __iter__(self):
+    return self
+
+  def next(self):
+    """Return the next local file header record.
+
+       Raises StopIteration when the next record is of another
+       type; its signature is pushed back onto the stream so the
+       following parser can read it again.
+       Raises ImportError if the stream ends inside the record
+       signature or the fixed-size part of the header.
+    """
+    fd = self._fd
+
+    sig_buf = fd.read(4)
+    if len(sig_buf) < 4:
+      # Report a short read like other malformed input instead
+      # of letting struct.unpack fail with struct.error.
+      raise ImportError('truncated zip file')
+
+    if struct.unpack('<I', sig_buf)[0] != 0x04034b50:    # local file header
+      fd.unread(sig_buf)
+      raise StopIteration()
+
+    raw_header = fd.read(26)
+    if len(raw_header) < 26:
+      raise ImportError('truncated zip file')
+
+    rec = _FileHeader(raw_header)
+    rec.name = fd.read(rec.name_len)
+    fd.read(rec.extra_len)  # the extra field is skipped
+
+    if rec.name.endswith('/'):
+      # A trailing slash marks a directory entry; mode is 0777.
+      rec.name = rec.name[:-1]
+      rec.mode = stat.S_IFDIR | stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
+    return rec
+
+
+class _FileHeader(object):
+  """Fixed-size part of a local file header (26 bytes, little-endian,
+     not including the leading 4 byte record signature).
+
+     0  version needed to extract       2 bytes
+     1  general purpose bit flag        2 bytes
+     2  compression method              2 bytes
+     3  last mod file time              2 bytes
+     4  last mod file date              2 bytes
+     5  crc-32                          4 bytes
+     6  compressed size                 4 bytes
+     7  uncompressed size               4 bytes
+     8  file name length                2 bytes
+     9  extra field length              2 bytes
+
+     Raises ImportError for any compression method other than
+     0 (stored) or 8 (deflated).
+  """
+  def __init__(self, raw_bin):
+    (_version, flags, method, _mtime, _mdate, _crc,
+     packed_size, unpacked_size,
+     name_len, extra_len) = struct.unpack('<5H3I2H', raw_bin)
+
+    if method not in (0, 8):
+      raise ImportError('unrecognized compression format')
+    self.is_compressed = (method == 8)
+
+    # Flag bit 3: a data descriptor trails the entry's content.
+    self.has_trailer = bool(flags & (1 << 3))
+
+    self.compressed_size  = packed_size
+    self.uncompressed_size = unpacked_size
+    self.name_len = name_len
+    self.extra_len = extra_len
+
+    # Default to a regular file with mode 0644 (rw-r--r--).
+    self.mode = (stat.S_IFREG
+                 | stat.S_IRUSR | stat.S_IWUSR
+                 | stat.S_IRGRP | stat.S_IROTH)
+
+
+class _CentIter(object):
+  """Iterator over the central directory records of a zip stream.
+
+     Each step parses one central directory record and returns it
+     as a _CentHeader with its name attribute filled in.
+     fd must provide read(size) and unread(buf).
+  """
+  def __init__(self, fd):
+    self._fd = fd
+
+  def __iter__(self):
+    return self
+
+  def next(self):
+    """Return the next central directory record.
+
+       Raises StopIteration when the next record is of another
+       type; its signature is pushed back onto the stream so the
+       following parser can read it again.
+       Raises ImportError if the stream ends inside the record
+       signature or the fixed-size part of the header.
+    """
+    fd = self._fd
+
+    sig_buf = fd.read(4)
+    if len(sig_buf) < 4:
+      # Report a short read like other malformed input instead
+      # of letting struct.unpack fail with struct.error.
+      raise ImportError('truncated zip file')
+
+    if struct.unpack('<I', sig_buf)[0] != 0x02014b50:  # central directory
+      fd.unread(sig_buf)
+      raise StopIteration()
+
+    raw_header = fd.read(42)
+    if len(raw_header) < 42:
+      raise ImportError('truncated zip file')
+
+    rec = _CentHeader(raw_header)
+    rec.name = fd.read(rec.name_len)
+    fd.read(rec.extra_len)    # the extra field is skipped
+    fd.read(rec.comment_len)  # so is the file comment
+
+    if rec.name.endswith('/'):
+      # A trailing slash marks a directory entry; mode is 0777.
+      rec.name = rec.name[:-1]
+      rec.mode = stat.S_IFDIR | stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
+    return rec
+
+
+class _CentHeader(object):
+  """Fixed-size part of a central directory record (42 bytes,
+     little-endian, not including the leading 4 byte signature).
+
+     0  version made by                 2 bytes
+     1  version needed to extract       2 bytes
+     2  general purpose bit flag        2 bytes
+     3  compression method              2 bytes
+     4  last mod file time              2 bytes
+     5  last mod file date              2 bytes
+     6  crc-32                          4 bytes
+     7  compressed size                 4 bytes
+     8  uncompressed size               4 bytes
+     9  file name length                2 bytes
+    10  extra field length              2 bytes
+    11  file comment length             2 bytes
+    12  disk number start               2 bytes
+    13  internal file attributes        2 bytes
+    14  external file attributes        4 bytes
+    15  relative offset of local header 4 bytes
+  """
+  def __init__(self, raw_bin):
+    fields = struct.unpack('<6H3I5H2I', raw_bin)
+    made_by = fields[0]
+    external_attrs = fields[14]
+
+    self.name_len, self.extra_len, self.comment_len = fields[9:12]
+
+    # The high byte of "version made by" names the host system;
+    # 3 is UNIX, in which case the file mode is taken from the
+    # upper 16 bits of the external attributes.
+    if (made_by >> 8) == 3:
+      self.mode = external_attrs >> 16
+    else:
+      # Any other host: regular file with mode 0644 (rw-r--r--).
+      self.mode = (stat.S_IFREG
+                   | stat.S_IRUSR | stat.S_IWUSR
+                   | stat.S_IRGRP | stat.S_IROTH)
+
+
+class _UngetStream(object):
+  """File like object to read and rewind a stream.
+
+     Wraps fd (anything with a read(size) method) and adds
+     unread(), which pushes bytes back so that the next read()
+     returns them first.  Subclasses override _Inflate() to
+     transform or limit the data pulled from fd.
+  """
+  def __init__(self, fd):
+    self._fd = fd
+    # Bytes already obtained but not yet handed to a caller;
+    # None or an empty string when there are none.
+    self._buf = None
+
+  def read(self, size = -1):
+    """Read up to size bytes; read until end of stream if size < 0.
+
+       Returns fewer bytes than requested (possibly an empty
+       string) when the end of the stream is reached first.
+    """
+    r = []
+    try:
+      if size >= 0:
+        self._ReadChunk(r, size)
+      else:
+        # Unbounded read: keep pulling until _ReadChunk signals
+        # the end of the stream with EOFError.
+        while True:
+          self._ReadChunk(r, 2048)
+    except EOFError:
+      pass
+
+    # Avoid a copy when only a single piece was collected.
+    if len(r) == 1:
+      return r[0]
+    return ''.join(r)
+
+  def unread(self, buf):
+    """Push buf back so it is returned ahead of any pending data."""
+    b = self._buf
+    if b is None or len(b) == 0:
+      self._buf = buf
+    else:
+      self._buf = buf + b
+
+  def _ReadChunk(self, r, size):
+    """Append pieces totalling size bytes to the list r.
+
+       Pending data is used first, then more is pulled from fd in
+       2048 byte reads passed through _Inflate().  Raises EOFError
+       when _Inflate() yields nothing; pieces gathered so far stay
+       in r and any unused pending data is kept for the next call.
+    """
+    b = self._buf
+    try:
+      while size > 0:
+        if b is None or len(b) == 0:
+          b = self._Inflate(self._fd.read(2048))
+          # NOTE(review): any falsy result (None or '') counts as
+          # end of stream here.
+          if not b:
+            raise EOFError()
+          continue
+
+        use = min(size, len(b))
+        r.append(b[:use])
+        b = b[use:]
+        size -= use
+    finally:
+      # Always store the leftover, including on EOFError.
+      self._buf = b
+
+  def _Inflate(self, b):
+    """Hook applied to each raw chunk read from fd; identity here."""
+    return b
+
+
+class _FixedLengthStream(_UngetStream):
+  """File like object to read a fixed length stream.
+
+     Yields at most have bytes from fd and pushes anything read
+     beyond that limit back onto fd, so fd must provide unread()
+     as well as read().
+  """
+  def __init__(self, fd, have):
+    _UngetStream.__init__(self, fd)
+    # Number of bytes of this stream not yet pulled from fd.
+    self._have = have
+
+  def _Inflate(self, b):
+    """Trim chunk b to the bytes still owed to this stream.
+
+       Returns None once the limit is exhausted, which the base
+       class treats as end of stream.
+    """
+    n = self._have
+    if n == 0:
+      # Nothing left for us: the whole chunk belongs to whatever
+      # follows in the underlying stream.
+      self._fd.unread(b)
+      return None
+
+    if len(b) > n:
+      # The chunk runs past our limit; give the surplus back.
+      self._fd.unread(b[n:])
+      b = b[:n]
+    self._have -= len(b)
+    return b
+
+
+class _InflateStream(_UngetStream):
+  """Inflates the stream as it reads input.
+
+     Input beyond the end of the compressed data is pushed back
+     onto fd, so fd must provide unread() as well as read().
+  """
+  def __init__(self, fd):
+    _UngetStream.__init__(self, fd)
+    # Negative wbits: presumably selects raw deflate data with no
+    # zlib header or checksum -- see the zlib module documentation.
+    # Set to None once the end of the compressed data was seen.
+    self._z = zlib.decompressobj(-zlib.MAX_WBITS)
+
+  def _Inflate(self, b):
+    """Decompress chunk b and return the inflated bytes.
+
+       Returns None once the compressed data has ended, which
+       the base class treats as end of stream.
+    """
+    z = self._z
+    if not z:
+      # Already finished: the chunk belongs to whatever follows
+      # in the underlying stream.
+      self._fd.unread(b)
+      return None
+
+    b = z.decompress(b)
+    if z.unconsumed_tail != '':
+      # Input the decompressor did not process; hand it back so
+      # it is offered again on the next call.
+      self._fd.unread(z.unconsumed_tail)
+    elif z.unused_data != '':
+      # Bytes found after the end of the compressed data; return
+      # them to the underlying stream and stop inflating.
+      self._fd.unread(z.unused_data)
+      self._z = None
+    return b
commit	cf31fe9b4fb650b27e19f5d7ee7297e383660caf	[log] [tgz]
author	The Android Open Source Project <initial-contribution@android.com>	Tue Oct 21 07:00:00 2008 -0700
committer	The Android Open Source Project <initial-contribution@android.com>	Tue Oct 21 07:00:00 2008 -0700
tree	d04ca6a45d579dca5e5469606c48c405aee68f4b