Initial Contribution
diff --git a/import_zip.py b/import_zip.py
new file mode 100644
index 0000000..08aff32
--- /dev/null
+++ b/import_zip.py
@@ -0,0 +1,345 @@
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import stat
+import struct
+import zlib
+import cStringIO
+
+from import_ext import ImportExternal
+from error import ImportError
+
+class ImportZip(ImportExternal):
+ """Streams a zip file from the network directly into a Project's
+ Git repository.
+ """
+ @classmethod
+ def CanAccept(cls, url):
+ """Can this importer read and unpack the data stored at url?
+ """
+ if url.endswith('.zip') or url.endswith('.jar'):
+ return True
+ return False
+
+ def _UnpackFiles(self):
+ url_fd, url = self._OpenUrl()
+ try:
+ if not self.__class__.CanAccept(url):
+ raise ImportError('non-zip file extension: %s' % url)
+
+ zip = _ZipFile(url_fd)
+ for entry in zip.FileRecords():
+ data = zip.Open(entry).read()
+ sz = len(data)
+
+ if data and _SafeCRLF(data):
+ data = data.replace('\r\n', '\n')
+ sz = len(data)
+
+ fd = cStringIO.StringIO(data)
+ self._UnpackOneFile(entry.mode, sz, entry.name, fd)
+ zip.Close(entry)
+
+ for entry in zip.CentralDirectory():
+ self._SetFileMode(entry.name, entry.mode)
+
+ zip.CheckTail()
+ finally:
+ url_fd.close()
+
+
+def _SafeCRLF(data):
+ """Is it reasonably safe to perform a CRLF->LF conversion?
+
+ If the stream contains a NUL byte it is likely binary,
+ and thus a CRLF->LF conversion may damage the stream.
+
+ If the only NUL is in the last position of the stream,
+ but it otherwise can do a CRLF<->LF conversion we do
+ the CRLF conversion anyway. At least one source ZIP
+ file has this structure in its source code.
+
+ If every occurrance of a CR and LF is paired up as a
+ CRLF pair then the conversion is safely bi-directional.
+ s/\r\n/\n/g == s/\n/\r\\n/g can convert between them.
+ """
+ nul = data.find('\0')
+ if 0 <= nul and nul < (len(data) - 1):
+ return False
+
+ n_lf = 0
+ last = 0
+ while True:
+ lf = data.find('\n', last)
+ if lf < 0:
+ break
+ if lf == 0 or data[lf - 1] != '\r':
+ return False
+ last = lf + 1
+ n_lf += 1
+ return n_lf > 0
+
+class _ZipFile(object):
+ """Streaming iterator to parse a zip file on the fly.
+ """
+ def __init__(self, fd):
+ self._fd = _UngetStream(fd)
+
+ def FileRecords(self):
+ return _FileIter(self._fd)
+
+ def CentralDirectory(self):
+ return _CentIter(self._fd)
+
+ def CheckTail(self):
+ type_buf = self._fd.read(4)
+ type = struct.unpack('<I', type_buf)[0]
+ if type != 0x06054b50: # end of central directory
+ raise ImportError('zip record %x unsupported' % type)
+
+ def Open(self, entry):
+ if entry.is_compressed:
+ return _InflateStream(self._fd)
+ else:
+ if entry.has_trailer:
+ raise ImportError('unable to extract streamed zip')
+ return _FixedLengthStream(self._fd, entry.uncompressed_size)
+
+ def Close(self, entry):
+ if entry.has_trailer:
+ type = struct.unpack('<I', self._fd.read(4))[0]
+ if type == 0x08074b50:
+ # Not a formal type marker, but commonly seen in zips
+ # as the data descriptor signature.
+ #
+ struct.unpack('<3I', self._fd.read(12))
+ else:
+ # No signature for the data descriptor, so read the
+ # remaining fields out of the stream
+ #
+ self._fd.read(8)
+
+
+class _FileIter(object):
+ def __init__(self, fd):
+ self._fd = fd
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ fd = self._fd
+
+ type_buf = fd.read(4)
+ type = struct.unpack('<I', type_buf)[0]
+
+ if type != 0x04034b50: # local file header
+ fd.unread(type_buf)
+ raise StopIteration()
+
+ rec = _FileHeader(fd.read(26))
+ rec.name = fd.read(rec.name_len)
+ fd.read(rec.extra_len)
+
+ if rec.name.endswith('/'):
+ rec.name = rec.name[:-1]
+ rec.mode = stat.S_IFDIR | 0777
+ return rec
+
+
+class _FileHeader(object):
+ """Information about a single file in the archive.
+ 0 version needed to extract 2 bytes
+ 1 general purpose bit flag 2 bytes
+ 2 compression method 2 bytes
+ 3 last mod file time 2 bytes
+ 4 last mod file date 2 bytes
+ 5 crc-32 4 bytes
+ 6 compressed size 4 bytes
+ 7 uncompressed size 4 bytes
+ 8 file name length 2 bytes
+ 9 extra field length 2 bytes
+ """
+ def __init__(self, raw_bin):
+ rec = struct.unpack('<5H3I2H', raw_bin)
+
+ if rec[2] == 8:
+ self.is_compressed = True
+ elif rec[2] == 0:
+ self.is_compressed = False
+ else:
+ raise ImportError('unrecognized compression format')
+
+ if rec[1] & (1 << 3):
+ self.has_trailer = True
+ else:
+ self.has_trailer = False
+
+ self.compressed_size = rec[6]
+ self.uncompressed_size = rec[7]
+ self.name_len = rec[8]
+ self.extra_len = rec[9]
+ self.mode = stat.S_IFREG | 0644
+
+
+class _CentIter(object):
+ def __init__(self, fd):
+ self._fd = fd
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ fd = self._fd
+
+ type_buf = fd.read(4)
+ type = struct.unpack('<I', type_buf)[0]
+
+ if type != 0x02014b50: # central directory
+ fd.unread(type_buf)
+ raise StopIteration()
+
+ rec = _CentHeader(fd.read(42))
+ rec.name = fd.read(rec.name_len)
+ fd.read(rec.extra_len)
+ fd.read(rec.comment_len)
+
+ if rec.name.endswith('/'):
+ rec.name = rec.name[:-1]
+ rec.mode = stat.S_IFDIR | 0777
+ return rec
+
+
+class _CentHeader(object):
+ """Information about a single file in the archive.
+ 0 version made by 2 bytes
+ 1 version needed to extract 2 bytes
+ 2 general purpose bit flag 2 bytes
+ 3 compression method 2 bytes
+ 4 last mod file time 2 bytes
+ 5 last mod file date 2 bytes
+ 6 crc-32 4 bytes
+ 7 compressed size 4 bytes
+ 8 uncompressed size 4 bytes
+ 9 file name length 2 bytes
+ 10 extra field length 2 bytes
+ 11 file comment length 2 bytes
+ 12 disk number start 2 bytes
+ 13 internal file attributes 2 bytes
+ 14 external file attributes 4 bytes
+ 15 relative offset of local header 4 bytes
+ """
+ def __init__(self, raw_bin):
+ rec = struct.unpack('<6H3I5H2I', raw_bin)
+ self.name_len = rec[9]
+ self.extra_len = rec[10]
+ self.comment_len = rec[11]
+
+ if (rec[0] & 0xff00) == 0x0300: # UNIX
+ self.mode = rec[14] >> 16
+ else:
+ self.mode = stat.S_IFREG | 0644
+
+
+class _UngetStream(object):
+ """File like object to read and rewind a stream.
+ """
+ def __init__(self, fd):
+ self._fd = fd
+ self._buf = None
+
+ def read(self, size = -1):
+ r = []
+ try:
+ if size >= 0:
+ self._ReadChunk(r, size)
+ else:
+ while True:
+ self._ReadChunk(r, 2048)
+ except EOFError:
+ pass
+
+ if len(r) == 1:
+ return r[0]
+ return ''.join(r)
+
+ def unread(self, buf):
+ b = self._buf
+ if b is None or len(b) == 0:
+ self._buf = buf
+ else:
+ self._buf = buf + b
+
+ def _ReadChunk(self, r, size):
+ b = self._buf
+ try:
+ while size > 0:
+ if b is None or len(b) == 0:
+ b = self._Inflate(self._fd.read(2048))
+ if not b:
+ raise EOFError()
+ continue
+
+ use = min(size, len(b))
+ r.append(b[:use])
+ b = b[use:]
+ size -= use
+ finally:
+ self._buf = b
+
+ def _Inflate(self, b):
+ return b
+
+
+class _FixedLengthStream(_UngetStream):
+ """File like object to read a fixed length stream.
+ """
+ def __init__(self, fd, have):
+ _UngetStream.__init__(self, fd)
+ self._have = have
+
+ def _Inflate(self, b):
+ n = self._have
+ if n == 0:
+ self._fd.unread(b)
+ return None
+
+ if len(b) > n:
+ self._fd.unread(b[n:])
+ b = b[:n]
+ self._have -= len(b)
+ return b
+
+
+class _InflateStream(_UngetStream):
+ """Inflates the stream as it reads input.
+ """
+ def __init__(self, fd):
+ _UngetStream.__init__(self, fd)
+ self._z = zlib.decompressobj(-zlib.MAX_WBITS)
+
+ def _Inflate(self, b):
+ z = self._z
+ if not z:
+ self._fd.unread(b)
+ return None
+
+ b = z.decompress(b)
+ if z.unconsumed_tail != '':
+ self._fd.unread(z.unconsumed_tail)
+ elif z.unused_data != '':
+ self._fd.unread(z.unused_data)
+ self._z = None
+ return b