import_zip.py - git-repo - Gitiles

 #
 # Copyright (C) 2008 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import stat
 import struct
 import zlib
 import cStringIO

 from import_ext import ImportExternal
 from error import ImportError

 class ImportZip(ImportExternal):
   """Importer that unpacks a remote zip (or jar) archive into a
      Project's Git repository while the download is streaming.
   """
   @classmethod
   def CanAccept(cls, url):
     """Report whether url names an archive this importer handles.

        Only the extension is inspected: '.zip' and '.jar' qualify.
     """
     return url.endswith('.zip') or url.endswith('.jar')

   def _UnpackFiles(self):
     """Walk the archive front to back and import every member.

        Each local file record is read completely into memory,
        converted from CRLF to LF when _SafeCRLF allows it, and
        passed to self._UnpackOneFile.  The central directory that
        follows supplies the modes given to self._SetFileMode, and
        the end-of-archive record is checked last.  The url
        descriptor is closed even when an error is raised.
     """
     url_fd, url = self._OpenUrl()
     try:
       if not self.__class__.CanAccept(url):
         raise ImportError('non-zip file extension: %s' % url)

       archive = _ZipFile(url_fd)
       for member in archive.FileRecords():
         payload = archive.Open(member).read()
         if payload and _SafeCRLF(payload):
           payload = payload.replace('\r\n', '\n')

         # The size handed on describes the data after conversion.
         payload_fd = cStringIO.StringIO(payload)
         self._UnpackOneFile(member.mode,
                             len(payload),
                             member.name,
                             payload_fd)
         archive.Close(member)

       for member in archive.CentralDirectory():
         self._SetFileMode(member.name, member.mode)

       archive.CheckTail()
     finally:
       url_fd.close()


 def _SafeCRLF(data):
   """Is it reasonably safe to perform a CRLF->LF conversion?

      If the stream contains a NUL byte it is likely binary,
      and thus a CRLF->LF conversion may damage the stream.

      If the only NUL is in the last position of the stream,
      but it otherwise can do a CRLF<->LF conversion we do
      the CRLF conversion anyway.  At least one source ZIP
      file has this structure in its source code.

      If every occurrance of a CR and LF is paired up as a
      CRLF pair then the conversion is safely bi-directional.
      s/\r\n/\n/g == s/\n/\r\\n/g can convert between them.
   """
   nul = data.find('\0')
   if 0 <= nul and nul < (len(data) - 1):
     return False

   n_lf = 0
   last = 0
   while True:
     lf = data.find('\n', last)
     if lf < 0:
       break
     if lf == 0 or data[lf - 1] != '\r':
       return False
     last = lf + 1
     n_lf += 1
   return n_lf > 0

 class _ZipFile(object):
   """Streaming iterator to parse a zip file on the fly.
   """
   def __init__(self, fd):
     self._fd = _UngetStream(fd)

   def FileRecords(self):
     return _FileIter(self._fd)

   def CentralDirectory(self):
     return _CentIter(self._fd)

   def CheckTail(self):
     type_buf = self._fd.read(4)
     type = struct.unpack('<I', type_buf)[0]
     if type != 0x06054b50:  # end of central directory
       raise ImportError('zip record %x unsupported' % type)

   def Open(self, entry):
     if entry.is_compressed:
       return _InflateStream(self._fd)
     else:
       if entry.has_trailer:
         raise ImportError('unable to extract streamed zip')
       return _FixedLengthStream(self._fd, entry.uncompressed_size)

   def Close(self, entry):
     if entry.has_trailer:
       type = struct.unpack('<I', self._fd.read(4))[0]
       if type == 0x08074b50:
         # Not a formal type marker, but commonly seen in zips
         # as the data descriptor signature.
         #
         struct.unpack('<3I', self._fd.read(12))
       else:
         # No signature for the data descriptor, so read the
         # remaining fields out of the stream
         #
         self._fd.read(8)


 class _FileIter(object):
   def __init__(self, fd):
     self._fd = fd

   def __iter__(self):
     return self

   def next(self):
     fd = self._fd

     type_buf = fd.read(4)
     type = struct.unpack('<I', type_buf)[0]

     if type != 0x04034b50:    # local file header
       fd.unread(type_buf)
       raise StopIteration()

     rec = _FileHeader(fd.read(26))
     rec.name = fd.read(rec.name_len)
     fd.read(rec.extra_len)

     if rec.name.endswith('/'):
       rec.name = rec.name[:-1]
       rec.mode = stat.S_IFDIR | 0777
     return rec


 class _FileHeader(object):
   """Information about a single file in the archive.
      0  version needed to extract       2 bytes
      1  general purpose bit flag        2 bytes
      2  compression method              2 bytes
      3  last mod file time              2 bytes
      4  last mod file date              2 bytes
      5  crc-32                          4 bytes
      6  compressed size                 4 bytes
      7  uncompressed size               4 bytes
      8  file name length                2 bytes
      9  extra field length              2 bytes
   """
   def __init__(self, raw_bin):
     rec = struct.unpack('<5H3I2H', raw_bin)

     if rec[2] == 8:
       self.is_compressed = True
     elif rec[2] == 0:
       self.is_compressed = False
     else:
       raise ImportError('unrecognized compression format')

     if rec[1] & (1 << 3):
       self.has_trailer = True
     else:
       self.has_trailer = False

     self.compressed_size  = rec[6]
     self.uncompressed_size = rec[7]
     self.name_len = rec[8]
     self.extra_len = rec[9]
     self.mode = stat.S_IFREG | 0644


 class _CentIter(object):
   def __init__(self, fd):
     self._fd = fd

   def __iter__(self):
     return self

   def next(self):
     fd = self._fd

     type_buf = fd.read(4)
     type = struct.unpack('<I', type_buf)[0]

     if type != 0x02014b50:  # central directory
       fd.unread(type_buf)
       raise StopIteration()

     rec = _CentHeader(fd.read(42))
     rec.name = fd.read(rec.name_len)
     fd.read(rec.extra_len)
     fd.read(rec.comment_len)

     if rec.name.endswith('/'):
       rec.name = rec.name[:-1]
       rec.mode = stat.S_IFDIR | 0777
     return rec


 class _CentHeader(object):
   """Information about a single file in the archive.
      0  version made by                 2 bytes
      1  version needed to extract       2 bytes
      2  general purpose bit flag        2 bytes
      3  compression method              2 bytes
      4  last mod file time              2 bytes
      5  last mod file date              2 bytes
      6  crc-32                          4 bytes
      7  compressed size                 4 bytes
      8  uncompressed size               4 bytes
      9  file name length                2 bytes
     10  extra field length              2 bytes
     11  file comment length             2 bytes
     12  disk number start               2 bytes
     13  internal file attributes        2 bytes
     14  external file attributes        4 bytes
     15  relative offset of local header 4 bytes
   """
   def __init__(self, raw_bin):
     rec = struct.unpack('<6H3I5H2I', raw_bin)
     self.name_len = rec[9]
     self.extra_len = rec[10]
     self.comment_len = rec[11]

     if (rec[0] & 0xff00) == 0x0300:  # UNIX
       self.mode = rec[14] >> 16
     else:
       self.mode = stat.S_IFREG | 0644


 class _UngetStream(object):
   """File like object to read and rewind a stream.
   """
   def __init__(self, fd):
     self._fd = fd
     self._buf = None

   def read(self, size = -1):
     r = []
     try:
       if size >= 0:
         self._ReadChunk(r, size)
       else:
         while True:
           self._ReadChunk(r, 2048)
     except EOFError:
       pass

     if len(r) == 1:
       return r[0]
     return ''.join(r)

   def unread(self, buf):
     b = self._buf
     if b is None or len(b) == 0:
       self._buf = buf
     else:
       self._buf = buf + b

   def _ReadChunk(self, r, size):
     b = self._buf
     try:
       while size > 0:
         if b is None or len(b) == 0:
           b = self._Inflate(self._fd.read(2048))
           if not b:
             raise EOFError()
           continue

         use = min(size, len(b))
         r.append(b[:use])
         b = b[use:]
         size -= use
     finally:
       self._buf = b

   def _Inflate(self, b):
     return b


 class _FixedLengthStream(_UngetStream):
   """File like object exposing only the next have units of the
      parent stream.
   """
   def __init__(self, fd, have):
     _UngetStream.__init__(self, fd)
     self._have = have

   def _Inflate(self, b):
     """Trim b to the quota that is still open.

        Anything beyond the quota is pushed back onto the parent
        stream.  Once the quota is used up all of b is pushed
        back and None is returned to signal the end.
     """
     remaining = self._have
     if remaining == 0:
       self._fd.unread(b)
       return None

     if len(b) > remaining:
       surplus = b[remaining:]
       b = b[:remaining]
       self._fd.unread(surplus)
     self._have = remaining - len(b)
     return b


 class _InflateStream(_UngetStream):
   """Inflates the stream as it reads input.

      The data is decoded as raw deflate (negative window bits,
      so no zlib header is expected).
   """
   def __init__(self, fd):
     _UngetStream.__init__(self, fd)
     self._z = zlib.decompressobj(-zlib.MAX_WBITS)

   def _Inflate(self, b):
     """Decompress b and return the inflated data.

        Input the decompressor did not use is pushed back onto
        the parent stream.  Once the compressed stream has ended
        the decompressor is dropped, and later calls push all of
        b back and return None to signal the end.
     """
     z = self._z
     if not z:
       self._fd.unread(b)
       return None

     out = z.decompress(b)
     # Truthiness rather than comparison with '': on Python 3 these
     # attributes are bytes and never compare equal to a str, which
     # would keep the unused_data branch from ever running.
     if z.unconsumed_tail:
       self._fd.unread(z.unconsumed_tail)
     elif z.unused_data:
       # Data past the end of the deflate stream belongs to the
       # next zip record.
       self._fd.unread(z.unused_data)
       self._z = None
     return out
	#
	# Copyright (C) 2008 The Android Open Source Project
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import stat
	import struct
	import zlib
	import cStringIO

	from import_ext import ImportExternal
	from error import ImportError

	class ImportZip(ImportExternal):
	  """Importer that unpacks a remote zip (or jar) archive into a
	     Project's Git repository while the download is streaming.
	  """
	  @classmethod
	  def CanAccept(cls, url):
	    """Report whether url names an archive this importer handles.

	       Only the extension is inspected: '.zip' and '.jar' qualify.
	    """
	    return url.endswith('.zip') or url.endswith('.jar')

	  def _UnpackFiles(self):
	    """Walk the archive front to back and import every member.

	       Each local file record is read completely into memory,
	       converted from CRLF to LF when _SafeCRLF allows it, and
	       passed to self._UnpackOneFile.  The central directory that
	       follows supplies the modes given to self._SetFileMode, and
	       the end-of-archive record is checked last.  The url
	       descriptor is closed even when an error is raised.
	    """
	    url_fd, url = self._OpenUrl()
	    try:
	      if not self.__class__.CanAccept(url):
	        raise ImportError('non-zip file extension: %s' % url)

	      archive = _ZipFile(url_fd)
	      for member in archive.FileRecords():
	        payload = archive.Open(member).read()
	        if payload and _SafeCRLF(payload):
	          payload = payload.replace('\r\n', '\n')

	        # The size handed on describes the data after conversion.
	        payload_fd = cStringIO.StringIO(payload)
	        self._UnpackOneFile(member.mode,
	                            len(payload),
	                            member.name,
	                            payload_fd)
	        archive.Close(member)

	      for member in archive.CentralDirectory():
	        self._SetFileMode(member.name, member.mode)

	      archive.CheckTail()
	    finally:
	      url_fd.close()


	def _SafeCRLF(data):
	"""Is it reasonably safe to perform a CRLF->LF conversion?

	If the stream contains a NUL byte it is likely binary,
	and thus a CRLF->LF conversion may damage the stream.

	If the only NUL is in the last position of the stream,
	but it otherwise can do a CRLF<->LF conversion we do
	the CRLF conversion anyway. At least one source ZIP
	file has this structure in its source code.

	If every occurrance of a CR and LF is paired up as a
	CRLF pair then the conversion is safely bi-directional.
	s/\r\n/\n/g == s/\n/\r\\n/g can convert between them.
	"""
	nul = data.find('\0')
	if 0 <= nul and nul < (len(data) - 1):
	return False

	n_lf = 0
	last = 0
	while True:
	lf = data.find('\n', last)
	if lf < 0:
	break
	if lf == 0 or data[lf - 1] != '\r':
	return False
	last = lf + 1
	n_lf += 1
	return n_lf > 0

	class _ZipFile(object):
	"""Streaming iterator to parse a zip file on the fly.
	"""
	def __init__(self, fd):
	self._fd = _UngetStream(fd)

	def FileRecords(self):
	return _FileIter(self._fd)

	def CentralDirectory(self):
	return _CentIter(self._fd)

	def CheckTail(self):
	type_buf = self._fd.read(4)
	type = struct.unpack('<I', type_buf)[0]
	if type != 0x06054b50: # end of central directory
	raise ImportError('zip record %x unsupported' % type)

	def Open(self, entry):
	if entry.is_compressed:
	return _InflateStream(self._fd)
	else:
	if entry.has_trailer:
	raise ImportError('unable to extract streamed zip')
	return _FixedLengthStream(self._fd, entry.uncompressed_size)

	def Close(self, entry):
	if entry.has_trailer:
	type = struct.unpack('<I', self._fd.read(4))[0]
	if type == 0x08074b50:
	# Not a formal type marker, but commonly seen in zips
	# as the data descriptor signature.
	#
	struct.unpack('<3I', self._fd.read(12))
	else:
	# No signature for the data descriptor, so read the
	# remaining fields out of the stream
	#
	self._fd.read(8)


	class _FileIter(object):
	def __init__(self, fd):
	self._fd = fd

	def __iter__(self):
	return self

	def next(self):
	fd = self._fd

	type_buf = fd.read(4)
	type = struct.unpack('<I', type_buf)[0]

	if type != 0x04034b50: # local file header
	fd.unread(type_buf)
	raise StopIteration()

	rec = _FileHeader(fd.read(26))
	rec.name = fd.read(rec.name_len)
	fd.read(rec.extra_len)

	if rec.name.endswith('/'):
	rec.name = rec.name[:-1]
	rec.mode = stat.S_IFDIR \| 0777
	return rec


	class _FileHeader(object):
	"""Information about a single file in the archive.
	0 version needed to extract 2 bytes
	1 general purpose bit flag 2 bytes
	2 compression method 2 bytes
	3 last mod file time 2 bytes
	4 last mod file date 2 bytes
	5 crc-32 4 bytes
	6 compressed size 4 bytes
	7 uncompressed size 4 bytes
	8 file name length 2 bytes
	9 extra field length 2 bytes
	"""
	def __init__(self, raw_bin):
	rec = struct.unpack('<5H3I2H', raw_bin)

	if rec[2] == 8:
	self.is_compressed = True
	elif rec[2] == 0:
	self.is_compressed = False
	else:
	raise ImportError('unrecognized compression format')

	if rec[1] & (1 << 3):
	self.has_trailer = True
	else:
	self.has_trailer = False

	self.compressed_size = rec[6]
	self.uncompressed_size = rec[7]
	self.name_len = rec[8]
	self.extra_len = rec[9]
	self.mode = stat.S_IFREG \| 0644


	class _CentIter(object):
	def __init__(self, fd):
	self._fd = fd

	def __iter__(self):
	return self

	def next(self):
	fd = self._fd

	type_buf = fd.read(4)
	type = struct.unpack('<I', type_buf)[0]

	if type != 0x02014b50: # central directory
	fd.unread(type_buf)
	raise StopIteration()

	rec = _CentHeader(fd.read(42))
	rec.name = fd.read(rec.name_len)
	fd.read(rec.extra_len)
	fd.read(rec.comment_len)

	if rec.name.endswith('/'):
	rec.name = rec.name[:-1]
	rec.mode = stat.S_IFDIR \| 0777
	return rec


	class _CentHeader(object):
	"""Information about a single file in the archive.
	0 version made by 2 bytes
	1 version needed to extract 2 bytes
	2 general purpose bit flag 2 bytes
	3 compression method 2 bytes
	4 last mod file time 2 bytes
	5 last mod file date 2 bytes
	6 crc-32 4 bytes
	7 compressed size 4 bytes
	8 uncompressed size 4 bytes
	9 file name length 2 bytes
	10 extra field length 2 bytes
	11 file comment length 2 bytes
	12 disk number start 2 bytes
	13 internal file attributes 2 bytes
	14 external file attributes 4 bytes
	15 relative offset of local header 4 bytes
	"""
	def __init__(self, raw_bin):
	rec = struct.unpack('<6H3I5H2I', raw_bin)
	self.name_len = rec[9]
	self.extra_len = rec[10]
	self.comment_len = rec[11]

	if (rec[0] & 0xff00) == 0x0300: # UNIX
	self.mode = rec[14] >> 16
	else:
	self.mode = stat.S_IFREG \| 0644


	class _UngetStream(object):
	"""File like object to read and rewind a stream.
	"""
	def __init__(self, fd):
	self._fd = fd
	self._buf = None

	def read(self, size = -1):
	r = []
	try:
	if size >= 0:
	self._ReadChunk(r, size)
	else:
	while True:
	self._ReadChunk(r, 2048)
	except EOFError:
	pass

	if len(r) == 1:
	return r[0]
	return ''.join(r)

	def unread(self, buf):
	b = self._buf
	if b is None or len(b) == 0:
	self._buf = buf
	else:
	self._buf = buf + b

	def _ReadChunk(self, r, size):
	b = self._buf
	try:
	while size > 0:
	if b is None or len(b) == 0:
	b = self._Inflate(self._fd.read(2048))
	if not b:
	raise EOFError()
	continue

	use = min(size, len(b))
	r.append(b[:use])
	b = b[use:]
	size -= use
	finally:
	self._buf = b

	def _Inflate(self, b):
	return b


	class _FixedLengthStream(_UngetStream):
	  """File like object exposing only the next have units of the
	     parent stream.
	  """
	  def __init__(self, fd, have):
	    _UngetStream.__init__(self, fd)
	    self._have = have

	  def _Inflate(self, b):
	    """Trim b to the quota that is still open.

	       Anything beyond the quota is pushed back onto the parent
	       stream.  Once the quota is used up all of b is pushed
	       back and None is returned to signal the end.
	    """
	    remaining = self._have
	    if remaining == 0:
	      self._fd.unread(b)
	      return None

	    if len(b) > remaining:
	      surplus = b[remaining:]
	      b = b[:remaining]
	      self._fd.unread(surplus)
	    self._have = remaining - len(b)
	    return b


	class _InflateStream(_UngetStream):
	  """Inflates the stream as it reads input.

	     The data is decoded as raw deflate (negative window bits,
	     so no zlib header is expected).
	  """
	  def __init__(self, fd):
	    _UngetStream.__init__(self, fd)
	    self._z = zlib.decompressobj(-zlib.MAX_WBITS)

	  def _Inflate(self, b):
	    """Decompress b and return the inflated data.

	       Input the decompressor did not use is pushed back onto
	       the parent stream.  Once the compressed stream has ended
	       the decompressor is dropped, and later calls push all of
	       b back and return None to signal the end.
	    """
	    z = self._z
	    if not z:
	      self._fd.unread(b)
	      return None

	    out = z.decompress(b)
	    # Truthiness rather than comparison with '': on Python 3 these
	    # attributes are bytes and never compare equal to a str, which
	    # would keep the unused_data branch from ever running.
	    if z.unconsumed_tail:
	      self._fd.unread(z.unconsumed_tail)
	    elif z.unused_data:
	      # Data past the end of the deflate stream belongs to the
	      # next zip record.
	      self._fd.unread(z.unused_data)
	      self._z = None
	    return out