Blame - import_zip.py - git-repo

blob: 08aff32602c4be8bee15cb42ac7527a86a621e93 [file] [log] [blame]

The Android Open Source Project	cf31fe9	2008-10-21 07:00:00 -0700	[diff] [blame^]	1	#
				2	# Copyright (C) 2008 The Android Open Source Project
				3	#
				4	# Licensed under the Apache License, Version 2.0 (the "License");
				5	# you may not use this file except in compliance with the License.
				6	# You may obtain a copy of the License at
				7	#
				8	# http://www.apache.org/licenses/LICENSE-2.0
				9	#
				10	# Unless required by applicable law or agreed to in writing, software
				11	# distributed under the License is distributed on an "AS IS" BASIS,
				12	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	# See the License for the specific language governing permissions and
				14	# limitations under the License.
				15
				16	import stat
				17	import struct
				18	import zlib
				19	import cStringIO
				20
				21	from import_ext import ImportExternal
				22	from error import ImportError
				23
				24	class ImportZip(ImportExternal):
				25	"""Streams a zip file from the network directly into a Project's
				26	Git repository.
				27	"""
				28	@classmethod
				29	def CanAccept(cls, url):
				30	"""Can this importer read and unpack the data stored at url?
				31	"""
				32	if url.endswith('.zip') or url.endswith('.jar'):
				33	return True
				34	return False
				35
				36	def _UnpackFiles(self):
				37	url_fd, url = self._OpenUrl()
				38	try:
				39	if not self.__class__.CanAccept(url):
				40	raise ImportError('non-zip file extension: %s' % url)
				41
				42	zip = _ZipFile(url_fd)
				43	for entry in zip.FileRecords():
				44	data = zip.Open(entry).read()
				45	sz = len(data)
				46
				47	if data and _SafeCRLF(data):
				48	data = data.replace('\r\n', '\n')
				49	sz = len(data)
				50
				51	fd = cStringIO.StringIO(data)
				52	self._UnpackOneFile(entry.mode, sz, entry.name, fd)
				53	zip.Close(entry)
				54
				55	for entry in zip.CentralDirectory():
				56	self._SetFileMode(entry.name, entry.mode)
				57
				58	zip.CheckTail()
				59	finally:
				60	url_fd.close()
				61
				62
				63	def _SafeCRLF(data):
				64	"""Is it reasonably safe to perform a CRLF->LF conversion?
				65
				66	If the stream contains a NUL byte it is likely binary,
				67	and thus a CRLF->LF conversion may damage the stream.
				68
				69	If the only NUL is in the last position of the stream,
				70	but it otherwise can do a CRLF<->LF conversion we do
				71	the CRLF conversion anyway. At least one source ZIP
				72	file has this structure in its source code.
				73
				74	If every occurrance of a CR and LF is paired up as a
				75	CRLF pair then the conversion is safely bi-directional.
				76	s/\r\n/\n/g == s/\n/\r\\n/g can convert between them.
				77	"""
				78	nul = data.find('\0')
				79	if 0 <= nul and nul < (len(data) - 1):
				80	return False
				81
				82	n_lf = 0
				83	last = 0
				84	while True:
				85	lf = data.find('\n', last)
				86	if lf < 0:
				87	break
				88	if lf == 0 or data[lf - 1] != '\r':
				89	return False
				90	last = lf + 1
				91	n_lf += 1
				92	return n_lf > 0
				93
				94	class _ZipFile(object):
				95	"""Streaming iterator to parse a zip file on the fly.
				96	"""
				97	def __init__(self, fd):
				98	self._fd = _UngetStream(fd)
				99
				100	def FileRecords(self):
				101	return _FileIter(self._fd)
				102
				103	def CentralDirectory(self):
				104	return _CentIter(self._fd)
				105
				106	def CheckTail(self):
				107	type_buf = self._fd.read(4)
				108	type = struct.unpack('<I', type_buf)[0]
				109	if type != 0x06054b50: # end of central directory
				110	raise ImportError('zip record %x unsupported' % type)
				111
				112	def Open(self, entry):
				113	if entry.is_compressed:
				114	return _InflateStream(self._fd)
				115	else:
				116	if entry.has_trailer:
				117	raise ImportError('unable to extract streamed zip')
				118	return _FixedLengthStream(self._fd, entry.uncompressed_size)
				119
				120	def Close(self, entry):
				121	if entry.has_trailer:
				122	type = struct.unpack('<I', self._fd.read(4))[0]
				123	if type == 0x08074b50:
				124	# Not a formal type marker, but commonly seen in zips
				125	# as the data descriptor signature.
				126	#
				127	struct.unpack('<3I', self._fd.read(12))
				128	else:
				129	# No signature for the data descriptor, so read the
				130	# remaining fields out of the stream
				131	#
				132	self._fd.read(8)
				133
				134
				135	class _FileIter(object):
				136	def __init__(self, fd):
				137	self._fd = fd
				138
				139	def __iter__(self):
				140	return self
				141
				142	def next(self):
				143	fd = self._fd
				144
				145	type_buf = fd.read(4)
				146	type = struct.unpack('<I', type_buf)[0]
				147
				148	if type != 0x04034b50: # local file header
				149	fd.unread(type_buf)
				150	raise StopIteration()
				151
				152	rec = _FileHeader(fd.read(26))
				153	rec.name = fd.read(rec.name_len)
				154	fd.read(rec.extra_len)
				155
				156	if rec.name.endswith('/'):
				157	rec.name = rec.name[:-1]
				158	rec.mode = stat.S_IFDIR \| 0777
				159	return rec
				160
				161
				162	class _FileHeader(object):
				163	"""Information about a single file in the archive.
				164	0 version needed to extract 2 bytes
				165	1 general purpose bit flag 2 bytes
				166	2 compression method 2 bytes
				167	3 last mod file time 2 bytes
				168	4 last mod file date 2 bytes
				169	5 crc-32 4 bytes
				170	6 compressed size 4 bytes
				171	7 uncompressed size 4 bytes
				172	8 file name length 2 bytes
				173	9 extra field length 2 bytes
				174	"""
				175	def __init__(self, raw_bin):
				176	rec = struct.unpack('<5H3I2H', raw_bin)
				177
				178	if rec[2] == 8:
				179	self.is_compressed = True
				180	elif rec[2] == 0:
				181	self.is_compressed = False
				182	else:
				183	raise ImportError('unrecognized compression format')
				184
				185	if rec[1] & (1 << 3):
				186	self.has_trailer = True
				187	else:
				188	self.has_trailer = False
				189
				190	self.compressed_size = rec[6]
				191	self.uncompressed_size = rec[7]
				192	self.name_len = rec[8]
				193	self.extra_len = rec[9]
				194	self.mode = stat.S_IFREG \| 0644
				195
				196
				197	class _CentIter(object):
				198	def __init__(self, fd):
				199	self._fd = fd
				200
				201	def __iter__(self):
				202	return self
				203
				204	def next(self):
				205	fd = self._fd
				206
				207	type_buf = fd.read(4)
				208	type = struct.unpack('<I', type_buf)[0]
				209
				210	if type != 0x02014b50: # central directory
				211	fd.unread(type_buf)
				212	raise StopIteration()
				213
				214	rec = _CentHeader(fd.read(42))
				215	rec.name = fd.read(rec.name_len)
				216	fd.read(rec.extra_len)
				217	fd.read(rec.comment_len)
				218
				219	if rec.name.endswith('/'):
				220	rec.name = rec.name[:-1]
				221	rec.mode = stat.S_IFDIR \| 0777
				222	return rec
				223
				224
				225	class _CentHeader(object):
				226	"""Information about a single file in the archive.
				227	0 version made by 2 bytes
				228	1 version needed to extract 2 bytes
				229	2 general purpose bit flag 2 bytes
				230	3 compression method 2 bytes
				231	4 last mod file time 2 bytes
				232	5 last mod file date 2 bytes
				233	6 crc-32 4 bytes
				234	7 compressed size 4 bytes
				235	8 uncompressed size 4 bytes
				236	9 file name length 2 bytes
				237	10 extra field length 2 bytes
				238	11 file comment length 2 bytes
				239	12 disk number start 2 bytes
				240	13 internal file attributes 2 bytes
				241	14 external file attributes 4 bytes
				242	15 relative offset of local header 4 bytes
				243	"""
				244	def __init__(self, raw_bin):
				245	rec = struct.unpack('<6H3I5H2I', raw_bin)
				246	self.name_len = rec[9]
				247	self.extra_len = rec[10]
				248	self.comment_len = rec[11]
				249
				250	if (rec[0] & 0xff00) == 0x0300: # UNIX
				251	self.mode = rec[14] >> 16
				252	else:
				253	self.mode = stat.S_IFREG \| 0644
				254
				255
				256	class _UngetStream(object):
				257	"""File like object to read and rewind a stream.
				258	"""
				259	def __init__(self, fd):
				260	self._fd = fd
				261	self._buf = None
				262
				263	def read(self, size = -1):
				264	r = []
				265	try:
				266	if size >= 0:
				267	self._ReadChunk(r, size)
				268	else:
				269	while True:
				270	self._ReadChunk(r, 2048)
				271	except EOFError:
				272	pass
				273
				274	if len(r) == 1:
				275	return r[0]
				276	return ''.join(r)
				277
				278	def unread(self, buf):
				279	b = self._buf
				280	if b is None or len(b) == 0:
				281	self._buf = buf
				282	else:
				283	self._buf = buf + b
				284
				285	def _ReadChunk(self, r, size):
				286	b = self._buf
				287	try:
				288	while size > 0:
				289	if b is None or len(b) == 0:
				290	b = self._Inflate(self._fd.read(2048))
				291	if not b:
				292	raise EOFError()
				293	continue
				294
				295	use = min(size, len(b))
				296	r.append(b[:use])
				297	b = b[use:]
				298	size -= use
				299	finally:
				300	self._buf = b
				301
				302	def _Inflate(self, b):
				303	return b
				304
				305
				306	class _FixedLengthStream(_UngetStream):
				307	"""File like object to read a fixed length stream.
				308	"""
				309	def __init__(self, fd, have):
				310	_UngetStream.__init__(self, fd)
				311	self._have = have
				312
				313	def _Inflate(self, b):
				314	n = self._have
				315	if n == 0:
				316	self._fd.unread(b)
				317	return None
				318
				319	if len(b) > n:
				320	self._fd.unread(b[n:])
				321	b = b[:n]
				322	self._have -= len(b)
				323	return b
				324
				325
				326	class _InflateStream(_UngetStream):
				327	"""Inflates the stream as it reads input.
				328	"""
				329	def __init__(self, fd):
				330	_UngetStream.__init__(self, fd)
				331	self._z = zlib.decompressobj(-zlib.MAX_WBITS)
				332
				333	def _Inflate(self, b):
				334	z = self._z
				335	if not z:
				336	self._fd.unread(b)
				337	return None
				338
				339	b = z.decompress(b)
				340	if z.unconsumed_tail != '':
				341	self._fd.unread(z.unconsumed_tail)
				342	elif z.unused_data != '':
				343	self._fd.unread(z.unused_data)
				344	self._z = None
				345	return b