The Android Open Source Project | cf31fe9 | 2008-10-21 07:00:00 -0700 | [diff] [blame] | 1 | # |
| 2 | # Copyright (C) 2008 The Android Open Source Project |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | |
| 16 | import stat |
| 17 | import struct |
| 18 | import zlib |
| 19 | import cStringIO |
| 20 | |
| 21 | from import_ext import ImportExternal |
| 22 | from error import ImportError |
| 23 | |
class ImportZip(ImportExternal):
    """Importer that unpacks a zip (or jar) archive as it is downloaded.

    The archive is read strictly front to back: every local file record
    is unpacked first, then the central directory is walked to apply the
    file modes recorded there, and finally the trailing marker is checked.
    """

    @classmethod
    def CanAccept(cls, url):
        """Return True if url names a '.zip' or '.jar' file, else False."""
        return url.endswith('.zip') or url.endswith('.jar')

    def _UnpackFiles(self):
        """Unpack every member of the archive opened by self._OpenUrl().

        Raises:
          ImportError: the opened url does not carry a zip/jar extension,
            or the archive contains something the parser cannot handle.
        """
        url_fd, url = self._OpenUrl()
        try:
            if not self.__class__.CanAccept(url):
                raise ImportError('non-zip file extension: %s' % url)

            archive = _ZipFile(url_fd)

            # Pass 1: the file contents, in stream order.
            for entry in archive.FileRecords():
                data = archive.Open(entry).read()

                # Text-looking members are normalized to LF line endings.
                if data and _SafeCRLF(data):
                    data = data.replace('\r\n', '\n')

                self._UnpackOneFile(entry.mode,
                                    len(data),
                                    entry.name,
                                    cStringIO.StringIO(data))
                archive.Close(entry)

            # Pass 2: the central directory carries the real file modes.
            for entry in archive.CentralDirectory():
                self._SetFileMode(entry.name, entry.mode)

            archive.CheckTail()
        finally:
            url_fd.close()
| 61 | |
| 62 | |
def _SafeCRLF(data):
    """Decide whether a CRLF->LF conversion of data is reasonably safe.

    A NUL byte anywhere except the very last position marks the data as
    likely binary, where rewriting line endings could corrupt it, so the
    answer is False.  A single trailing NUL is tolerated because at least
    one source ZIP file ships text files that end that way.

    Otherwise the answer is True only when the data contains at least one
    LF and every LF is immediately preceded by a CR.  In that case
    s/\\r\\n/\\n/g can be undone again with s/\\n/\\r\\n/g.
    """
    first_nul = data.find('\0')
    if first_nul >= 0 and first_nul != len(data) - 1:
        return False

    # CRLF pairs cannot overlap, so counting them counts exactly the LFs
    # that have a CR directly in front of them.
    lf_total = data.count('\n')
    return lf_total > 0 and lf_total == data.count('\r\n')
| 93 | |
class _ZipFile(object):
    """Streaming parser that reads a zip archive on the fly.

    The underlying stream is only ever read forward; a record signature
    that turns out to belong to the next section is pushed back with
    unread() so the next reader sees it again.
    """

    _SIG_END_OF_CENTRAL_DIR = 0x06054b50
    _SIG_DATA_DESCRIPTOR = 0x08074b50

    def __init__(self, fd):
        """Wrap fd (a file-like object with read()) for push-back reads."""
        self._fd = _UngetStream(fd)

    def FileRecords(self):
        """Return an iterator over the local file records."""
        return _FileIter(self._fd)

    def CentralDirectory(self):
        """Return an iterator over the central directory records."""
        return _CentIter(self._fd)

    def CheckTail(self):
        """Verify the next record is the end-of-central-directory marker.

        Raises:
          ImportError: the stream ended early, or a different record
            type follows.
        """
        sig = self._ReadUInt32('end of central directory')
        if sig != self._SIG_END_OF_CENTRAL_DIR:
            raise ImportError('zip record %x unsupported' % sig)

    def Open(self, entry):
        """Return a file-like object yielding the content of entry.

        Raises:
          ImportError: entry is stored uncompressed but its size is only
            given in a trailing data descriptor, so its end cannot be
            located while streaming.
        """
        if entry.is_compressed:
            return _InflateStream(self._fd)
        if entry.has_trailer:
            raise ImportError('unable to extract streamed zip')
        return _FixedLengthStream(self._fd, entry.uncompressed_size)

    def Close(self, entry):
        """Skip the data descriptor following entry's content, if any.

        Raises:
          ImportError: the stream ended inside the data descriptor.
        """
        if not entry.has_trailer:
            return

        sig = self._ReadUInt32('data descriptor')
        if sig == self._SIG_DATA_DESCRIPTOR:
            # Not a formal type marker, but commonly seen in zips as the
            # data descriptor signature.  Three 32-bit fields follow it.
            self._ReadExactly(12, 'data descriptor')
        else:
            # No signature, so the four bytes just read were the first
            # field of the descriptor; skip the remaining two fields.
            self._ReadExactly(8, 'data descriptor')

    def _ReadExactly(self, size, what):
        """Read exactly size bytes or raise ImportError naming what."""
        buf = self._fd.read(size)
        if len(buf) != size:
            raise ImportError('truncated zip: incomplete %s' % what)
        return buf

    def _ReadUInt32(self, what):
        """Read one little-endian unsigned 32-bit value."""
        return struct.unpack('<I', self._ReadExactly(4, what))[0]
| 133 | |
| 134 | |
| 135 | class _FileIter(object): |
| 136 | def __init__(self, fd): |
| 137 | self._fd = fd |
| 138 | |
| 139 | def __iter__(self): |
| 140 | return self |
| 141 | |
| 142 | def next(self): |
| 143 | fd = self._fd |
| 144 | |
| 145 | type_buf = fd.read(4) |
| 146 | type = struct.unpack('<I', type_buf)[0] |
| 147 | |
| 148 | if type != 0x04034b50: # local file header |
| 149 | fd.unread(type_buf) |
| 150 | raise StopIteration() |
| 151 | |
| 152 | rec = _FileHeader(fd.read(26)) |
| 153 | rec.name = fd.read(rec.name_len) |
| 154 | fd.read(rec.extra_len) |
| 155 | |
| 156 | if rec.name.endswith('/'): |
| 157 | rec.name = rec.name[:-1] |
| 158 | rec.mode = stat.S_IFDIR | 0777 |
| 159 | return rec |
| 160 | |
| 161 | |
class _FileHeader(object):
    """Information about a single file in the archive.

    Parsed from the fixed-size part of a local file header (the bytes
    following the 4-byte signature), laid out little-endian as:

       0  version needed to extract   2 bytes
       1  general purpose bit flag    2 bytes
       2  compression method          2 bytes
       3  last mod file time          2 bytes
       4  last mod file date          2 bytes
       5  crc-32                      4 bytes
       6  compressed size             4 bytes
       7  uncompressed size           4 bytes
       8  file name length            2 bytes
       9  extra field length          2 bytes

    Attributes set: is_compressed, has_trailer, compressed_size,
    uncompressed_size, name_len, extra_len and mode (regular file,
    rw-r--r--, until a caller overrides it).
    """

    _FORMAT = '<5H3I2H'

    def __init__(self, raw_bin):
        """Parse raw_bin, the 26 header bytes after the signature.

        Raises:
          ImportError: raw_bin has the wrong length, or the member uses
            a compression method other than stored (0) or deflate (8).
        """
        if len(raw_bin) != struct.calcsize(self._FORMAT):
            raise ImportError('truncated zip: incomplete local file header')
        rec = struct.unpack(self._FORMAT, raw_bin)

        method = rec[2]
        if method == 8:
            self.is_compressed = True
        elif method == 0:
            self.is_compressed = False
        else:
            raise ImportError('unrecognized compression format')

        # Bit 3 of the flags: the sizes follow the data in a trailing
        # data descriptor.
        self.has_trailer = bool(rec[1] & (1 << 3))

        self.compressed_size = rec[6]
        self.uncompressed_size = rec[7]
        self.name_len = rec[8]
        self.extra_len = rec[9]
        # Regular file, mode 644; spelled with stat constants so no
        # version-specific octal literal syntax is needed.
        self.mode = (stat.S_IFREG
                     | stat.S_IRUSR | stat.S_IWUSR
                     | stat.S_IRGRP | stat.S_IROTH)
| 195 | |
| 196 | |
| 197 | class _CentIter(object): |
| 198 | def __init__(self, fd): |
| 199 | self._fd = fd |
| 200 | |
| 201 | def __iter__(self): |
| 202 | return self |
| 203 | |
| 204 | def next(self): |
| 205 | fd = self._fd |
| 206 | |
| 207 | type_buf = fd.read(4) |
| 208 | type = struct.unpack('<I', type_buf)[0] |
| 209 | |
| 210 | if type != 0x02014b50: # central directory |
| 211 | fd.unread(type_buf) |
| 212 | raise StopIteration() |
| 213 | |
| 214 | rec = _CentHeader(fd.read(42)) |
| 215 | rec.name = fd.read(rec.name_len) |
| 216 | fd.read(rec.extra_len) |
| 217 | fd.read(rec.comment_len) |
| 218 | |
| 219 | if rec.name.endswith('/'): |
| 220 | rec.name = rec.name[:-1] |
| 221 | rec.mode = stat.S_IFDIR | 0777 |
| 222 | return rec |
| 223 | |
| 224 | |
class _CentHeader(object):
    """Information about a single file in the archive.

    Parsed from the fixed-size part of a central directory record (the
    bytes following the 4-byte signature), laid out little-endian as:

       0  version made by                  2 bytes
       1  version needed to extract        2 bytes
       2  general purpose bit flag         2 bytes
       3  compression method               2 bytes
       4  last mod file time               2 bytes
       5  last mod file date               2 bytes
       6  crc-32                           4 bytes
       7  compressed size                  4 bytes
       8  uncompressed size                4 bytes
       9  file name length                 2 bytes
      10  extra field length               2 bytes
      11  file comment length              2 bytes
      12  disk number start                2 bytes
      13  internal file attributes         2 bytes
      14  external file attributes         4 bytes
      15  relative offset of local header  4 bytes

    Attributes set: name_len, extra_len, comment_len and mode.
    """

    _FORMAT = '<6H3I5H2I'

    def __init__(self, raw_bin):
        """Parse raw_bin, the 42 header bytes after the signature.

        Raises:
          ImportError: raw_bin does not have the expected length.
        """
        if len(raw_bin) != struct.calcsize(self._FORMAT):
            raise ImportError(
                'truncated zip: incomplete central directory header')
        rec = struct.unpack(self._FORMAT, raw_bin)

        self.name_len = rec[9]
        self.extra_len = rec[10]
        self.comment_len = rec[11]

        if (rec[0] & 0xff00) == 0x0300:  # UNIX
            # Entries made on UNIX keep the file mode in the upper 16
            # bits of the external attributes.
            self.mode = rec[14] >> 16
        else:
            # Regular file, mode 644; spelled with stat constants so no
            # version-specific octal literal syntax is needed.
            self.mode = (stat.S_IFREG
                         | stat.S_IRUSR | stat.S_IWUSR
                         | stat.S_IRGRP | stat.S_IROTH)
| 254 | |
| 255 | |
class _UngetStream(object):
    """File-like reader that supports pushing data back onto the stream.

    Data is pulled from the wrapped object 2048 bytes at a time, passed
    through the _Inflate() hook, and held in a pending buffer until a
    caller asks for it.  unread() puts data in front of that buffer.
    """

    def __init__(self, fd):
        self._fd = fd     # wrapped object; only its read(size) is used here
        self._buf = None  # pending data not yet handed out

    def read(self, size = -1):
        """Return up to size bytes; a negative size reads to the end.

        Fewer bytes than requested (possibly none) are returned when the
        stream runs out first.
        """
        pieces = []
        try:
            if size < 0:
                # Keep pulling until _ReadChunk signals end of stream.
                while True:
                    self._ReadChunk(pieces, 2048)
            else:
                self._ReadChunk(pieces, size)
        except EOFError:
            pass

        # A lone piece is handed back as-is rather than re-joined.
        if len(pieces) == 1:
            return pieces[0]
        return ''.join(pieces)

    def unread(self, buf):
        """Push buf back so that it is the next data returned by read()."""
        pending = self._buf
        if pending:
            self._buf = buf + pending
        else:
            self._buf = buf

    def _ReadChunk(self, r, size):
        """Append pieces totalling size bytes to the list r.

        Raises:
          EOFError: the stream ran out first; whatever was available has
            already been appended to r.
        """
        pending = self._buf
        try:
            while size > 0:
                if not pending:
                    pending = self._Inflate(self._fd.read(2048))
                    if not pending:
                        raise EOFError()

                take = min(size, len(pending))
                r.append(pending[:take])
                pending = pending[take:]
                size -= take
        finally:
            # Always record what is left over, even when bailing out.
            self._buf = pending

    def _Inflate(self, b):
        """Hook applied to each freshly read block; identity here."""
        return b
| 304 | |
| 305 | |
class _FixedLengthStream(_UngetStream):
    """File-like reader that exposes only the next `have` bytes of fd.

    Anything read from fd beyond that budget is pushed back onto fd, so
    fd must itself provide unread().
    """

    def __init__(self, fd, have):
        _UngetStream.__init__(self, fd)
        self._have = have  # bytes of this stream not yet delivered

    def _Inflate(self, b):
        """Trim a freshly read block to the remaining byte budget."""
        remaining = self._have
        if remaining == 0:
            # Budget used up: the whole block belongs to whatever follows.
            self._fd.unread(b)
            return None

        chunk = b[:remaining]
        surplus = b[remaining:]
        if surplus:
            self._fd.unread(surplus)
        self._have = remaining - len(chunk)
        return chunk
| 324 | |
| 325 | |
class _InflateStream(_UngetStream):
    """Inflates the stream as it reads input.

    Input past the end of the compressed data is pushed back onto fd,
    so fd must itself provide unread().
    """

    def __init__(self, fd):
        _UngetStream.__init__(self, fd)
        # Negative window bits select a raw deflate stream, i.e. one
        # without the zlib header and trailer.
        self._z = zlib.decompressobj(-zlib.MAX_WBITS)

    def _Inflate(self, b):
        """Decompress the freshly read block b.

        Returns the decompressed data.  An empty or None result tells
        the caller the stream is finished, so it is only returned once
        the compressed data has really ended or the input has run dry.
        """
        z = self._z
        if not z:
            # Compressed data already ended; b belongs to what follows.
            self._fd.unread(b)
            return None

        while True:
            out = z.decompress(b)

            # unused_data is only filled once the end of the compressed
            # data has been seen.  (unconsumed_tail is not consulted: it
            # is only populated when decompress() is given a max_length.)
            if z.unused_data:
                self._fd.unread(z.unused_data)
                self._z = None
                return out

            if out or not b:
                return out

            # This block produced no output although the compressed data
            # has not ended.  Returning an empty result here would be
            # taken for end of stream and truncate the member, so feed
            # the decompressor more input instead.
            b = self._fd.read(2048)