-
-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathvolume_archive_libarchive.cc
382 lines (323 loc) · 14 KB
/
volume_archive_libarchive.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
// Copyright 2014 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "volume_archive_libarchive.h"
#include <algorithm>
#include <cerrno>
#include <limits>
#include "archive_entry.h"
#include "ppapi/cpp/logging.h"
namespace {

// Negative values returned from ReadData signal an error to the caller.
const int64_t kArchiveReadDataError = -1;

// Builds an error message by appending libarchive's last error string to
// |message|.
std::string ArchiveError(const std::string& message, archive* archive_object) {
  return message + archive_error_string(archive_object);
}

// Sets the libarchive internal error to a VolumeReader related error.
// archive_error_string must operate on a valid string; when one of our custom
// callbacks fails, the libarchive API assumes the error was set by us. If it
// is not set, archive_error_string would read invalid memory and crash with a
// Segmentation Fault.
void SetLibarchiveErrorToVolumeReaderError(archive* archive_object) {
  archive_set_error(archive_object,
                    EIO /* I/O error. */,
                    "%s" /* Format string similar to printf. */,
                    volume_archive_constants::kVolumeReaderError);
}

// libarchive read callback: pulls the next chunk of archive bytes from the
// VolumeReader owned by the VolumeArchiveLibarchive in |client_data|.
ssize_t CustomArchiveRead(archive* archive_object,
                          void* client_data,
                          const void** buffer) {
  VolumeArchiveLibarchive* const archive_instance =
      static_cast<VolumeArchiveLibarchive*>(client_data);
  const int64_t bytes_read = archive_instance->reader()->Read(
      archive_instance->reader_data_size(), buffer);
  if (bytes_read == ARCHIVE_FATAL)
    SetLibarchiveErrorToVolumeReaderError(archive_object);
  return bytes_read;
}

// libarchive skip callback: advances the VolumeReader by |request| bytes.
int64_t CustomArchiveSkip(archive* archive_object,
                          void* client_data,
                          int64_t request) {
  VolumeArchiveLibarchive* const archive_instance =
      static_cast<VolumeArchiveLibarchive*>(client_data);
  // VolumeReader::Skip returns 0 in case of failure and CustomArchiveRead is
  // used instead, so there is no need to check for VolumeReader error.
  return archive_instance->reader()->Skip(request);
}

// libarchive seek callback: repositions the VolumeReader.
int64_t CustomArchiveSeek(archive* archive_object,
                          void* client_data,
                          int64_t offset,
                          int whence) {
  VolumeArchiveLibarchive* const archive_instance =
      static_cast<VolumeArchiveLibarchive*>(client_data);
  const int64_t new_offset = archive_instance->reader()->Seek(offset, whence);
  if (new_offset == ARCHIVE_FATAL)
    SetLibarchiveErrorToVolumeReaderError(archive_object);
  return new_offset;
}

// libarchive passphrase callback: forwards the request to the VolumeReader.
const char* CustomArchivePassphrase(
    archive* archive_object, void* client_data) {
  return static_cast<VolumeArchiveLibarchive*>(client_data)
      ->reader()
      ->Passphrase();
}

}  // namespace
// Wraps |reader| without touching libarchive yet; all libarchive state is
// created lazily in Init(). The initial read chunk size is the minimum so
// that early header probing does not over-fetch from the reader.
VolumeArchiveLibarchive::VolumeArchiveLibarchive(VolumeReader* reader)
    : VolumeArchive(reader),
      reader_data_size_(volume_archive_constants::kMinimumDataChunkSize),
      archive_(NULL),
      current_archive_entry_(NULL),
      last_read_data_offset_(0),
      last_read_data_length_(0),
      decompressed_data_(NULL),
      decompressed_data_size_(0),
      decompressed_error_(false) {
}
// Releases the libarchive handle and reader resources via Cleanup().
VolumeArchiveLibarchive::~VolumeArchiveLibarchive() {
  Cleanup();
}
// Creates the libarchive read handle, enables filter and format support
// (raw-only when |raw| is true), applies the header charset |encoding| when
// one is given, registers the VolumeReader-backed callbacks and opens the
// archive. Returns false with error_message set on any failure.
bool VolumeArchiveLibarchive::Init(const std::string& encoding, bool raw) {
  archive_ = archive_read_new();
  if (!archive_) {
    set_error_message(volume_archive_constants::kArchiveReadNewError);
    return false;
  }

  if (archive_read_support_filter_all(archive_) != ARCHIVE_OK) {
    set_error_message(ArchiveError(
        volume_archive_constants::kArchiveSupportErrorPrefix, archive_));
    return false;
  }

  // TODO(cmihail): Once the bug mentioned at
  // https://github.com/libarchive/libarchive/issues/373 is resolved
  // add RAR file handler to manifest.json.
  const int format_result = raw ? archive_read_support_format_raw(archive_)
                                : archive_read_support_format_all(archive_);
  if (format_result != ARCHIVE_OK) {
    set_error_message(ArchiveError(
        volume_archive_constants::kArchiveSupportErrorPrefix, archive_));
    return false;
  }

  // Default encoding for file names in headers. Note, that another one may be
  // used if specified in the archive.
  if (!encoding.empty()) {
    const std::string options = std::string("hdrcharset=") + encoding;
    if (archive_read_set_options(archive_, options.c_str()) != ARCHIVE_OK) {
      set_error_message(ArchiveError(
          volume_archive_constants::kArchiveSupportErrorPrefix, archive_));
      return false;
    }
  }

  // Set callbacks for processing the archive's data and open the archive.
  // The callback data is the VolumeArchive itself. Short-circuit evaluation
  // keeps the registration order identical to sequential calls.
  const bool opened =
      archive_read_set_read_callback(archive_, CustomArchiveRead) ==
          ARCHIVE_OK &&
      archive_read_set_skip_callback(archive_, CustomArchiveSkip) ==
          ARCHIVE_OK &&
      archive_read_set_seek_callback(archive_, CustomArchiveSeek) ==
          ARCHIVE_OK &&
      archive_read_set_passphrase_callback(
          archive_, this, CustomArchivePassphrase) == ARCHIVE_OK &&
      archive_read_set_callback_data(archive_, this) == ARCHIVE_OK &&
      archive_read_open1(archive_) == ARCHIVE_OK;
  if (!opened) {
    set_error_message(ArchiveError(
        volume_archive_constants::kArchiveOpenErrorPrefix, archive_));
    return false;
  }

  curr_index = 0;
  raw_ = raw;
  return true;
}
// Advances to the next entry header in the archive.
// Returns RESULT_SUCCESS when a header was read, RESULT_EOF at the end of the
// archive and RESULT_FAIL on error (with error_message set).
VolumeArchive::Result VolumeArchiveLibarchive::GetNextHeader() {
  // Headers are being read from the central directory (in the ZIP format), so
  // use a large block size to save on IPC calls. The headers in EOCD are
  // grouped one by one.
  reader_data_size_ = volume_archive_constants::kMaximumDataChunkSize;

  // Reset to 0 for new VolumeArchive::ReadData operation.
  last_read_data_offset_ = 0;
  decompressed_data_size_ = 0;
  ++curr_index;

  // Archive data is skipped automatically by next call to
  // archive_read_next_header.
  // NOTE: the second argument must be the address of the entry pointer; the
  // previous text contained a mangled "&curren" HTML entity here.
  switch (archive_read_next_header(archive_, &current_archive_entry_)) {
    case ARCHIVE_EOF:
      return RESULT_EOF;
    case ARCHIVE_OK:
      return RESULT_SUCCESS;
    default:
      set_error_message(ArchiveError(
          volume_archive_constants::kArchiveNextHeaderErrorPrefix, archive_));
      return RESULT_FAIL;
  }
}
// Advances to the next entry and extracts its metadata.
// |pathname| is reset to NULL for raw archives whose synthetic entry name is
// "data", which tells higher layers to reuse the archive's own file name.
// |size| falls back to a full decompression scan for raw entries whose size
// is not recorded in the header.
VolumeArchive::Result VolumeArchiveLibarchive::GetNextHeader(
    const char** pathname,
    int64_t* size,
    bool* is_directory,
    time_t* modification_time) {
  Result ret = GetNextHeader();
  if (ret == RESULT_SUCCESS) {
    *pathname = archive_entry_pathname(current_archive_entry_);
    if (raw_) {
      if (strcmp(*pathname, "data") == 0) {
        // Tell the higher layers to re-use the name from the archive.
        *pathname = NULL;
      }
    }
    *size = archive_entry_size(current_archive_entry_);
    if (raw_ && !archive_entry_size_is_set(current_archive_entry_)) {
      // We weren't able to quickly locate the filesize. Brute force!
      size_t block_size;
      const void* buf;
      int64_t offset;
      // Stop on both ARCHIVE_EOF and error returns. The previous loop only
      // stopped on ARCHIVE_EOF, so a stream that kept reporting
      // ARCHIVE_FATAL would spin forever while accumulating a stale
      // block_size.
      int block_result;
      while ((block_result = archive_read_data_block(
                  archive_, &buf, &block_size, &offset)) != ARCHIVE_EOF) {
        if (block_result < ARCHIVE_OK)
          break;  // Error: bail out with the size accumulated so far.
        *size += block_size;
      }
    }
    *modification_time = archive_entry_mtime(current_archive_entry_);
    *is_directory = archive_entry_filetype(current_archive_entry_) == AE_IFDIR;
  }
  return ret;
}
// Positions the archive at the header of the entry with the given |index|.
// Returns false and records an error message if libarchive cannot seek.
bool VolumeArchiveLibarchive::SeekHeader(int64_t index) {
  // Invalidate any data buffered by a previous VolumeArchive::ReadData.
  last_read_data_offset_ = 0;
  decompressed_data_size_ = 0;

  const bool seek_ok =
      archive_read_seek_header(archive_, index) == ARCHIVE_OK;
  if (!seek_ok) {
    set_error_message(ArchiveError(
        volume_archive_constants::kArchiveNextHeaderErrorPrefix, archive_));
    return false;
  }

  curr_index = index;
  return true;
}
// Decompresses |length| bytes of the current entry starting at |offset| into
// the internal buffer (decompressed_data_buffer_). Bytes between
// last_read_data_offset_ and |offset| are unpacked into a dummy buffer and
// discarded, because compressed streams cannot seek. On failure this sets
// decompressed_error_ and records an error message. Only forward reads are
// supported: |offset| must be >= last_read_data_offset_.
void VolumeArchiveLibarchive::DecompressData(int64_t offset, int64_t length) {
  // TODO(cmihail): As an optimization consider using archive_read_data_block
  // which avoids extra copying in case offset != last_read_data_offset_.
  // The logic will be more complicated because archive_read_data_block offset
  // will not be aligned with the offset of the read request from JavaScript.

  // Requests with offset smaller than last read offset are not supported.
  if (offset < last_read_data_offset_) {
    set_error_message(
        std::string(volume_archive_constants::kArchiveReadDataErrorPrefix) +
        "Reading backwards is not supported.");
    decompressed_error_ = true;
    return;
  }

  // Request with offset greater than last read offset. Skip not needed bytes.
  // Because files are compressed, seeking is not possible, so all of the bytes
  // until the requested position must be unpacked.
  ssize_t size = -1;
  while (offset > last_read_data_offset_) {
    // ReadData will call CustomArchiveRead when calling archive_read_data. Read
    // should not request more bytes than possibly needed, so we request either
    // offset - last_read_data_offset_, kMaximumDataChunkSize in case the former
    // is too big or kMinimumDataChunkSize in case its too small and we might
    // end up with too many IPCs.
    reader_data_size_ =
        std::max(std::min(offset - last_read_data_offset_,
                          volume_archive_constants::kMaximumDataChunkSize),
                 volume_archive_constants::kMinimumDataChunkSize);

    // No need for an offset in dummy_buffer as it will be ignored anyway.
    // archive_read_data receives size_t as length parameter, but we limit it to
    // volume_archive_constants::kDummyBufferSize which is positive and less
    // than size_t maximum. So conversion from int64_t to size_t is safe here.
    size =
        archive_read_data(archive_,
                          dummy_buffer_,
                          std::min(offset - last_read_data_offset_,
                                   volume_archive_constants::kDummyBufferSize));
    PP_DCHECK(size != 0);  // The actual read is done below. We shouldn't get to
                           // end of file here.
    if (size < 0) {  // Error.
      set_error_message(ArchiveError(
          volume_archive_constants::kArchiveReadDataErrorPrefix, archive_));
      decompressed_error_ = true;
      return;
    }
    last_read_data_offset_ += size;
  }

  // Do not decompress more bytes than we can store internally. The
  // kDecompressBufferSize limit is used to avoid huge memory usage.
  int64_t left_length =
      std::min(length, volume_archive_constants::kDecompressBufferSize);

  // ReadData will call CustomArchiveRead when calling archive_read_data. The
  // read should be done with a value similar to length, which is the requested
  // number of bytes, or kMaximumDataChunkSize / kMinimumDataChunkSize
  // in case length is too big or too small.
  reader_data_size_ =
      std::max(std::min(static_cast<int64_t>(left_length),
                        volume_archive_constants::kMaximumDataChunkSize),
               volume_archive_constants::kMinimumDataChunkSize);

  // Perform the actual copy.
  int64_t bytes_read = 0;
  do {
    // archive_read_data receives size_t as length parameter, but left_length
    // is capped above at volume_archive_constants::kDecompressBufferSize,
    // which is positive and less than size_t maximum. So conversion from
    // int64_t to size_t is safe here.
    size = archive_read_data(
        archive_, decompressed_data_buffer_ + bytes_read, left_length);
    if (size < 0) {  // Error.
      set_error_message(ArchiveError(
          volume_archive_constants::kArchiveReadDataErrorPrefix, archive_));
      decompressed_error_ = true;
      return;
    }
    bytes_read += size;
    left_length -= size;
  } while (left_length > 0 && size != 0);  // There is still data to read.

  // VolumeArchiveLibarchive::DecompressData always stores the data from
  // beginning of the buffer. VolumeArchiveLibarchive::ConsumeData is used
  // to preserve the bytes that are decompressed but not required by
  // VolumeArchiveLibarchive::ReadData.
  decompressed_data_ = decompressed_data_buffer_;
  decompressed_data_size_ = bytes_read;
}
// Frees the libarchive handle and the reader. All resources are released even
// when archive_read_free fails; in that case the error message is recorded
// and false is returned.
bool VolumeArchiveLibarchive::Cleanup() {
  bool success = true;
  if (archive_ && archive_read_free(archive_) != ARCHIVE_OK) {
    set_error_message(ArchiveError(
        volume_archive_constants::kArchiveReadFreeErrorPrefix, archive_));
    // Keep going: Cleanup must release everything despite the failure.
    success = false;
  }
  archive_ = NULL;

  CleanupReader();
  return success;
}
// Serves up to |length| bytes of the current entry starting at |offset|.
// Points *buffer into the internal decompression buffer and returns the
// number of bytes available there: 0 past the end of the entry, or
// kArchiveReadDataError on failure. The returned pointer is only valid until
// the next call that refills the internal buffer.
int64_t VolumeArchiveLibarchive::ReadData(int64_t offset,
                                          int64_t length,
                                          const char** buffer) {
  PP_DCHECK(length > 0);              // Length must be at least 1.
  PP_DCHECK(current_archive_entry_);  // Check that GetNextHeader was called at
                                      // least once. In case it wasn't, this is
                                      // a programmer error.

  // End of archive entry: offset is at or beyond the known entry size.
  if (archive_entry_size_is_set(current_archive_entry_) &&
      archive_entry_size(current_archive_entry_) <= offset)
    return 0;

  // In case of first read or no more available data in the internal buffer or
  // offset is different from the last_read_data_offset_, then force
  // VolumeArchiveLibarchive::DecompressData as the decompressed data is
  // invalid.
  if (!decompressed_data_ || last_read_data_offset_ != offset ||
      decompressed_data_size_ == 0)
    DecompressData(offset, length);

  // Decompression failed.
  if (decompressed_error_)
    return kArchiveReadDataError;

  last_read_data_length_ = length;  // Used for decompress ahead.

  // Assign the output *buffer parameter to the internal buffer.
  *buffer = decompressed_data_;

  // Advance internal buffer for next ReadData call.
  int64_t read_bytes = std::min(decompressed_data_size_, length);
  decompressed_data_ = decompressed_data_ + read_bytes;
  decompressed_data_size_ -= read_bytes;
  last_read_data_offset_ += read_bytes;

  // The advanced window must stay inside the decompression buffer.
  PP_DCHECK(decompressed_data_ + decompressed_data_size_ <=
            decompressed_data_buffer_ +
                volume_archive_constants::kDecompressBufferSize);

  return read_bytes;
}
// Prefetches the next chunk of the current entry, but only once the internal
// buffer has been fully consumed; repeats the last read's length at the
// current stream position.
void VolumeArchiveLibarchive::MaybeDecompressAhead() {
  if (decompressed_data_size_ != 0)
    return;
  DecompressData(last_read_data_offset_, last_read_data_length_);
}