// Path: blob/trunk/third_party/closure/goog/net/filedownloader.js
// 2868 views
// Copyright 2011 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview A class for downloading remote files and storing them
 * locally using the HTML5 FileSystem API.
 *
 * The directory structure is of the form /HASH/URL/BASENAME:
 *
 * The HASH portion is a three-character slice of the hash of the URL. Since the
 * filesystem has a limit of about 5000 files per directory, this should divide
 * the downloads roughly evenly among about 5000 directories, thus allowing for
 * at most 5000^2 downloads.
 *
 * The URL portion is the (sanitized) full URL used for downloading the file.
 * This is used to ensure that each file ends up in a different location, even
 * if the HASH and BASENAME are the same.
 *
 * The BASENAME portion is the basename of the URL. It's used for the filename
 * proper so that the local filesystem: URL will be downloaded to a file with a
 * recognizable name.
 *
 */

goog.provide('goog.net.FileDownloader');
goog.provide('goog.net.FileDownloader.Error');

goog.require('goog.Disposable');
goog.require('goog.asserts');
goog.require('goog.async.Deferred');
goog.require('goog.crypt.hash32');
goog.require('goog.debug.Error');
goog.require('goog.events');
goog.require('goog.events.EventHandler');
goog.require('goog.fs');
goog.require('goog.fs.DirectoryEntry');
goog.require('goog.fs.Error');
goog.require('goog.fs.FileSaver');
goog.require('goog.net.EventType');
goog.require('goog.net.XhrIo');
goog.require('goog.net.XhrIoPool');
goog.require('goog.object');



/**
 * A class for downloading remote files and storing them locally using the
 * HTML5 filesystem API.
 *
 * @param {!goog.fs.DirectoryEntry} dir The directory in which the downloaded
 *     files are stored. This directory should be solely managed by
 *     FileDownloader.
 * @param {goog.net.XhrIoPool=} opt_pool The pool of XhrIo objects to use for
 *     downloading files.
 * @constructor
 * @extends {goog.Disposable}
 * @final
 */
goog.net.FileDownloader = function(dir, opt_pool) {
  goog.net.FileDownloader.base(this, 'constructor');

  /**
   * The directory in which the downloaded files are stored.
   * @type {!goog.fs.DirectoryEntry}
   * @private
   */
  this.dir_ = dir;

  /**
   * The pool of XHRs to use for capturing.
   * @type {!goog.net.XhrIoPool}
   * @private
   */
  this.pool_ = opt_pool || new goog.net.XhrIoPool();

  /**
   * A map from URLs to active downloads running for those URLs.
   * @type {!Object<!goog.net.FileDownloader.Download_>}
   * @private
   */
  this.downloads_ = {};

  /**
   * The handler for URL capturing events.
   * @type {!goog.events.EventHandler<!goog.net.FileDownloader>}
   * @private
   */
  this.eventHandler_ = new goog.events.EventHandler(this);
};
goog.inherits(goog.net.FileDownloader, goog.Disposable);


/**
 * Download a remote file and save its contents to the filesystem. A given file
 * is uniquely identified by its URL string; this means that the relative and
 * absolute URLs for a single file are considered different for the purposes of
 * the FileDownloader.
 *
 * Returns a Deferred that will contain the downloaded blob. If there's an error
 * while downloading the URL, this Deferred will be passed the
 * {@link goog.net.FileDownloader.Error} object as an errback.
 *
 * If a download is already in progress for the given URL, this will return the
 * deferred blob for that download. If the URL has already been downloaded, this
 * will fail once it tries to save the downloaded blob.
 *
 * When a download is in progress, all Deferreds returned for that download will
 * be branches of a single parent. If all such branches are cancelled, or if one
 * is cancelled with opt_deepCancel set, then the download will be cancelled as
 * well.
 *
 * @param {string} url The URL of the file to download.
 * @return {!goog.async.Deferred} The deferred result blob.
 */
goog.net.FileDownloader.prototype.download = function(url) {
  if (this.isDownloading(url)) {
    return this.downloads_[url].deferred.branch(true /* opt_propagateCancel */);
  }

  var download = new goog.net.FileDownloader.Download_(url, this);
  this.downloads_[url] = download;
  this.pool_.getObject(goog.bind(this.gotXhr_, this, download));
  return download.deferred.branch(true /* opt_propagateCancel */);
};


/**
 * Return a Deferred that will fire once no download is active for a given URL.
 * If there's no download active for that URL when this is called, the deferred
 * will fire immediately; otherwise, it will fire once the download is complete,
 * whether or not it succeeds.
 *
 * @param {string} url The URL of the download to wait for.
 * @return {!goog.async.Deferred} The Deferred that will fire when the download
 *     is complete.
 */
goog.net.FileDownloader.prototype.waitForDownload = function(url) {
  var deferred = new goog.async.Deferred();
  if (this.isDownloading(url)) {
    this.downloads_[url].deferred.addBoth(function() {
      deferred.callback(null);
    }, this);
  } else {
    deferred.callback(null);
  }
  return deferred;
};


/**
 * Returns whether or not there is an active download for a given URL.
 *
 * Only the map's own keys are consulted. A bare `in` test would also match
 * properties inherited from Object.prototype, so a URL string such as
 * "constructor" or "toString" would be reported as downloading even though no
 * Download_ object is registered for it.
 *
 * @param {string} url The URL of the download to check.
 * @return {boolean} Whether or not there is an active download for the URL.
 */
goog.net.FileDownloader.prototype.isDownloading = function(url) {
  return Object.prototype.hasOwnProperty.call(this.downloads_, url);
};


/**
 * Load a downloaded blob from the filesystem. Will fire a deferred error if the
 * given URL has not yet been downloaded.
 *
 * @param {string} url The URL of the blob to load.
 * @return {!goog.async.Deferred} The deferred Blob object. The callback will be
 *     passed the blob. If a file API error occurs while loading the blob, that
 *     error will be passed to the errback.
 */
goog.net.FileDownloader.prototype.getDownloadedBlob = function(url) {
  return this.getFile_(url).addCallback(function(fileEntry) {
    return fileEntry.file();
  });
};


/**
 * Get the local filesystem: URL for a downloaded file. This is different from
 * the blob: URL that's available from getDownloadedBlob(). If the end user
 * accesses the filesystem: URL, the resulting file's name will be determined by
 * the download filename as opposed to an arbitrary GUID. In addition, the
 * filesystem: URL is connected to a filesystem location, so if the download is
 * removed then that URL will become invalid.
 *
 * Warning: in Chrome 12, some filesystem: URLs are opened inline. This means
 * that e.g. HTML pages given to the user via filesystem: URLs will be opened
 * and processed by the browser.
 *
 * @param {string} url The URL of the file to get the URL of.
 * @return {!goog.async.Deferred} The deferred filesystem: URL. The callback
 *     will be passed the URL. If a file API error occurs while loading the
 *     blob, that error will be passed to the errback.
 */
goog.net.FileDownloader.prototype.getLocalUrl = function(url) {
  return this.getFile_(url).addCallback(function(fileEntry) {
    return fileEntry.toUrl();
  });
};


/**
 * Return (deferred) whether or not a URL has been downloaded. Will fire a
 * deferred error if something goes wrong when determining this.
 *
 * @param {string} url The URL to check.
 * @return {!goog.async.Deferred} The deferred boolean. The callback will be
 *     passed the boolean. If a file API error occurs while checking the
 *     existence of the downloaded URL, that error will be passed to the
 *     errback.
 */
goog.net.FileDownloader.prototype.isDownloaded = function(url) {
  var deferred = new goog.async.Deferred();
  var blobDeferred = this.getDownloadedBlob(url);
  blobDeferred.addCallback(function() { deferred.callback(true); });
  blobDeferred.addErrback(function(err) {
    // A missing file simply means "not downloaded"; any other failure is a
    // genuine error that the caller needs to see.
    if (err.name === goog.fs.Error.ErrorName.NOT_FOUND) {
      deferred.callback(false);
    } else {
      deferred.errback(err);
    }
  });
  return deferred;
};


/**
 * Remove a URL from the FileDownloader.
 *
 * This returns a Deferred. If the removal is completed successfully, its
 * callback will be called without any value. If the removal fails, its errback
 * will be called with the {@link goog.fs.Error}.
 *
 * @param {string} url The URL to remove.
 * @return {!goog.async.Deferred} The deferred used for registering callbacks on
 *     success or on error.
 */
goog.net.FileDownloader.prototype.remove = function(url) {
  return this.getDir_(url, goog.fs.DirectoryEntry.Behavior.DEFAULT)
      .addCallback(function(dir) { return dir.removeRecursively(); });
};


/**
 * Save a blob for a given URL. This works just as though the blob were
 * downloaded from that URL, except you specify the blob and no HTTP request is
 * made.
 *
 * If the URL is currently being downloaded, it's indeterminate whether the blob
 * being set or the blob being downloaded will end up in the filesystem.
 * Whichever one doesn't get saved will have an error. To ensure that one or the
 * other takes precedence, use {@link #waitForDownload} to allow the download to
 * complete before setting the blob.
 *
 * @param {string} url The URL at which to set the blob.
 * @param {!Blob} blob The blob to set.
 * @param {string=} opt_name The name of the file. If this isn't given, it's
 *     determined from the URL.
 * @return {!goog.async.Deferred} The deferred used for registering callbacks on
 *     success or on error. This can be cancelled just like a {@link #download}
 *     Deferred. The objects passed to the errback will be
 *     {@link goog.net.FileDownloader.Error}s.
 */
goog.net.FileDownloader.prototype.setBlob = function(url, blob, opt_name) {
  var name = this.sanitize_(opt_name || this.urlToName_(url));
  var download = new goog.net.FileDownloader.Download_(url, this);
  this.downloads_[url] = download;
  download.blob = blob;
  this.getDir_(download.url, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE)
      .addCallback(function(dir) {
        return dir.getFile(
            name, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE);
      })
      .addCallback(goog.bind(this.fileSuccess_, this, download))
      .addErrback(goog.bind(this.error_, this, download));
  return download.deferred.branch(true /* opt_propagateCancel */);
};


/**
 * The callback called when an XHR becomes available from the XHR pool.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {!goog.net.XhrIo} xhr The XhrIo object for downloading the page.
 * @private
 */
goog.net.FileDownloader.prototype.gotXhr_ = function(download, xhr) {
  if (download.cancelled) {
    // The download was cancelled while waiting for the pool; hand the XHR
    // straight back without using it.
    this.freeXhr_(xhr);
    return;
  }

  this.eventHandler_.listen(
      xhr, goog.net.EventType.SUCCESS,
      goog.bind(this.xhrSuccess_, this, download));
  this.eventHandler_.listen(
      xhr, [goog.net.EventType.ERROR, goog.net.EventType.ABORT],
      goog.bind(this.error_, this, download));
  this.eventHandler_.listen(
      xhr, goog.net.EventType.READY, goog.bind(this.freeXhr_, this, xhr));

  download.xhr = xhr;
  xhr.setResponseType(goog.net.XhrIo.ResponseType.ARRAY_BUFFER);
  xhr.send(download.url);
};


/**
 * The callback called when an XHR succeeds in downloading a remote file.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @private
 */
goog.net.FileDownloader.prototype.xhrSuccess_ = function(download) {
  if (download.cancelled) {
    return;
  }

  var name = this.sanitize_(
      this.getName_(
          /** @type {!goog.net.XhrIo} */ (download.xhr)));
  var resp = /** @type {ArrayBuffer} */ (download.xhr.getResponse());
  if (!resp) {
    // This should never happen - it indicates the XHR hasn't completed, has
    // failed or has been cleaned up. If it does happen (eg. due to a bug
    // somewhere) we don't want to pass null to getBlob - it's not valid and
    // triggers a bug in some versions of WebKit causing it to crash.
    this.error_(download);
    return;
  }

  download.blob = goog.fs.getBlob(resp);
  delete download.xhr;

  this.getDir_(download.url, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE)
      .addCallback(function(dir) {
        return dir.getFile(
            name, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE);
      })
      .addCallback(goog.bind(this.fileSuccess_, this, download))
      .addErrback(goog.bind(this.error_, this, download));
};


/**
 * The callback called when a file that will be used for saving a file is
 * successfully opened.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {!goog.fs.FileEntry} file The newly-opened file object.
 * @private
 */
goog.net.FileDownloader.prototype.fileSuccess_ = function(download, file) {
  if (download.cancelled) {
    file.remove();
    return;
  }

  download.file = file;
  file.createWriter()
      .addCallback(goog.bind(this.fileWriterSuccess_, this, download))
      .addErrback(goog.bind(this.error_, this, download));
};


/**
 * The callback called when a file writer is successfully created for writing a
 * file to the filesystem.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {!goog.fs.FileWriter} writer The newly-created file writer object.
 * @private
 */
goog.net.FileDownloader.prototype.fileWriterSuccess_ = function(
    download, writer) {
  if (download.cancelled) {
    download.file.remove();
    return;
  }

  download.writer = writer;
  writer.write(/** @type {!Blob} */ (download.blob));
  this.eventHandler_.listenOnce(
      writer, goog.fs.FileSaver.EventType.WRITE_END,
      goog.bind(this.writeEnd_, this, download));
};


/**
 * The callback called when file writing ends, whether or not it's successful.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @private
 */
goog.net.FileDownloader.prototype.writeEnd_ = function(download) {
  if (download.cancelled || download.writer.getError()) {
    this.error_(download, download.writer.getError());
    return;
  }

  delete this.downloads_[download.url];
  download.deferred.callback(download.blob);
};


/**
 * The error callback for all asynchronous operations. Ensures that all stages
 * of a given download are cleaned up, and emits the error event.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {goog.fs.Error=} opt_err The file error object. Only defined if the
 *     error was raised by the file API.
 * @private
 */
goog.net.FileDownloader.prototype.error_ = function(download, opt_err) {
  if (download.file) {
    download.file.remove();
  }

  // A cancelled download has already been removed from the map by cancel_, so
  // there is nothing further to report.
  if (download.cancelled) {
    return;
  }

  delete this.downloads_[download.url];
  download.deferred.errback(
      new goog.net.FileDownloader.Error(download, opt_err));
};


/**
 * Abort the download of the given URL.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download to abort.
 * @private
 */
goog.net.FileDownloader.prototype.cancel_ = function(download) {
  goog.dispose(download);
  delete this.downloads_[download.url];
};


/**
 * Get the directory for a given URL. If the directory already exists when this
 * is called, it will contain exactly one file: the downloaded file.
 *
 * This not only calls the FileSystem API's getDirectory method, but attempts to
 * distribute the files so that they don't overload the filesystem. The spec
 * says directories can't contain more than 5000 files
 * (http://www.w3.org/TR/file-system-api/#directories), so this ensures that
 * each file is put into a subdirectory based on a 32-bit hash of its URL.
 *
 * @param {string} url The URL corresponding to the directory to get.
 * @param {goog.fs.DirectoryEntry.Behavior} behavior The behavior to pass to the
 *     underlying method.
 * @return {!goog.async.Deferred} The deferred DirectoryEntry object.
 * @private
 */
goog.net.FileDownloader.prototype.getDir_ = function(url, behavior) {
  // 3 hex digits provide 16**3 = 4096 different possible dirnames, which is
  // less than the maximum of 5000 entries. Downloaded files should be
  // distributed roughly evenly throughout the directories due to the hash
  // function, allowing many more than 5000 files to be downloaded.
  //
  // The leading ` ensures that no illegal dirnames are accidentally used. % was
  // previously used, but Chrome has a bug (as of 12.0.725.0 dev) where
  // filenames are URL-decoded before checking their validity, so filenames
  // containing e.g. '%3f' (the URL-encoding of ?, an invalid character) are
  // rejected.
  var dirname = '`' +
      Math.abs(goog.crypt.hash32.encodeString(url))
          .toString(16)
          .substring(0, 3);

  return this.dir_.getDirectory(dirname, goog.fs.DirectoryEntry.Behavior.CREATE)
      .addCallback(function(dir) {
        return dir.getDirectory(this.sanitize_(url), behavior);
      }, this);
};


/**
 * Get the file for a given URL. This will only retrieve files that have already
 * been saved; it shouldn't be used for creating the file in the first place.
 * This is because the filename isn't necessarily determined by the URL, but by
 * the headers of the XHR response.
 *
 * @param {string} url The URL corresponding to the file to get.
 * @return {!goog.async.Deferred} The deferred FileEntry object.
 * @private
 */
goog.net.FileDownloader.prototype.getFile_ = function(url) {
  return this.getDir_(url, goog.fs.DirectoryEntry.Behavior.DEFAULT)
      .addCallback(function(dir) {
        return dir.listDirectory().addCallback(function(files) {
          // A download directory is expected to hold a single file. An empty
          // directory is tolerated (see below), so the assertion only rejects
          // more than one entry; asserting exactly one would turn the
          // empty-directory case into an assertion failure instead of the
          // intended file-not-found error.
          goog.asserts.assert(files.length <= 1);
          // If the filesystem somehow gets corrupted and we end up with an
          // empty directory here, it makes sense to just return the normal
          // file-not-found error.
          return files[0] || dir.getFile('file');
        });
      });
};


/**
 * Sanitize a string so it can be safely used as a file or directory name for
 * the FileSystem API.
 *
 * @param {string} str The string to sanitize.
 * @return {string} The sanitized string.
 * @private
 */
goog.net.FileDownloader.prototype.sanitize_ = function(str) {
  // Add a prefix, since certain prefixes are disallowed for paths. None of the
  // disallowed prefixes start with '`'. We use ` rather than % for escaping the
  // filename due to a Chrome bug (as of 12.0.725.0 dev) where filenames are
  // URL-decoded before checking their validity, so filenames containing e.g.
  // '%3f' (the URL-encoding of ?, an invalid character) are rejected.
  return '`' +
      str.replace(/[\/\\<>:?*"|%`]/g, encodeURIComponent).replace(/%/g, '`');
};


/**
 * Gets the filename specified by the XHR. This first attempts to parse the
 * Content-Disposition header for a filename and, failing that, falls back on
 * deriving the filename from the URL.
 *
 * @param {!goog.net.XhrIo} xhr The XHR containing the response headers.
 * @return {string} The filename.
 * @private
 */
goog.net.FileDownloader.prototype.getName_ = function(xhr) {
  var disposition = xhr.getResponseHeader('Content-Disposition');
  var match =
      disposition && disposition.match(/^attachment *; *filename="(.*)"$/i);
  if (match) {
    // The Content-Disposition header allows for arbitrary backslash-escaped
    // characters (usually " and \). We want to unescape them before using them
    // in the filename.
    return match[1].replace(/\\(.)/g, '$1');
  }

  return this.urlToName_(xhr.getLastUri());
};


/**
 * Extracts the basename from a URL.
 *
 * @param {string} url The URL.
 * @return {string} The basename.
 * @private
 */
goog.net.FileDownloader.prototype.urlToName_ = function(url) {
  var segments = url.split('/');
  return segments[segments.length - 1];
};


/**
 * Remove all event listeners for an XHR and release it back into the pool.
 *
 * @param {!goog.net.XhrIo} xhr The XHR to free.
 * @private
 */
goog.net.FileDownloader.prototype.freeXhr_ = function(xhr) {
  goog.events.removeAll(xhr);
  this.pool_.addFreeObject(xhr);
};


/** @override */
goog.net.FileDownloader.prototype.disposeInternal = function() {
  delete this.dir_;
  goog.dispose(this.eventHandler_);
  delete this.eventHandler_;
  // Cancelling each deferred invokes cancel_, which disposes the download and
  // removes it from the map.
  goog.object.forEach(this.downloads_, function(download) {
    download.deferred.cancel();
  }, this);
  delete this.downloads_;
  goog.dispose(this.pool_);
  delete this.pool_;

  goog.net.FileDownloader.base(this, 'disposeInternal');
};



/**
 * The error object for FileDownloader download errors.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     the download in question.
 * @param {goog.fs.Error=} opt_fsErr The file error object, if this was a file
 *     error.
 *
 * @constructor
 * @extends {goog.debug.Error}
 * @final
 */
goog.net.FileDownloader.Error = function(download, opt_fsErr) {
  goog.net.FileDownloader.Error.base(
      this, 'constructor', 'Error capturing URL ' + download.url);

  /**
   * The URL the event relates to.
   * @type {string}
   */
  this.url = download.url;

  if (download.xhr) {
    this.xhrStatus = download.xhr.getStatus();
    this.xhrErrorCode = download.xhr.getLastErrorCode();
    this.message += ': XHR failed with status ' + this.xhrStatus +
        ' (error code ' + this.xhrErrorCode + ')';
  } else if (opt_fsErr) {
    this.fileError = opt_fsErr;
    this.message += ': file API failed (' + opt_fsErr.message + ')';
  }
};
goog.inherits(goog.net.FileDownloader.Error, goog.debug.Error);


/**
 * The status of the XHR. Only set if the error was caused by an XHR failure.
 * @type {number|undefined}
 */
goog.net.FileDownloader.Error.prototype.xhrStatus;


/**
 * The error code of the XHR. Only set if the error was caused by an XHR
 * failure.
 * @type {goog.net.ErrorCode|undefined}
 */
goog.net.FileDownloader.Error.prototype.xhrErrorCode;


/**
 * The file API error. Only set if the error was caused by the file API.
 * @type {goog.fs.Error|undefined}
 */
goog.net.FileDownloader.Error.prototype.fileError;



/**
 * A struct containing the data for a single download.
 *
 * @param {string} url The URL for the file being downloaded.
 * @param {!goog.net.FileDownloader} downloader The parent FileDownloader.
 * @extends {goog.Disposable}
 * @constructor
 * @private
 */
goog.net.FileDownloader.Download_ = function(url, downloader) {
  goog.net.FileDownloader.Download_.base(this, 'constructor');

  /**
   * The URL for the file being downloaded.
   * @type {string}
   */
  this.url = url;

  /**
   * The Deferred that will be fired when the download is complete.
   * @type {!goog.async.Deferred}
   */
  this.deferred =
      new goog.async.Deferred(goog.bind(downloader.cancel_, downloader, this));

  /**
   * Whether this download has been cancelled by the user.
   * @type {boolean}
   */
  this.cancelled = false;

  /**
   * The XhrIo object for downloading the file. Only set once it's been
   * retrieved from the pool.
   * @type {goog.net.XhrIo}
   */
  this.xhr = null;

  /**
   * The name of the blob being downloaded. Only set once the XHR has completed,
   * if it completed successfully.
   * @type {?string}
   */
  this.name = null;

  /**
   * The downloaded blob. Only set once the XHR has completed, if it completed
   * successfully.
   * @type {Blob}
   */
  this.blob = null;

  /**
   * The file entry where the blob is to be stored. Only set once it's been
   * loaded from the filesystem.
   * @type {goog.fs.FileEntry}
   */
  this.file = null;

  /**
   * The file writer for writing the blob to the filesystem. Only set once it's
   * been loaded from the filesystem.
   * @type {goog.fs.FileWriter}
   */
  this.writer = null;
};
goog.inherits(goog.net.FileDownloader.Download_, goog.Disposable);


/** @override */
goog.net.FileDownloader.Download_.prototype.disposeInternal = function() {
  // Mark as cancelled first so that callbacks triggered by the aborts below
  // see the flag and bail out.
  this.cancelled = true;
  if (this.xhr) {
    this.xhr.abort();
  } else if (
      this.writer &&
      this.writer.getReadyState() === goog.fs.FileSaver.ReadyState.WRITING) {
    this.writer.abort();
  }

  goog.net.FileDownloader.Download_.base(this, 'disposeInternal');
};