/**
 * unzip.js
 *
 * Copyright(c) 2011 Google Inc.
 * Copyright(c) 2011 antimatter15
 *
 * Reference Documentation:
 *
 * ZIP format: http://www.pkware.com/documents/casestudies/APPNOTE.TXT
 * DEFLATE format: http://tools.ietf.org/html/rfc1951
 */

// This file expects to be invoked as a Worker (see onmessage below).
importScripts('io.js');
importScripts('archive.js');

// Progress variables.
var currentFilename = "";
var currentFileNumber = 0;
var currentBytesUnarchivedInFile = 0;
var currentBytesUnarchived = 0;
var totalUncompressedBytesInArchive = 0;
var totalFilesInArchive = 0;

// Helper functions.
var info = function(str) {
  postMessage(new bitjs.archive.UnarchiveInfoEvent(str));
};
var err = function(str) {
  postMessage(new bitjs.archive.UnarchiveErrorEvent(str));
};
var postProgress = function() {
  postMessage(new bitjs.archive.UnarchiveProgressEvent(
      currentFilename,
      currentFileNumber,
      currentBytesUnarchivedInFile,
      currentBytesUnarchived,
      totalUncompressedBytesInArchive,
      totalFilesInArchive));
};

var zLocalFileHeaderSignature = 0x04034b50;
var zArchiveExtraDataSignature = 0x08064b50;
var zCentralFileHeaderSignature = 0x02014b50;
var zDigitalSignatureSignature = 0x05054b50;
var zEndOfCentralDirSignature = 0x06064b50;
var zEndOfCentralDirLocatorSignature = 0x07064b50;

// takes a ByteStream and parses out the local file information
var ZipLocalFile = function(bstream) {
	if (typeof bstream != typeof {} || !bstream.readNumber || typeof bstream.readNumber != typeof function(){}) {
		return null;
	}
	
	bstream.readNumber(4); // swallow signature
	this.version = bstream.readNumber(2);
	this.generalPurpose = bstream.readNumber(2);
	this.compressionMethod = bstream.readNumber(2);
	this.lastModFileTime = bstream.readNumber(2);
	this.lastModFileDate = bstream.readNumber(2);
	this.crc32 = bstream.readNumber(4);
	this.compressedSize = bstream.readNumber(4);
	this.uncompressedSize = bstream.readNumber(4);
	this.fileNameLength = bstream.readNumber(2);
	this.extraFieldLength = bstream.readNumber(2);
	
	this.filename = null;
	if (this.fileNameLength > 0) {
		this.filename = bstream.readString(this.fileNameLength);
	}
	
	info("Zip Local File Header:");
	info(" version=" + this.version);
	info(" general purpose=" + this.generalPurpose);
	info(" compression method=" + this.compressionMethod);
	info(" last mod file time=" + this.lastModFileTime);
	info(" last mod file date=" + this.lastModFileDate);
	info(" crc32=" + this.crc32);
	info(" compressed size=" + this.compressedSize);
	info(" uncompressed size=" + this.uncompressedSize);
	info(" file name length=" + this.fileNameLength);
	info(" extra field length=" + this.extraFieldLength);
	info(" filename = '" + this.filename + "'");
	
	this.extraField = null;
	if (this.extraFieldLength > 0) {
		this.extraField = bstream.readString(this.extraFieldLength);
		 info(" extra field=" + this.extraField);
	}
	
	// read in the compressed data
	this.fileData = null;
	if (this.compressedSize > 0) {
		this.fileData = new Uint8Array(bstream.bytes.buffer, bstream.ptr, this.compressedSize);
		bstream.ptr += this.compressedSize;
	}
	
	// TODO: deal with data descriptor if present (we currently assume no data descriptor!)
	// "This descriptor exists only if bit 3 of the general purpose bit flag is set"
	// But how do you figure out how big the file data is if you don't know the compressedSize
	// from the header?!?
	if ((this.generalPurpose & bitjs.BIT[3]) != 0) {
		this.crc32 = bstream.readNumber(4);
		this.compressedSize = bstream.readNumber(4);
		this.uncompressedSize = bstream.readNumber(4);
	}
};

// determine what kind of compressed data we have and decompress
ZipLocalFile.prototype.unzip = function() {

  // Zip Version 1.0, no compression (store only)
  if (this.compressionMethod == 0 ) {
    info("ZIP v"+this.version+", store only: " + this.filename + " (" + this.compressedSize + " bytes)");
    currentBytesUnarchivedInFile = this.compressedSize;
    currentBytesUnarchived += this.compressedSize;
  }
  // version == 20, compression method == 8 (DEFLATE)
  else if (this.compressionMethod == 8) {
    info("ZIP v2.0, DEFLATE: " + this.filename + " (" + this.compressedSize + " bytes)");
    this.fileData = inflate(this.fileData, this.uncompressedSize);
  }
  else {
    err("UNSUPPORTED VERSION/FORMAT: ZIP v" + this.version + ", compression method=" + this.compressionMethod + ": " + this.filename + " (" + this.compressedSize + " bytes)");
    this.fileData = null;
  }
};


// Takes an ArrayBuffer of a zip file in
// returns null on error
// returns an array of DecompressedFile objects on success
var unzip = function(arrayBuffer) {
  postMessage(new bitjs.archive.UnarchiveStartEvent());

  currentFilename = "";
  currentFileNumber = 0;
  currentBytesUnarchivedInFile = 0;
  currentBytesUnarchived = 0;
  totalUncompressedBytesInArchive = 0;
  totalFilesInArchive = 0;
  currentBytesUnarchived = 0;

  var bstream = new bitjs.io.ByteStream(arrayBuffer);
  // detect local file header signature or return null
  if (bstream.peekNumber(4) == zLocalFileHeaderSignature) {
		var localFiles = [];
		// loop until we don't see any more local files
		while (bstream.peekNumber(4) == zLocalFileHeaderSignature) {
			var oneLocalFile = new ZipLocalFile(bstream);
			// this should strip out directories/folders
			if (oneLocalFile && oneLocalFile.uncompressedSize > 0 && oneLocalFile.fileData) {
				localFiles.push(oneLocalFile);
				totalUncompressedBytesInArchive += oneLocalFile.uncompressedSize;
			}
		}
		totalFilesInArchive = localFiles.length;
		
		// got all local files, now sort them
		localFiles.sort(function(a,b) {
            var aname = a.filename;
            var bname = b.filename;
            return aname > bname ? 1 : -1;

			// extract the number at the end of both filenames
			/*
			var aname = a.filename;
			var bname = b.filename;
			var aindex = aname.length, bindex = bname.length;

			// Find the last number character from the back of the filename.
			while (aname[aindex-1] < '0' || aname[aindex-1] > '9') --aindex;
			while (bname[bindex-1] < '0' || bname[bindex-1] > '9') --bindex;

			// Find the first number character from the back of the filename
			while (aname[aindex-1] >= '0' && aname[aindex-1] <= '9') --aindex;
			while (bname[bindex-1] >= '0' && bname[bindex-1] <= '9') --bindex;
			
			// parse them into numbers and return comparison
			var anum = parseInt(aname.substr(aindex), 10),
				bnum = parseInt(bname.substr(bindex), 10);
			return anum - bnum;
			*/
		});

		// archive extra data record
		if (bstream.peekNumber(4) == zArchiveExtraDataSignature) {
			info(" Found an Archive Extra Data Signature");

			// skipping this record for now
			bstream.readNumber(4);
			var archiveExtraFieldLength = bstream.readNumber(4);
			bstream.readString(archiveExtraFieldLength);
		}
		
		// central directory structure
		// TODO: handle the rest of the structures (Zip64 stuff)
		if (bstream.peekNumber(4) == zCentralFileHeaderSignature) {
			info(" Found a Central File Header");

			// read all file headers
			while (bstream.peekNumber(4) == zCentralFileHeaderSignature) {
				bstream.readNumber(4); // signature
				bstream.readNumber(2); // version made by
				bstream.readNumber(2); // version needed to extract
				bstream.readNumber(2); // general purpose bit flag
				bstream.readNumber(2); // compression method
				bstream.readNumber(2); // last mod file time
				bstream.readNumber(2); // last mod file date
				bstream.readNumber(4); // crc32
				bstream.readNumber(4); // compressed size
				bstream.readNumber(4); // uncompressed size
				var fileNameLength = bstream.readNumber(2); // file name length
				var extraFieldLength = bstream.readNumber(2); // extra field length
				var fileCommentLength = bstream.readNumber(2); // file comment length
				bstream.readNumber(2); // disk number start
				bstream.readNumber(2); // internal file attributes
				bstream.readNumber(4); // external file attributes
				bstream.readNumber(4); // relative offset of local header
				
				bstream.readString(fileNameLength); // file name
				bstream.readString(extraFieldLength); // extra field
				bstream.readString(fileCommentLength); // file comment				
			}
		}
		
		// digital signature
		if (bstream.peekNumber(4) == zDigitalSignatureSignature) {
			info(" Found a Digital Signature");

			bstream.readNumber(4);
			var sizeOfSignature = bstream.readNumber(2);
			bstream.readString(sizeOfSignature); // digital signature data
		}
		
		// report # files and total length
		if (localFiles.length > 0) {
			postProgress();
		}
		
		// now do the unzipping of each file
		for (var i = 0; i < localFiles.length; ++i) {
			var localfile = localFiles[i];
			
			// update progress 
			currentFilename = localfile.filename;
			currentFileNumber = i;
			currentBytesUnarchivedInFile = 0;
			
			// actually do the unzipping
			localfile.unzip();
			
			if (localfile.fileData != null) {
				postMessage(new bitjs.archive.UnarchiveExtractEvent(localfile));
				postProgress();
			}
		}
		postProgress();
		postMessage(new bitjs.archive.UnarchiveFinishEvent());
  }
}

// returns a table of Huffman codes 
// each entry's index is its code and its value is a JavaScript object 
// containing {length: 6, symbol: X}
function getHuffmanCodes(bitLengths) {
	// ensure bitLengths is an array containing at least one element
	if (typeof bitLengths != typeof [] || bitLengths.length < 1) {
		err("Error! getHuffmanCodes() called with an invalid array");
		return null;
	}
	
	// Reference: http://tools.ietf.org/html/rfc1951#page-8
	var numLengths = bitLengths.length,
		bl_count = [],
		MAX_BITS = 1;
	
	// Step 1: count up how many codes of each length we have
	for (var i = 0; i < numLengths; ++i) {
		var length = bitLengths[i];
		// test to ensure each bit length is a positive, non-zero number
		if (typeof length != typeof 1 || length < 0) {
			err("bitLengths contained an invalid number in getHuffmanCodes(): " + length + " of type " + (typeof length));
			return null;
		}
		// increment the appropriate bitlength count
		if (bl_count[length] == undefined) bl_count[length] = 0;
		// a length of zero means this symbol is not participating in the huffman coding
		if (length > 0) bl_count[length]++;
		
		if (length > MAX_BITS) MAX_BITS = length;
	}
	
	// Step 2: Find the numerical value of the smallest code for each code length
	var next_code = [],
		code = 0;
	for (var bits = 1; bits <= MAX_BITS; ++bits) {
		var length = bits-1;
		// ensure undefined lengths are zero
		if (bl_count[length] == undefined) bl_count[length] = 0;
		code = (code + bl_count[bits-1]) << 1;
		next_code[bits] = code;
	}
	
	// Step 3: Assign numerical values to all codes
	var table = {}, tableLength = 0;
	for (var n = 0; n < numLengths; ++n) {
		var len = bitLengths[n];
		if (len != 0) {
			table[next_code[len]] = { length: len, symbol: n }; //, bitstring: binaryValueToString(next_code[len],len) };
			tableLength++;
			next_code[len]++;
		}
	}
	table.maxLength = tableLength;
	
	return table;
}

/*
	 The Huffman codes for the two alphabets are fixed, and are not
	 represented explicitly in the data.  The Huffman code lengths
	 for the literal/length alphabet are:
	
			   Lit Value    Bits        Codes
			   ---------    ----        -----
				 0 - 143     8          00110000 through
										10111111
			   144 - 255     9          110010000 through
										111111111
			   256 - 279     7          0000000 through
										0010111
			   280 - 287     8          11000000 through
										11000111
*/
// fixed Huffman codes go from 7-9 bits, so we need an array whose index can hold up to 9 bits
var fixedHCtoLiteral = null;
var fixedHCtoDistance = null;
function getFixedLiteralTable() {
	// create once
	if (!fixedHCtoLiteral) {
		var bitlengths = new Array(288);
		for (var i = 0; i <= 143; ++i) bitlengths[i] = 8;
		for (i = 144; i <= 255; ++i) bitlengths[i] = 9;
		for (i = 256; i <= 279; ++i) bitlengths[i] = 7;
		for (i = 280; i <= 287; ++i) bitlengths[i] = 8;
		
		// get huffman code table
		fixedHCtoLiteral = getHuffmanCodes(bitlengths);
	}
	return fixedHCtoLiteral;
}
function getFixedDistanceTable() {
	// create once
	if (!fixedHCtoDistance) {
		var bitlengths = new Array(32);
		for (var i = 0; i < 32; ++i) { bitlengths[i] = 5; }
		
		// get huffman code table
		fixedHCtoDistance = getHuffmanCodes(bitlengths);
	}
	return fixedHCtoDistance;
}

// extract one bit at a time until we find a matching Huffman Code
// then return that symbol
function decodeSymbol(bstream, hcTable) {
	var code = 0, len = 0;
	var match = false;
	
	// loop until we match
	for (;;) {
		// read in next bit
		var bit = bstream.readBits(1);
		code = (code<<1) | bit;
		++len;
		
		// check against Huffman Code table and break if found
		if (hcTable.hasOwnProperty(code) && hcTable[code].length == len) {
		
			break;
		}
		if (len > hcTable.maxLength) {
			err("Bit stream out of sync, didn't find a Huffman Code, length was " + len + 
			    " and table only max code length of " + hcTable.maxLength);
			break;
		}
	}
	return hcTable[code].symbol;
}


var CodeLengthCodeOrder = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15];
	/*
		 Extra               Extra               Extra
	Code Bits Length(s) Code Bits Lengths   Code Bits Length(s)
	---- ---- ------     ---- ---- -------   ---- ---- -------
	 257   0     3       267   1   15,16     277   4   67-82
	 258   0     4       268   1   17,18     278   4   83-98
	 259   0     5       269   2   19-22     279   4   99-114
	 260   0     6       270   2   23-26     280   4  115-130
	 261   0     7       271   2   27-30     281   5  131-162
	 262   0     8       272   2   31-34     282   5  163-194
	 263   0     9       273   3   35-42     283   5  195-226
	 264   0    10       274   3   43-50     284   5  227-257
	 265   1  11,12      275   3   51-58     285   0    258
	 266   1  13,14      276   3   59-66
	 
	*/
var LengthLookupTable = [
		[0,3], [0,4], [0,5], [0,6],
		[0,7], [0,8], [0,9], [0,10],
		[1,11], [1,13], [1,15], [1,17],
		[2,19], [2,23], [2,27], [2,31],
		[3,35], [3,43], [3,51], [3,59],
		[4,67], [4,83], [4,99], [4,115],
		[5,131], [5,163], [5,195], [5,227],
		[0,258]
];
	/*
		  Extra           Extra                Extra
	 Code Bits Dist  Code Bits   Dist     Code Bits Distance
	 ---- ---- ----  ---- ----  ------    ---- ---- --------
	   0   0    1     10   4     33-48    20    9   1025-1536
	   1   0    2     11   4     49-64    21    9   1537-2048
	   2   0    3     12   5     65-96    22   10   2049-3072
	   3   0    4     13   5     97-128   23   10   3073-4096
	   4   1   5,6    14   6    129-192   24   11   4097-6144
	   5   1   7,8    15   6    193-256   25   11   6145-8192
	   6   2   9-12   16   7    257-384   26   12  8193-12288
	   7   2  13-16   17   7    385-512   27   12 12289-16384
	   8   3  17-24   18   8    513-768   28   13 16385-24576
	   9   3  25-32   19   8   769-1024   29   13 24577-32768
	*/
var DistLookupTable = [
	[0,1], [0,2], [0,3], [0,4],
	[1,5], [1,7],
	[2,9], [2,13],
	[3,17], [3,25],
	[4,33], [4,49],
	[5,65], [5,97],
	[6,129], [6,193],
	[7,257], [7,385],
	[8,513], [8,769],
	[9,1025], [9,1537],
	[10,2049], [10,3073],
	[11,4097], [11,6145],
	[12,8193], [12,12289],
	[13,16385], [13,24577]
];

function inflateBlockData(bstream, hcLiteralTable, hcDistanceTable, buffer) {
	/*
		  loop (until end of block code recognized)
			 decode literal/length value from input stream
			 if value < 256
				copy value (literal byte) to output stream
			 otherwise
				if value = end of block (256)
				   break from loop
				otherwise (value = 257..285)
				   decode distance from input stream

				   move backwards distance bytes in the output
				   stream, and copy length bytes from this
				   position to the output stream.
	*/
	var numSymbols = 0, blockSize = 0;
	for (;;) {
		var symbol = decodeSymbol(bstream, hcLiteralTable);
		++numSymbols;
		if (symbol < 256) {
			// copy literal byte to output
			buffer.insertByte(symbol);
			blockSize++;
		}
		else {
			// end of block reached
			if (symbol == 256) {
				break;
			}
			else {
				var lengthLookup = LengthLookupTable[symbol-257],
					length = lengthLookup[1] + bstream.readBits(lengthLookup[0]),
					distLookup = DistLookupTable[decodeSymbol(bstream, hcDistanceTable)],
					distance = distLookup[1] + bstream.readBits(distLookup[0]);

				// now apply length and distance appropriately and copy to output

				// TODO: check that backward distance < data.length?
				
				// http://tools.ietf.org/html/rfc1951#page-11
				// "Note also that the referenced string may overlap the current
				//  position; for example, if the last 2 bytes decoded have values
				//  X and Y, a string reference with <length = 5, distance = 2>
				//  adds X,Y,X,Y,X to the output stream."
				// 
				// loop for each character
				var ch = buffer.ptr - distance;
				blockSize += length;
				if(length > distance) {
				  var data = buffer.data;
				  while (length--) {
					  buffer.insertByte(data[ch++]);
				  }
				} else {
				  buffer.insertBytes(buffer.data.subarray(ch, ch + length))
				}
				
			} // length-distance pair
		} // length-distance pair or end-of-block
	} // loop until we reach end of block
	return blockSize;
}

// {Uint8Array} compressedData A Uint8Array of the compressed file data.
// compression method 8
// deflate: http://tools.ietf.org/html/rfc1951
function inflate(compressedData, numDecompressedBytes) {
  // Bit stream representing the compressed data.
  var bstream = new bitjs.io.BitStream(compressedData.buffer,
      false /* rtl */,
      compressedData.byteOffset,
      compressedData.byteLength);
  var buffer = new bitjs.io.ByteBuffer(numDecompressedBytes);
  var numBlocks = 0, blockSize = 0;

  // block format: http://tools.ietf.org/html/rfc1951#page-9
  do {
		var bFinal = bstream.readBits(1),
			bType = bstream.readBits(2);
		blockSize = 0;
		++numBlocks;
		// no compression
		if (bType == 0) {
			// skip remaining bits in this byte
			while (bstream.bitPtr != 0) bstream.readBits(1);
			var len = bstream.readBits(16),
				nlen = bstream.readBits(16);
			// TODO: check if nlen is the ones-complement of len?
			
			if(len > 0) buffer.insertBytes(bstream.readBytes(len));
			blockSize = len;
		}
		// fixed Huffman codes
		else if(bType == 1) {
			blockSize = inflateBlockData(bstream, getFixedLiteralTable(), getFixedDistanceTable(), buffer);
		}
		// dynamic Huffman codes
		else if(bType == 2) {
			var numLiteralLengthCodes = bstream.readBits(5) + 257;
			var numDistanceCodes = bstream.readBits(5) + 1,
				numCodeLengthCodes = bstream.readBits(4) + 4;
				
			// populate the array of code length codes (first de-compaction)		
			var codeLengthsCodeLengths = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0];
			for (var i = 0; i < numCodeLengthCodes; ++i) {
				codeLengthsCodeLengths[ CodeLengthCodeOrder[i] ] = bstream.readBits(3);
			}
			
			// get the Huffman Codes for the code lengths
			var codeLengthsCodes = getHuffmanCodes(codeLengthsCodeLengths);
			
			// now follow this mapping 
			/*
               0 - 15: Represent code lengths of 0 - 15
                   16: Copy the previous code length 3 - 6 times.
                       The next 2 bits indicate repeat length
                             (0 = 3, ... , 3 = 6)
                          Example:  Codes 8, 16 (+2 bits 11),
                                    16 (+2 bits 10) will expand to
                                    12 code lengths of 8 (1 + 6 + 5)
                   17: Repeat a code length of 0 for 3 - 10 times.
                       (3 bits of length)
                   18: Repeat a code length of 0 for 11 - 138 times
                       (7 bits of length)
			*/
			// to generate the true code lengths of the Huffman Codes for the literal
			// and distance tables together
			var literalCodeLengths = [];
			var prevCodeLength = 0;
			while (literalCodeLengths.length < numLiteralLengthCodes + numDistanceCodes) {
				var symbol = decodeSymbol(bstream, codeLengthsCodes);
				if (symbol <= 15) {
					literalCodeLengths.push(symbol);
					prevCodeLength = symbol;
				}
				else if (symbol == 16) {
					var repeat = bstream.readBits(2) + 3;
					while (repeat--) {
						literalCodeLengths.push(prevCodeLength);
					}
				}
				else if (symbol == 17) {
					var repeat = bstream.readBits(3) + 3;
					while (repeat--) {
						literalCodeLengths.push(0);
					}
				}
				else if (symbol == 18) {
					var repeat = bstream.readBits(7) + 11;
					while (repeat--) {
						literalCodeLengths.push(0);
					}
				}
			}
			
			// now split the distance code lengths out of the literal code array
			var distanceCodeLengths = literalCodeLengths.splice(numLiteralLengthCodes, numDistanceCodes);
			
			// now generate the true Huffman Code tables using these code lengths
			var hcLiteralTable = getHuffmanCodes(literalCodeLengths),
				hcDistanceTable = getHuffmanCodes(distanceCodeLengths);
			blockSize = inflateBlockData(bstream, hcLiteralTable, hcDistanceTable, buffer);
		}
		// error
		else {
			err("Error! Encountered deflate block of type 3");
			return null;
		}

		// update progress
		currentBytesUnarchivedInFile += blockSize;
		currentBytesUnarchived += blockSize;
		postProgress();

  } while (bFinal != 1);
  // we are done reading blocks if the bFinal bit was set for this block
	
  // return the buffer data bytes
  return buffer.data;
}

// event.data.file has the ArrayBuffer.
onmessage = function(event) {
  unzip(event.data.file, true);
};