1: <?php
2: /* vim: set expandtab tabstop=4 shiftwidth=4: */
3: // +----------------------------------------------------------------------+
4: // | PHP Version 4 |
5: // +----------------------------------------------------------------------+
6: // | Copyright (c) 1997-2002 The PHP Group |
7: // +----------------------------------------------------------------------+
8: // | This source file is subject to version 2.02 of the PHP license, |
9: // | that is bundled with this package in the file LICENSE, and is |
10: // | available at through the world-wide-web at |
11: // | http://www.php.net/license/2_02.txt. |
12: // | If you did not receive a copy of the PHP license and are unable to |
13: // | obtain it through the world-wide-web, please send a note to |
14: // | license@php.net so we can mail you a copy immediately. |
15: // +----------------------------------------------------------------------+
16: // | Author: Xavier Noguer <xnoguer@php.net> |
17: // | Based on OLE::Storage_Lite by Kawai, Takanori |
18: // +----------------------------------------------------------------------+
19: //
20: // $Id: OLE.php,v 1.13 2007/03/07 14:38:25 schmidt Exp $
21:
22:
/**
 * Global registry of PHPExcel_Shared_OLE instances.
 *
 * PHPExcel_Shared_OLE::getStream() appends the calling instance to this array
 * and embeds the resulting array key in the 'ole-chainedblockstream://' path
 * it opens (as "oleInstanceId"), so that the stream wrapper's stream_open()
 * can find the OLE object the stream belongs to.
 *
 * @var array
 */
$GLOBALS['_OLE_INSTANCES'] = array();
29:
/**
 * OLE package base class.
 *
 * Reads the header, allocation tables and directory (the "PPS" entries) of an
 * OLE2 compound file and offers a few static helpers for converting names and
 * timestamps to and from their on-disk representation.
 *
 * @author   Xavier Noguer <xnoguer@php.net>
 * @author   Christian Schmidt <schmidt@php.net>
 * @category PHPExcel
 * @package  PHPExcel_Shared_OLE
 */
class PHPExcel_Shared_OLE
{
    const OLE_PPS_TYPE_ROOT   =      5;
    const OLE_PPS_TYPE_DIR    =      1;
    const OLE_PPS_TYPE_FILE   =      2;
    const OLE_DATA_SIZE_SMALL = 0x1000;
    const OLE_LONG_INT_SIZE   =      4;
    const OLE_PPS_SIZE        =   0x80;

    /**
     * The file handle for reading an OLE container
     * @var resource
     */
    public $_file_handle;

    /**
     * Array of PPS's found on the OLE container
     * @var array
     */
    public $_list = array();

    /**
     * Root directory of OLE container
     * @var PHPExcel_Shared_OLE_PPS_Root
     */
    public $root;

    /**
     * Big Block Allocation Table
     * @var array  (blockId => nextBlockId)
     */
    public $bbat;

    /**
     * Short Block Allocation Table
     * @var array  (blockId => nextBlockId)
     */
    public $sbat;

    /**
     * Size of big blocks. This is usually 512.
     * @var int  number of octets per block.
     */
    public $bigBlockSize;

    /**
     * Size of small blocks. This is usually 64.
     * @var int  number of octets per block
     */
    public $smallBlockSize;

    /**
     * Streams shorter than this many octets are stored using small blocks.
     * Filled in by read() from the container header.
     * @var int
     */
    public $bigBlockThreshold;

    /**
     * Reads an OLE container from the contents of the file given.
     *
     * On success the file handle is kept open in $_file_handle, the block
     * allocation tables are loaded into $bbat / $sbat and the directory
     * entries are loaded into $_list / $root.
     *
     * @access public
     * @param string $file path of the file to read
     * @return bool true on success
     * @throws PHPExcel_Reader_Exception if the file cannot be opened, does not
     *         carry the OLE signature or is not little-endian
     */
    public function read($file)
    {
        // 'b' requests an untranslated, binary-safe read on every platform
        $fh = fopen($file, "rb");
        if (!$fh) {
            throw new PHPExcel_Reader_Exception("Can't open file $file");
        }

        $signature = fread($fh, 8);
        if ("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" != $signature) {
            // Do not leak the handle of a file we are going to reject
            fclose($fh);
            throw new PHPExcel_Reader_Exception("File doesn't seem to be an OLE container.");
        }
        fseek($fh, 28);
        if (fread($fh, 2) != "\xFE\xFF") {
            // This shouldn't be a problem in practice
            fclose($fh);
            throw new PHPExcel_Reader_Exception("Only Little-Endian encoding is supported.");
        }
        // Only keep the handle once the file has been accepted
        $this->_file_handle = $fh;

        // Size of blocks and short blocks in bytes (stored as powers of two)
        $this->bigBlockSize = pow(2, self::_readInt2($fh));
        $this->smallBlockSize  = pow(2, self::_readInt2($fh));

        // Skip UID, revision number and version number
        fseek($fh, 44);
        // Number of blocks in Big Block Allocation Table
        $bbatBlockCount = self::_readInt4($fh);

        // Root chain 1st block
        $directoryFirstBlockId = self::_readInt4($fh);

        // Skip unused bytes
        fseek($fh, 56);
        // Streams shorter than this are stored using small blocks
        $this->bigBlockThreshold = self::_readInt4($fh);
        // Block id of first sector in Short Block Allocation Table
        $sbatFirstBlockId = self::_readInt4($fh);
        // Number of blocks in Short Block Allocation Table
        $sbbatBlockCount = self::_readInt4($fh);
        // Block id of first sector in Master Block Allocation Table
        $mbatFirstBlockId = self::_readInt4($fh);
        // Number of blocks in Master Block Allocation Table
        $mbbatBlockCount = self::_readInt4($fh);
        $this->bbat = array();

        // Remaining 4 * 109 bytes of current block is beginning of Master
        // Block Allocation Table
        $mbatBlocks = array();
        for ($i = 0; $i < 109; ++$i) {
            $mbatBlocks[] = self::_readInt4($fh);
        }

        // Read rest of Master Block Allocation Table (if any is left)
        $pos = $this->_getBlockOffset($mbatFirstBlockId);
        for ($i = 0; $i < $mbbatBlockCount; ++$i) {
            fseek($fh, $pos);
            for ($j = 0; $j < $this->bigBlockSize / 4 - 1; ++$j) {
                $mbatBlocks[] = self::_readInt4($fh);
            }
            // Last block id in each block points to next block
            $pos = $this->_getBlockOffset(self::_readInt4($fh));
        }

        // Read Big Block Allocation Table according to chain specified by
        // $mbatBlocks
        for ($i = 0; $i < $bbatBlockCount; ++$i) {
            $pos = $this->_getBlockOffset($mbatBlocks[$i]);
            fseek($fh, $pos);
            for ($j = 0; $j < $this->bigBlockSize / 4; ++$j) {
                $this->bbat[] = self::_readInt4($fh);
            }
        }

        // Read short block allocation table (SBAT)
        $this->sbat = array();
        $shortBlockCount = $sbbatBlockCount * $this->bigBlockSize / 4;
        $sbatFh = $this->getStream($sbatFirstBlockId);
        for ($blockId = 0; $blockId < $shortBlockCount; ++$blockId) {
            $this->sbat[$blockId] = self::_readInt4($sbatFh);
        }
        fclose($sbatFh);

        $this->_readPpsWks($directoryFirstBlockId);

        return true;
    }

    /**
     * Converts a big block id into a byte offset from the beginning of the file.
     *
     * The header occupies the first block-sized slot of the file, so block 0
     * starts one full block in. For the usual 512-byte blocks this equals
     * 512 + $blockId * 512; for larger blocks it also skips the padding that
     * follows the 512 header bytes.
     *
     * @access public
     * @param int $blockId block id
     * @return int byte offset from beginning of file
     */
    public function _getBlockOffset($blockId)
    {
        return ($blockId + 1) * $this->bigBlockSize;
    }

    /**
     * Returns a stream for use with fread() etc. External callers should
     * use PHPExcel_Shared_OLE_PPS_File::getStream().
     *
     * The first call registers the 'ole-chainedblockstream' stream wrapper.
     * Every call appends this object to $GLOBALS['_OLE_INSTANCES'] and passes
     * the resulting key to the wrapper through the stream path.
     *
     * @param int|PHPExcel_Shared_OLE_PPS $blockIdOrPps block id or PPS
     * @return resource read-only stream
     */
    public function getStream($blockIdOrPps)
    {
        static $isRegistered = false;
        if (!$isRegistered) {
            stream_wrapper_register(
                'ole-chainedblockstream',
                'PHPExcel_Shared_OLE_ChainedBlockStream'
            );
            $isRegistered = true;
        }

        // Store current instance in global array, so that it can be accessed
        // in the stream wrapper's stream_open().
        $GLOBALS['_OLE_INSTANCES'][] = $this;
        // end() takes its argument by reference, so it needs a real variable
        $instanceKeys = array_keys($GLOBALS['_OLE_INSTANCES']);
        $instanceId = end($instanceKeys);

        $path = 'ole-chainedblockstream://oleInstanceId=' . $instanceId;
        if ($blockIdOrPps instanceof PHPExcel_Shared_OLE_PPS) {
            $path .= '&blockId=' . $blockIdOrPps->_StartBlock;
            $path .= '&size=' . $blockIdOrPps->Size;
        } else {
            $path .= '&blockId=' . $blockIdOrPps;
        }
        return fopen($path, 'r');
    }

    /**
     * Reads a signed char.
     *
     * @access private
     * @param resource $fh file handle
     * @return int
     */
    private static function _readInt1($fh)
    {
        list(, $tmp) = unpack("c", fread($fh, 1));
        return $tmp;
    }

    /**
     * Reads an unsigned little-endian short (2 octets).
     *
     * @access private
     * @param resource $fh file handle
     * @return int
     */
    private static function _readInt2($fh)
    {
        list(, $tmp) = unpack("v", fread($fh, 2));
        return $tmp;
    }

    /**
     * Reads a little-endian long (4 octets) using unpack('V').
     *
     * Values with the top bit set come back as large positive numbers when
     * PHP integers are 64 bits wide and as negative numbers when they are
     * 32 bits wide; callers comparing against -1 must allow for both.
     *
     * @access private
     * @param resource $fh file handle
     * @return int
     */
    private static function _readInt4($fh)
    {
        list(, $tmp) = unpack("V", fread($fh, 4));
        return $tmp;
    }

    /**
     * Tells whether a PPS link (PrevPps, NextPps or DirPps) means "no entry".
     *
     * The on-disk marker is the four octets FF FF FF FF, which _readInt4()
     * reports as -1 or as 4294967295 depending on the width of PHP integers,
     * so both spellings are accepted.
     *
     * @access private
     * @param int $no link value taken from a PPS
     * @return bool
     */
    private static function _isNoPps($no)
    {
        return $no == -1 || $no == 0xFFFFFFFF;
    }

    /**
     * Gets information about all PPS's on the OLE container from the PPS WK's
     * creates an OLE_PPS object for each one.
     *
     * Entries are read one after another until the tree reachable from the
     * root entry is complete or the directory stream runs out of data.
     * Entries whose type is not root, directory or file are skipped.
     *
     * @access public
     * @param integer $blockId the block id of the first block
     * @return bool true on success
     */
    public function _readPpsWks($blockId)
    {
        $fh = $this->getStream($blockId);
        for ($pos = 0; ; $pos += self::OLE_PPS_SIZE) {
            fseek($fh, $pos, SEEK_SET);
            $nameUtf16 = fread($fh, 64);
            if ($nameUtf16 === false || strlen($nameUtf16) < 64) {
                // No complete entry left in the stream; without this check a
                // truncated directory would keep the loop running forever.
                break;
            }
            $nameLength = self::_readInt2($fh);
            $nameUtf16 = substr($nameUtf16, 0, $nameLength - 2);
            // Simple conversion from UTF-16LE to ISO-8859-1
            $name = str_replace("\x00", "", $nameUtf16);
            $type = self::_readInt1($fh);
            switch ($type) {
                case self::OLE_PPS_TYPE_ROOT:
                    $pps = new PHPExcel_Shared_OLE_PPS_Root(null, null, array());
                    $this->root = $pps;
                    break;
                case self::OLE_PPS_TYPE_DIR:
                    $pps = new PHPExcel_Shared_OLE_PPS(
                        null, null, null, null, null,
                        null, null, null, null, array()
                    );
                    break;
                case self::OLE_PPS_TYPE_FILE:
                    $pps = new PHPExcel_Shared_OLE_PPS_File($name);
                    break;
                default:
                    // "continue 2" moves on to the next entry. A bare
                    // "continue" only leaves the switch, after which the code
                    // below would run with an undefined or stale $pps.
                    // NOTE(review): a skipped entry does not reserve a slot in
                    // $_list, so links pointing past it may be off by one;
                    // this numbering is unchanged from the original code.
                    continue 2;
            }
            fseek($fh, 1, SEEK_CUR);
            $pps->Type    = $type;
            $pps->Name    = $name;
            $pps->PrevPps = self::_readInt4($fh);
            $pps->NextPps = self::_readInt4($fh);
            $pps->DirPps  = self::_readInt4($fh);
            fseek($fh, 20, SEEK_CUR);
            $pps->Time1st = self::OLE2LocalDate(fread($fh, 8));
            $pps->Time2nd = self::OLE2LocalDate(fread($fh, 8));
            $pps->_StartBlock = self::_readInt4($fh);
            $pps->Size = self::_readInt4($fh);
            $pps->No = count($this->_list);
            $this->_list[] = $pps;

            // check if the PPS tree (starting from root) is complete
            if (isset($this->root) &&
                $this->_ppsTreeComplete($this->root->No)) {
                break;
            }
        }
        fclose($fh);

        // Initialize $pps->children on directories
        foreach ($this->_list as $pps) {
            if ($pps->Type == self::OLE_PPS_TYPE_DIR || $pps->Type == self::OLE_PPS_TYPE_ROOT) {
                $nos = array($pps->DirPps);
                $pps->children = array();
                while ($nos) {
                    $no = array_pop($nos);
                    // Ignore "no entry" markers and links to entries that
                    // were never read (possible when the stream was cut short)
                    if (self::_isNoPps($no) || !isset($this->_list[$no])) {
                        continue;
                    }
                    $childPps = $this->_list[$no];
                    $nos[] = $childPps->PrevPps;
                    $nos[] = $childPps->NextPps;
                    $pps->children[] = $childPps;
                }
            }
        }

        return true;
    }

    /**
     * It checks whether the PPS tree is complete (all PPS's read)
     * starting with the given PPS (not necessarily root)
     *
     * A tree is complete when the entry itself has been read and each of its
     * previous, next and directory links is either "no entry" or leads to a
     * complete tree.
     *
     * @access public
     * @param integer $index The index of the PPS from which we are checking
     * @return boolean Whether the PPS tree for the given PPS is complete
     */
    public function _ppsTreeComplete($index)
    {
        if (!isset($this->_list[$index])) {
            return false;
        }
        $pps = $this->_list[$index];

        return (self::_isNoPps($pps->PrevPps) || $this->_ppsTreeComplete($pps->PrevPps))
            && (self::_isNoPps($pps->NextPps) || $this->_ppsTreeComplete($pps->NextPps))
            && (self::_isNoPps($pps->DirPps) || $this->_ppsTreeComplete($pps->DirPps));
    }

    /**
     * Checks whether a PPS is a File PPS or not.
     * If there is no PPS for the index given, it will return false.
     *
     * @access public
     * @param integer $index The index for the PPS
     * @return bool true if it's a File PPS, false otherwise
     */
    public function isFile($index)
    {
        return isset($this->_list[$index])
            && $this->_list[$index]->Type == self::OLE_PPS_TYPE_FILE;
    }

    /**
     * Checks whether a PPS is a Root PPS or not.
     * If there is no PPS for the index given, it will return false.
     *
     * @access public
     * @param integer $index The index for the PPS.
     * @return bool true if it's a Root PPS, false otherwise
     */
    public function isRoot($index)
    {
        return isset($this->_list[$index])
            && $this->_list[$index]->Type == self::OLE_PPS_TYPE_ROOT;
    }

    /**
     * Gives the total number of PPS's found in the OLE container.
     *
     * @access public
     * @return integer The total number of PPS's found in the OLE container
     */
    public function ppsTotal()
    {
        return count($this->_list);
    }

    /**
     * Gets data from a PPS
     * If there is no PPS for the index given, or the position lies outside
     * the PPS, it will return an empty string.
     *
     * @access public
     * @param integer $index    The index for the PPS
     * @param integer $position The position from which to start reading
     *                          (relative to the PPS)
     * @param integer $length   The amount of bytes to read (at most)
     * @return string The binary string containing the data requested
     * @see PHPExcel_Shared_OLE_PPS_File::getStream()
     */
    public function getData($index, $position, $length)
    {
        // if position is not valid return empty string
        if (!isset($this->_list[$index]) || ($position >= $this->_list[$index]->Size) || ($position < 0)) {
            return '';
        }
        $fh = $this->getStream($this->_list[$index]);
        $data = stream_get_contents($fh, $length, $position);
        fclose($fh);
        return $data;
    }

    /**
     * Gets the data length from a PPS
     * If there is no PPS for the index given, it will return 0.
     *
     * @access public
     * @param integer $index The index for the PPS
     * @return integer The amount of bytes in data the PPS has
     */
    public function getDataLength($index)
    {
        if (isset($this->_list[$index])) {
            return $this->_list[$index]->Size;
        }
        return 0;
    }

    /**
     * Utility function to transform ASCII text to Unicode
     *
     * Every input byte is followed by a zero byte.
     *
     * @access public
     * @static
     * @param string $ascii The ASCII string to transform
     * @return string The string in Unicode
     */
    public static function Asc2Ucs($ascii)
    {
        $rawname = '';
        $length = strlen($ascii);
        for ($i = 0; $i < $length; ++$i) {
            // Square brackets: the {} offset syntax no longer parses on PHP 8
            $rawname .= $ascii[$i] . "\x00";
        }
        return $rawname;
    }

    /**
     * Utility function
     * Returns a string for the OLE container with the date given
     *
     * The local wall-clock fields of the timestamp are re-read as if they
     * were GMT, shifted to the 1601 epoch, scaled by 10,000,000 and written
     * as 8 octets, least significant first.
     *
     * @access public
     * @static
     * @param integer $date A timestamp; null yields 8 zero octets
     * @return string The string for the OLE container
     */
    public static function LocalDate2OLE($date = null)
    {
        if (!isset($date)) {
            return "\x00\x00\x00\x00\x00\x00\x00\x00";
        }

        // factor used for separating numbers into 4 bytes parts
        $factor = pow(2, 32);

        // days from 1-1-1601 until the beggining of UNIX era
        $days = 134774;
        // calculate seconds
        $big_date = $days * 24 * 3600 + gmmktime(
            date("H", $date), date("i", $date), date("s", $date),
            date("m", $date), date("d", $date), date("Y", $date)
        );
        // multiply just to make MS happy
        $big_date *= 10000000;

        $high_part = floor($big_date / $factor);
        // lower 4 bytes
        $low_part = floor((($big_date / $factor) - $high_part) * $factor);

        // Emit the low half first, then the high half, one octet at a time
        $res = '';
        foreach (array($low_part, $high_part) as $part) {
            for ($i = 0; $i < 4; ++$i) {
                $res .= pack('c', $part % 0x100);
                // Keep the running value whole so the next "%" never has to
                // truncate a fractional float
                $part = floor($part / 0x100);
            }
        }
        return $res;
    }

    /**
     * Returns a timestamp from an OLE container's date
     *
     * @access public
     * @static
     * @param string $string A binary string with the encoded date (8 octets)
     * @return float The timestamp corresponding to the string, rounded down
     *               to whole seconds
     * @throws PHPExcel_Reader_Exception if $string is not exactly 8 octets long
     */
    public static function OLE2LocalDate($string)
    {
        if (strlen($string) != 8) {
            // Same exception type as read(); PEAR_Error is not part of this package
            throw new PHPExcel_Reader_Exception("Expecting 8 byte string");
        }

        // factor used for separating numbers into 4 bytes parts
        $factor = pow(2, 32);
        list(, $high_part) = unpack('V', substr($string, 4, 4));
        list(, $low_part) = unpack('V', substr($string, 0, 4));

        $big_date = ($high_part * $factor) + $low_part;
        // translate to seconds
        $big_date /= 10000000;

        // days from 1-1-1601 until the beggining of UNIX era
        $days = 134774;

        // translate to seconds from beggining of UNIX era
        $big_date -= $days * 24 * 3600;
        return floor($big_date);
    }
}
532: