diff --git a/COPYRIGHT b/COPYRIGHT
index e979d87fcf3..a609ac13b66 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -48,7 +48,7 @@ Swift Mailer 5.4.2-DEV MIT License Yes
Symfony/var-dumper ??? MIT License Yes Library to make var dump (used by DebugBar)
Stripe 10.7.0 MIT Licence Yes Library for Stripe module
TCPDF 6.7.5 LGPL-3+ Yes PDF generation
-TCPDI 1.0.0 LGPL-3+ / Apache 2.0 Yes FPDI replacement
+TCPDI 1.1.0 LGPL-3+ / Apache 2.0 Yes FPDI replacement
bacon, dasprid, swiss-qr-bill, kmukku, symfony/validator
diff --git a/htdocs/includes/tcpdi/tcpdi.php b/htdocs/includes/tcpdi/tcpdi.php
index 9e87ac2f221..89db66b50c2 100644
--- a/htdocs/includes/tcpdi/tcpdi.php
+++ b/htdocs/includes/tcpdi/tcpdi.php
@@ -1,6 +1,6 @@
_importedPages[$pageKey] = $this->tpl;
-
return $this->tpl;
}
+    /**
+     * Apply the page boxes of a source-document page as the current page format.
+     *
+     * Reads the boxes of page $pageno from the parser of the current source file,
+     * strips the leading '/' from each box name and hands the result to setPageFormat().
+     *
+     * @param $pageno (int) Page number in the source document, as passed to the parser.
+     * @param $orientation (string) Orientation forwarded unchanged to setPageFormat().
+     */
+    function setPageFormatFromTemplatePage($pageno, $orientation) {
+        $fn = $this->current_filename;
+        $parser =& $this->parsers[$fn];
+        $parser->setPageno($pageno);
+        $boxes = $parser->getPageBoxes($pageno, $this->k);
+        // A by-value foreach iterates over a snapshot, so renaming keys of $boxes inside the loop is safe.
+        foreach ($boxes as $name => $box) {
+            // Only non-empty string keys can carry the '/' prefix; checking first avoids
+            // an offset warning on integer or empty-string keys.
+            if (is_string($name) && $name !== '' && $name[0] == '/') {
+                $boxes[substr($name, 1)] = $box;
+                unset($boxes[$name]);
+            }
+        }
+        $this->setPageFormat($boxes, $orientation);
+    }
+
+    /**
+     * Wrapper for AddPage() which tracks TOC pages to offset annotations later.
+     *
+     * Does nothing while an XObject template is open. Otherwise it delegates to the
+     * parent implementation and counts the page as a TOC page when the tocpage flag is set.
+     */
+    function AddPage($orientation='', $format='', $keepmargins=false, $tocpage=false) {
+        if (!$this->inxobj) {
+            parent::AddPage($orientation, $format, $keepmargins, $tocpage);
+            if ($this->tocpage) {
+                ++$this->_numTOCpages;
+            }
+        }
+    }
+
+    /**
+     * Wrapper for AddTOC() which tracks the TOC position to offset annotations later.
+     *
+     * Records the requested TOC page, or the current page when $page is empty,
+     * then delegates to the parent implementation with the arguments unchanged.
+     */
+    function AddTOC($page='', $numbersfont='', $filler='.', $toc_name='TOC', $style='', $color=array(0,0,0)) {
+        $this->_TOCpagenum = TCPDF_STATIC::empty_string($page) ? $this->page : $page;
+
+        parent::AddTOC($page, $numbersfont, $filler, $toc_name, $style, $color);
+    }
+
+    /**
+     * Register the annotations of a source-document page for the current output page.
+     *
+     * The parser may return the annotations either directly as an array value or as an
+     * object wrapping an array value; both shapes are handled. Every entry of the list is
+     * passed to importAnnotation(), and the resulting object ids are stored under the
+     * current output page in $this->_importedAnnots.
+     *
+     * @param $pageno (int) Page number in the source document whose annotations are read.
+     */
+    function importAnnotations($pageno) {
+        $fn = $this->current_filename;
+        $parser =& $this->parsers[$fn];
+        $parser->setPageno($pageno);
+        $annots = $parser->getPageAnnotations();
+
+        if (!is_array($annots) || !isset($annots[0], $annots[1])) {
+            // Nothing usable was returned for this page.
+            return;
+        }
+
+        $refs = null;
+        if ($annots[0] == PDF_TYPE_ARRAY) {
+            // It's an array
+            $refs = $annots[1];
+        } elseif ($annots[0] == PDF_TYPE_OBJECT
+            && is_array($annots[1]) && isset($annots[1][0], $annots[1][1])
+            && $annots[1][0] == PDF_TYPE_ARRAY) {
+            // We got an object wrapping an array
+            $refs = $annots[1][1];
+        }
+
+        // Only an empty list means "no annotations"; a page holding exactly one
+        // annotation must be imported as well.
+        if (!is_array($refs) || count($refs) < 1) {
+            return;
+        }
+
+        if (!isset($this->_obj_stack[$fn])) {
+            $this->_obj_stack[$fn] = array();
+        }
+
+        $this->_importedAnnots[$this->page] = array();
+        foreach ($refs as $annot) {
+            $this->importAnnotation($annot);
+        }
+    }
+
+    /**
+     * Register a single annotation reference for the current output page.
+     *
+     * The first time a source object id is seen for the current file, a new object
+     * number is allocated and recorded on both object stacks. Later calls reuse it.
+     * The object number is then appended to the current page's imported annotations.
+     *
+     * @param $annotation (array) Parsed value whose element 1 is the source object id.
+     */
+    function importAnnotation($annotation) {
+        $fn = $this->current_filename;
+        $old_id = $annotation[1];
+        if (!isset($this->_don_obj_stack[$fn][$old_id])) {
+            $this->_newobj(false, true);
+            $entry = array($this->n, array(PDF_TYPE_OBJREF, $old_id, 0));
+            $this->_obj_stack[$fn][$old_id] = $entry;
+            $this->_don_obj_stack[$fn][$old_id] = $entry;
+        }
+        $this->_importedAnnots[$this->page][] = $this->_don_obj_stack[$fn][$old_id][0];
+    }
+
+
+    /**
+     * Get references to page annotations, including imported ones.
+     *
+     * When TOC pages have been counted, page numbers at or after the recorded TOC
+     * position are shifted back by the number of TOC pages before any lookup.
+     * @param $n (int) page number
+     * @return string ' /Annots [ ... ]' fragment, or '' when the page has no imported
+     *                annotations, no page annotations and no signature certificate.
+     * @protected
+     * @author Nicola Asuni
+     * @since 5.0.010 (2010-05-17)
+     */
+    protected function _getannotsrefs($n) {
+        if (!empty($this->_numTOCpages) && $n >= $this->_TOCpagenum) {
+            // Offset page number to account for TOC being inserted before page containing annotations.
+            $n -= $this->_numTOCpages;
+        }
+        if (!isset($this->_importedAnnots[$n]) && !isset($this->PageAnnots[$n]) && !($this->sign && isset($this->signature_data['cert_type']))) {
+            return '';
+        }
+        // Collect object ids in output order first, then render them once.
+        $ids = array();
+        if (isset($this->_importedAnnots[$n])) {
+            foreach ($this->_importedAnnots[$n] as $objid) {
+                $ids[] = $objid;
+            }
+        }
+        if (isset($this->PageAnnots[$n])) {
+            foreach ($this->PageAnnots[$n] as $annot) {
+                if (!in_array($annot['n'], $this->radio_groups)) {
+                    $ids[] = $annot['n'];
+                }
+            }
+            // add radiobutton groups
+            if (isset($this->radiobutton_groups[$n])) {
+                foreach ($this->radiobutton_groups[$n] as $group) {
+                    if (isset($group['n'])) {
+                        $ids[] = $group['n'];
+                    }
+                }
+            }
+        }
+        if ($this->sign && ($n == $this->signature_appearance['page']) && isset($this->signature_data['cert_type'])) {
+            // set reference for signature object
+            $ids[] = $this->sig_obj_id;
+        }
+        if (!empty($this->empty_signature_appearance)) {
+            foreach ($this->empty_signature_appearance as $esa) {
+                if ($esa['page'] == $n) {
+                    // set reference for empty signature objects
+                    $ids[] = $esa['objid'];
+                }
+            }
+        }
+        $out = ' /Annots [';
+        foreach ($ids as $id) {
+            $out .= ' '.$id.' 0 R';
+        }
+        $out .= ' ]';
+        return $out;
+    }
+
+
/**
* Returns the last used page box
*
diff --git a/htdocs/includes/tcpdi/tcpdi_parser.php b/htdocs/includes/tcpdi/tcpdi_parser.php
index 30c00b8a5d0..873c2a8bd9b 100644
--- a/htdocs/includes/tcpdi/tcpdi_parser.php
+++ b/htdocs/includes/tcpdi/tcpdi_parser.php
@@ -1,9 +1,9 @@
* @author Paul Nicholls
* @author Nicola Asuni
- * @version 1.0
+ * @version 1.1
*/
// include class for decoding filters
@@ -83,71 +83,71 @@ if (!defined ('PDF_TYPE_REAL'))
* This is a PHP class for parsing PDF documents.
* Based on TCPDF_PARSER, part of the TCPDF project by Nicola Asuni.
* @brief This is a PHP class for parsing PDF documents..
- * @version 1.0
+ * @version 1.1
* @author Paul Nicholls - github.com/pauln
* @author Nicola Asuni - info@tecnick.com
*/
class tcpdi_parser {
- /**
- * Unique parser ID
- * @public
- */
- public $uniqueid = '';
+ /**
+ * Unique parser ID
+ * @public
+ */
+ public $uniqueid = '';
- /**
- * Raw content of the PDF document.
- * @private
- */
- private $pdfdata = '';
+ /**
+ * Raw content of the PDF document.
+ * @private
+ */
+ private $pdfdata = '';
- /**
- * XREF data.
- * @protected
- */
- public $xref = array();
+ /**
+ * XREF data.
+ * @protected
+ */
+ protected $xref = array();
- /**
- * Object streams.
- * @protected
- */
- protected $objstreams = array();
+ /**
+ * Object streams.
+ * @protected
+ */
+ protected $objstreams = array();
- /**
- * Objects in objstreams.
- * @protected
- */
- protected $objstreamobjs = array();
+ /**
+ * Objects in objstreams.
+ * @protected
+ */
+ protected $objstreamobjs = array();
- /**
- * List of seen XREF data locations.
- * @protected
- */
- protected $xref_seen_offsets = array();
+ /**
+ * List of seen XREF data locations.
+ * @protected
+ */
+ protected $xref_seen_offsets = array();
- /**
- * Array of PDF objects.
- * @protected
- */
- protected $objects = array();
+ /**
+ * Array of PDF objects.
+ * @protected
+ */
+ protected $objects = array();
- /**
- * Array of object offsets.
- * @private
- */
- private $objoffsets = array();
+ /**
+ * Array of object offsets.
+ * @private
+ */
+ private $objoffsets = array();
- /**
- * Class object for decoding filters.
- * @private
- */
- private $FilterDecoders;
+ /**
+ * Class object for decoding filters.
+ * @private
+ */
+ private $FilterDecoders;
/**
* Pages
*
* @private array
*/
- public $pages;
+ private $pages;
/**
* Page count
@@ -176,66 +176,66 @@ class tcpdi_parser {
// -----------------------------------------------------------------------------
- /**
- * Parse a PDF document an return an array of objects.
- * @param $data (string) PDF data to parse.
- * @public
- * @since 1.0.000 (2011-05-24)
- */
- public function __construct($data, $uniqueid) {
- if (empty($data)) {
- $this->Error('Empty PDF data.');
- }
- $this->uniqueid = $uniqueid;
- $this->pdfdata = $data;
- // get length
- $pdflen = strlen($this->pdfdata);
- // initialize class for decoding filters
- $this->FilterDecoders = new TCPDF_FILTERS();
- // get xref and trailer data
- $this->xref = $this->getXrefData();
- $this->findObjectOffsets();
- // parse all document objects
- $this->objects = array();
- /*foreach ($this->xref['xref'] as $obj => $offset) {
- if (!isset($this->objects[$obj]) AND ($offset > 0)) {
- // decode only objects with positive offset
- //$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
- }
- }*/
+ /**
+ * Parse a PDF document and return an array of objects.
+ * @param $data (string) PDF data to parse.
+ * @public
+ * @since 1.0.000 (2011-05-24)
+ */
+ public function __construct($data, $uniqueid) {
+ if (empty($data)) {
+ $this->Error('Empty PDF data.');
+ }
+ $this->uniqueid = $uniqueid;
+ $this->pdfdata = $data;
+ // get length
+ $pdflen = strlen($this->pdfdata);
+ // initialize class for decoding filters
+ $this->FilterDecoders = new TCPDF_FILTERS();
+ // get xref and trailer data
+ $this->xref = $this->getXrefData();
+ $this->findObjectOffsets();
+ // parse all document objects
+ $this->objects = array();
+ /*foreach ($this->xref['xref'] as $obj => $offset) {
+ if (!isset($this->objects[$obj]) AND ($offset > 0)) {
+ // decode only objects with positive offset
+ //$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
+ }
+ }*/
$this->getPDFVersion();
- $this->readPages();
- }
+ $this->readPages();
+ }
- /**
- * Clean up when done, to free memory etc
- */
- public function cleanUp() {
- unset($this->pdfdata);
- $this->pdfdata = '';
- unset($this->objstreams);
- $this->objstreams = array();
- unset($this->objects);
- $this->objects = array();
- unset($this->objstreamobjs);
- $this->objstreamobjs = array();
- unset($this->xref);
- $this->xref = array();
- unset($this->objoffsets);
- $this->objoffsets = array();
- unset($this->pages);
- $this->pages = array();
- }
+ /**
+ * Clean up when done, to free memory etc
+ */
+ public function cleanUp() {
+ unset($this->pdfdata);
+ $this->pdfdata = '';
+ unset($this->objstreams);
+ $this->objstreams = array();
+ unset($this->objects);
+ $this->objects = array();
+ unset($this->objstreamobjs);
+ $this->objstreamobjs = array();
+ unset($this->xref);
+ $this->xref = array();
+ unset($this->objoffsets);
+ $this->objoffsets = array();
+ unset($this->pages);
+ $this->pages = array();
+ }
- /**
- * Return an array of parsed PDF document objects.
- * @return (array) Array of parsed PDF document objects.
- * @public
- * @since 1.0.000 (2011-06-26)
- */
- public function getParsedData() {
- return array($this->xref, $this->objects, $this->pages);
- }
+ /**
+ * Return an array of parsed PDF document objects.
+ * @return (array) Array of parsed PDF document objects.
+ * @public
+ * @since 1.0.000 (2011-06-26)
+ */
+ public function getParsedData() {
+ return array($this->xref, $this->objects, $this->pages);
+ }
/**
* Get PDF-Version
@@ -255,41 +255,41 @@ class tcpdi_parser {
*
*/
function readPages() {
- $params = $this->getObjectVal($this->xref['trailer'][1]['/Root']);
- $objref = null;
- foreach ($params[1][1] as $k=>$v) {
- if ($k == '/Pages') {
- $objref = $v;
- break;
- }
- }
- if ($objref == null || $objref[0] !== PDF_TYPE_OBJREF) {
- // Offset not found.
- return;
- }
+ $params = $this->getObjectVal($this->xref['trailer'][1]['/Root']);
+ $objref = null;
+ foreach ($params[1][1] as $k=>$v) {
+ if ($k == '/Pages') {
+ $objref = $v;
+ break;
+ }
+ }
+ if ($objref == null || $objref[0] !== PDF_TYPE_OBJREF) {
+ // Offset not found.
+ return;
+ }
- $dict = $this->getObjectVal($objref);
- if ($dict[0] == PDF_TYPE_OBJECT && $dict[1][0] == PDF_TYPE_DICTIONARY) {
- // Dict wrapped in an object
- $dict = $dict[1];
- }
+ $dict = $this->getObjectVal($objref);
+ if ($dict[0] == PDF_TYPE_OBJECT && $dict[1][0] == PDF_TYPE_DICTIONARY) {
+ // Dict wrapped in an object
+ $dict = $dict[1];
+ }
- if ($dict[0] !== PDF_TYPE_DICTIONARY) {
- return;
- }
+ if ($dict[0] !== PDF_TYPE_DICTIONARY) {
+ return;
+ }
- $this->pages = array();
- if (isset($dict[1]['/Kids'])) {
- $v = $dict[1]['/Kids'];
- if ($v[0] == PDF_TYPE_ARRAY) {
- foreach ($v[1] as $ref) {
- $page = $this->getObjectVal($ref);
- $this->readPage($page);
- }
- }
- }
+ $this->pages = array();
+ if (isset($dict[1]['/Kids'])) {
+ $v = $dict[1]['/Kids'];
+ if ($v[0] == PDF_TYPE_ARRAY) {
+ foreach ($v[1] as $ref) {
+ $page = $this->getObjectVal($ref);
+ $this->readPage($page);
+ }
+ }
+ }
- $this->page_count = count($this->pages);
+ $this->page_count = count($this->pages);
}
/**
@@ -297,15 +297,15 @@ class tcpdi_parser {
*
*/
private function readPage($page) {
- if (isset($page[1][1]['/Kids'])) {
- // Nested pages!
- foreach ($page[1][1]['/Kids'][1] as $subref) {
- $subpage = $this->getObjectVal($subref);
- $this->readPage($subpage);
- }
- } else {
- $this->pages[] = $page;
- }
+ if (isset($page[1][1]['/Kids'])) {
+ // Nested pages!
+ foreach ($page[1][1]['/Kids'][1] as $subref) {
+ $subpage = $this->getObjectVal($subref);
+ $this->readPage($subpage);
+ }
+ } else {
+ $this->pages[] = $page;
+ }
}
/**
@@ -317,835 +317,854 @@ class tcpdi_parser {
return $this->page_count;
}
- /**
- * Get Cross-Reference (xref) table and trailer data from PDF document data.
- * @param $offset (int) xref offset (if know).
- * @param $xref (array) previous xref array (if any).
- * @return Array containing xref and trailer data.
- * @protected
- * @since 1.0.000 (2011-05-24)
- */
- protected function getXrefData($offset=0, $xref=array()) {
- if ($offset == 0) {
- // find last startxref
- if (preg_match('/.*[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/is', $this->pdfdata, $matches) == 0) {
- $this->Error('Unable to find startxref');
- }
- $startxref = $matches[1];
- } else {
- if (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
- // Cross-Reference Stream object
- $startxref = $offset;
- } elseif (preg_match('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
- // startxref found
- $startxref = $matches[1][0];
- } else {
- $this->Error('Unable to find startxref');
- }
- }
- unset($matches);
+ /**
+ * Get Cross-Reference (xref) table and trailer data from PDF document data.
+ * @param $offset (int) xref offset (if known).
+ * @param $xref (array) previous xref array (if any).
+ * @return Array containing xref and trailer data.
+ * @protected
+ * @since 1.0.000 (2011-05-24)
+ */
+ protected function getXrefData($offset=0, $xref=array()) {
+ if ($offset == 0) {
+ // find last startxref
+ if (preg_match('/.*[\r\n]startxref[\s\r\n]+([0-9]+)[\s\r\n]+%%EOF/is', $this->pdfdata, $matches) == 0) {
+ $this->Error('Unable to find startxref');
+ }
+ $startxref = $matches[1];
+ } else {
+ if (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
+ // Cross-Reference Stream object
+ $startxref = $offset;
+ } elseif (preg_match('/[\r\n]startxref[\s\r\n]+([0-9]+)[\s\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
+ // startxref found
+ $startxref = $matches[1][0];
+ } else {
+ $this->Error('Unable to find startxref');
+ }
+ }
+ unset($matches);
- // DOMPDF gets the startxref wrong, giving us the linebreak before the xref starts.
- $startxref += strspn($this->pdfdata, "\r\n", $startxref);
+ // DOMPDF gets the startxref wrong, giving us the linebreak before the xref starts.
+ $startxref += strspn($this->pdfdata, "\r\n", $startxref);
- // check xref position
- if (strpos($this->pdfdata, 'xref', $startxref) == $startxref) {
- // Cross-Reference
- $xref = $this->decodeXref($startxref, $xref);
- } else {
- // Cross-Reference Stream
- $xref = $this->decodeXrefStream($startxref, $xref);
- }
- if (empty($xref)) {
- $this->Error('Unable to find xref');
- }
+ // check xref position
+ if (strpos($this->pdfdata, 'xref', $startxref) == $startxref) {
+ // Cross-Reference
+ $xref = $this->decodeXref($startxref, $xref);
+ } else {
+ // Cross-Reference Stream
+ $xref = $this->decodeXrefStream($startxref, $xref);
+ }
+ if (empty($xref)) {
+ $this->Error('Unable to find xref');
+ }
- return $xref;
- }
+ return $xref;
+ }
- /**
- * Decode the Cross-Reference section
- * @param $startxref (int) Offset at which the xref section starts.
- * @param $xref (array) Previous xref array (if any).
- * @return Array containing xref and trailer data.
- * @protected
- * @since 1.0.000 (2011-06-20)
- */
- protected function decodeXref($startxref, $xref=array()) {
- $this->xref_seen_offsets[] = $startxref;
+ /**
+ * Decode the Cross-Reference section
+ * @param $startxref (int) Offset at which the xref section starts.
+ * @param $xref (array) Previous xref array (if any).
+ * @return Array containing xref and trailer data.
+ * @protected
+ * @since 1.0.000 (2011-06-20)
+ */
+ protected function decodeXref($startxref, $xref=array()) {
+ $this->xref_seen_offsets[] = $startxref;
if (!isset($xref['xref_location'])) {
$xref['xref_location'] = $startxref;
$xref['max_object'] = 0;
- }
- // extract xref data (object indexes and offsets)
- $xoffset = $startxref + 5;
- // initialize object number
- $obj_num = 0;
- $offset = $xoffset;
- while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
- $offset = (strlen($matches[0][0]) + $matches[0][1]);
- if ($matches[3][0] == 'n') {
- // create unique object index: [object number]_[generation number]
- $gen_num = intval($matches[2][0]);
- $index = $obj_num.'_'.$gen_num;
- // check if object already exist
- if (!isset($xref['xref'][$obj_num][$gen_num])) {
- // store object offset position
- $xref['xref'][$obj_num][$gen_num] = intval($matches[1][0]);
- }
- ++$obj_num;
- $offset += 2;
- } elseif ($matches[3][0] == 'f') {
- ++$obj_num;
- $offset += 2;
- } else {
- // object number (index)
- $obj_num = intval($matches[1][0]);
- }
- }
- unset($matches);
- $xref['max_object'] = max($xref['max_object'], $obj_num);
- // get trailer data
- if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
- $trailer_data = $matches[1][0];
- if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
- // get only the last updated version
- $xref['trailer'] = array();
- $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
- $xref['trailer'][1] = array();
- // parse trailer_data
- if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
- $xref['trailer'][1]['/Size'] = array(PDF_TYPE_NUMERIC, intval($matches[1]));
- }
- if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
- $xref['trailer'][1]['/Root'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
- }
- if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
- $xref['trailer'][1]['/Encrypt'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
- }
- if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
- $xref['trailer'][1]['/Info'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
- }
- if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
- $xref['trailer'][1]['/ID'] = array(PDF_TYPE_ARRAY, array());
- $xref['trailer'][1]['/ID'][1][0] = array(PDF_TYPE_HEX, $matches[1]);
- $xref['trailer'][1]['/ID'][1][1] = array(PDF_TYPE_HEX, $matches[2]);
- }
- }
- if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
- // get previous xref
- $prevoffset = intval($matches[1]);
- if (!in_array($prevoffset, $this->xref_seen_offsets)) {
- $this->xref_seen_offsets[] = $prevoffset;
- $xref = $this->getXrefData($prevoffset, $xref);
- }
- }
- unset($matches);
- } else {
- $this->Error('Unable to find trailer');
- }
- return $xref;
- }
+ }
+ // extract xref data (object indexes and offsets)
+ $xoffset = $startxref + 5;
+ // initialize object number
+ $obj_num = 0;
+ $offset = $xoffset;
+ while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
+ $offset = (strlen($matches[0][0]) + $matches[0][1]);
+ if ($matches[3][0] == 'n') {
+ // create unique object index: [object number]_[generation number]
+ $gen_num = intval($matches[2][0]);
+ $index = $obj_num.'_'.$gen_num;
+ // check if object already exist
+ if (!isset($xref['xref'][$obj_num][$gen_num])) {
+ // store object offset position
+ $xref['xref'][$obj_num][$gen_num] = intval($matches[1][0]);
+ }
+ ++$obj_num;
+ $offset += 2;
+ } elseif ($matches[3][0] == 'f') {
+ ++$obj_num;
+ $offset += 2;
+ } else {
+ // object number (index)
+ $obj_num = intval($matches[1][0]);
+ }
+ }
+ unset($matches);
+ $xref['max_object'] = max($xref['max_object'], $obj_num);
+ // get trailer data
+ if (preg_match('/trailer[\s]*<<(.*)>>[\s\r\n]+(?:[%].*[\r\n]+)*startxref[\s\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
+ $trailer_data = $matches[1][0];
+ if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
+ // get only the last updated version
+ $xref['trailer'] = array();
+ $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
+ $xref['trailer'][1] = array();
+ // parse trailer_data
+ if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
+ $xref['trailer'][1]['/Size'] = array(PDF_TYPE_NUMERIC, intval($matches[1]));
+ }
+ if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
+ $xref['trailer'][1]['/Root'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
+ }
+ if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
+ $xref['trailer'][1]['/Encrypt'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
+ }
+ if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
+ $xref['trailer'][1]['/Info'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
+ }
+ if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
+ $xref['trailer'][1]['/ID'] = array(PDF_TYPE_ARRAY, array());
+ $xref['trailer'][1]['/ID'][1][0] = array(PDF_TYPE_HEX, $matches[1]);
+ $xref['trailer'][1]['/ID'][1][1] = array(PDF_TYPE_HEX, $matches[2]);
+ }
+ }
+ if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
+ // get previous xref
+ $prevoffset = intval($matches[1]);
+ if (!in_array($prevoffset, $this->xref_seen_offsets)) {
+ $this->xref_seen_offsets[] = $prevoffset;
+ $xref = $this->getXrefData($prevoffset, $xref);
+ }
+ }
+ unset($matches);
+ } else {
+ $this->Error('Unable to find trailer');
+ }
+ return $xref;
+ }
- /**
- * Decode the Cross-Reference Stream section
- * @param $startxref (int) Offset at which the xref section starts.
- * @param $xref (array) Previous xref array (if any).
- * @return Array containing xref and trailer data.
- * @protected
- * @since 1.0.003 (2013-03-16)
- */
- protected function decodeXrefStream($startxref, $xref=array()) {
- // try to read Cross-Reference Stream
- list($xrefobj, $unused) = $this->getRawObject($startxref);
- $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
+ /**
+ * Decode the Cross-Reference Stream section
+ * @param $startxref (int) Offset at which the xref section starts.
+ * @param $xref (array) Previous xref array (if any).
+ * @return Array containing xref and trailer data.
+ * @protected
+ * @since 1.0.003 (2013-03-16)
+ */
+ protected function decodeXrefStream($startxref, $xref=array()) {
+ // try to read Cross-Reference Stream
+ list($xrefobj, $unused) = $this->getRawObject($startxref);
+ $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
if (!isset($xref['xref_location'])) {
$xref['xref_location'] = $startxref;
$xref['max_object'] = 0;
- }
+ }
if (!isset($xref['xref'])) {
$xref['xref'] = array();
- }
- if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
- // get only the last updated version
- $xref['trailer'] = array();
- $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
- $xref['trailer'][1] = array();
- $filltrailer = true;
- } else {
- $filltrailer = false;
- }
- $valid_crs = false;
- $sarr = $xrefcrs[0][1];
- $keys = array_keys($sarr);
- $columns = 1; // Default as per PDF 32000-1:2008.
- $predictor = 1; // Default as per PDF 32000-1:2008.
- foreach ($keys as $k=>$key) {
- $v = $sarr[$key];
- if (($key == '/Type') AND ($v[0] == PDF_TYPE_TOKEN AND ($v[1] == 'XRef'))) {
- $valid_crs = true;
- } elseif (($key == '/Index') AND ($v[0] == PDF_TYPE_ARRAY AND count($v[1]) >= 2)) {
- // first object number in the subsection
- $index_first = intval($v[1][0][1]);
- // number of entries in the subsection
- $index_entries = intval($v[1][1][1]);
- } elseif (($key == '/Prev') AND ($v[0] == PDF_TYPE_NUMERIC)) {
- // get previous xref offset
- $prevxref = intval($v[1]);
- } elseif (($key == '/W') AND ($v[0] == PDF_TYPE_ARRAY)) {
- // number of bytes (in the decoded stream) of the corresponding field
- $wb = array();
- $wb[0] = intval($v[1][0][1]);
- $wb[1] = intval($v[1][1][1]);
- $wb[2] = intval($v[1][2][1]);
- } elseif (($key == '/DecodeParms') AND ($v[0] == PDF_TYPE_DICTIONARY)) {
- $decpar = $v[1];
- foreach ($decpar as $kdc => $vdc) {
- if (($kdc == '/Columns') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
- $columns = intval($vdc[1]);
- } elseif (($kdc == '/Predictor') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
- $predictor = intval($vdc[1]);
- }
- }
- } elseif ($filltrailer) {
- switch($key) {
- case '/Size':
- case '/Root':
- case '/Info':
- case '/ID':
- $xref['trailer'][1][$key] = $v;
- break;
- default:
- break;
- }
- }
- }
- // decode data
- $obj_num = 0;
- if ($valid_crs AND isset($xrefcrs[1][3][0])) {
- // number of bytes in a row
- $rowlen = ($columns + 1);
- // convert the stream into an array of integers
- $sdata = unpack('C*', $xrefcrs[1][3][0]);
- // split the rows
- $sdata = array_chunk($sdata, $rowlen);
- // initialize decoded array
- $ddata = array();
- // initialize first row with zeros
- $prev_row = array_fill (0, $rowlen, 0);
- // for each row apply PNG unpredictor
- foreach ($sdata as $k => $row) {
- // initialize new row
- $ddata[$k] = array();
- // get PNG predictor value
- if (empty($predictor)) {
- $predictor = (10 + $row[0]);
- }
- // for each byte on the row
- for ($i=1; $i<=$columns; ++$i) {
- // new index
- $j = ($i - 1);
- $row_up = $prev_row[$j];
- if ($i == 1) {
- $row_left = 0;
- $row_upleft = 0;
- } else {
- $row_left = $row[($i - 1)];
- $row_upleft = $prev_row[($j - 1)];
- }
- switch ($predictor) {
- case 1: // No prediction (equivalent to PNG None)
- case 10: { // PNG prediction (on encoding, PNG None on all rows)
- $ddata[$k][$j] = $row[$i];
- break;
- }
- case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
- $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
- break;
- }
- case 12: { // PNG prediction (on encoding, PNG Up on all rows)
- $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
- break;
- }
- case 13: { // PNG prediction (on encoding, PNG Average on all rows)
- $ddata[$k][$j] = (($row[$i] + (($row_left + $row_up) / 2)) & 0xff);
- break;
- }
- case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
- // initial estimate
- $p = ($row_left + $row_up - $row_upleft);
- // distances
- $pa = abs($p - $row_left);
- $pb = abs($p - $row_up);
- $pc = abs($p - $row_upleft);
- $pmin = min($pa, $pb, $pc);
- // return minumum distance
- switch ($pmin) {
- case $pa: {
- $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
- break;
- }
- case $pb: {
- $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
- break;
- }
- case $pc: {
- $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xff);
- break;
- }
- }
- break;
- }
- default: { // PNG prediction (on encoding, PNG optimum)
- $this->Error("Unknown PNG predictor $predictor");
- break;
- }
- }
- }
- $prev_row = $ddata[$k];
- } // end for each row
- // complete decoding
- unset($sdata);
- $sdata = array();
- // for every row
- foreach ($ddata as $k => $row) {
- // initialize new row
- $sdata[$k] = array(0, 0, 0);
- if ($wb[0] == 0) {
- // default type field
- $sdata[$k][0] = 1;
- }
- $i = 0; // count bytes on the row
- // for every column
- for ($c = 0; $c < 3; ++$c) {
- // for every byte on the column
- for ($b = 0; $b < $wb[$c]; ++$b) {
- $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
- ++$i;
- }
- }
- }
- unset($ddata);
- // fill xref
- if (isset($index_first)) {
- $obj_num = $index_first;
- } else {
- $obj_num = 0;
- }
- foreach ($sdata as $k => $row) {
- switch ($row[0]) {
- case 0: { // (f) linked list of free objects
- ++$obj_num;
- break;
- }
- case 1: { // (n) objects that are in use but are not compressed
- // create unique object index: [object number]_[generation number]
- $index = $obj_num.'_'.$row[2];
- // check if object already exist
- if (!isset($xref['xref'][$obj_num][$row[2]])) {
- // store object offset position
- $xref['xref'][$obj_num][$row[2]] = $row[1];
- }
- ++$obj_num;
- break;
- }
- case 2: { // compressed objects
- // $row[1] = object number of the object stream in which this object is stored
- // $row[2] = index of this object within the object stream
- /*$index = $row[1].'_0_'.$row[2];
- $xref['xref'][$row[1]][0][$row[2]] = -1;*/
- break;
- }
- default: { // null objects
- break;
- }
- }
- }
- } // end decoding data
- $xref['max_object'] = max($xref['max_object'], $obj_num);
- if (isset($prevxref)) {
- // get previous xref
- $xref = $this->getXrefData($prevxref, $xref);
- }
- return $xref;
- }
+ }
+ if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
+ // get only the last updated version
+ $xref['trailer'] = array();
+ $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
+ $xref['trailer'][1] = array();
+ $filltrailer = true;
+ } else {
+ $filltrailer = false;
+ }
+ $valid_crs = false;
+ $sarr = $xrefcrs[0][1];
+ $keys = array_keys($sarr);
+ $columns = 1; // Default as per PDF 32000-1:2008.
+ $predictor = 1; // Default as per PDF 32000-1:2008.
+ foreach ($keys as $k=>$key) {
+ $v = $sarr[$key];
+ if (($key == '/Type') AND ($v[0] == PDF_TYPE_TOKEN AND ($v[1] == 'XRef'))) {
+ $valid_crs = true;
+ } elseif (($key == '/Index') AND ($v[0] == PDF_TYPE_ARRAY AND count($v[1]) >= 2)) {
+ // first object number in the subsection
+ $index_first = intval($v[1][0][1]);
+ // number of entries in the subsection
+ $index_entries = intval($v[1][1][1]);
+ } elseif (($key == '/Prev') AND ($v[0] == PDF_TYPE_NUMERIC)) {
+ // get previous xref offset
+ $prevxref = intval($v[1]);
+ } elseif (($key == '/W') AND ($v[0] == PDF_TYPE_ARRAY)) {
+ // number of bytes (in the decoded stream) of the corresponding field
+ $wb = array();
+ $wb[0] = intval($v[1][0][1]);
+ $wb[1] = intval($v[1][1][1]);
+ $wb[2] = intval($v[1][2][1]);
+ } elseif (($key == '/DecodeParms') AND ($v[0] == PDF_TYPE_DICTIONARY)) {
+ $decpar = $v[1];
+ foreach ($decpar as $kdc => $vdc) {
+ if (($kdc == '/Columns') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
+ $columns = intval($vdc[1]);
+ } elseif (($kdc == '/Predictor') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
+ $predictor = intval($vdc[1]);
+ }
+ }
+ } elseif ($filltrailer) {
+ switch($key) {
+ case '/Size':
+ case '/Root':
+ case '/Info':
+ case '/ID':
+ $xref['trailer'][1][$key] = $v;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // decode data
+ $obj_num = 0;
+ if ($valid_crs AND isset($xrefcrs[1][3][0])) {
+ // number of bytes in a row
+ $rowlen = ($columns + 1);
+ // convert the stream into an array of integers
+ $sdata = unpack('C*', $xrefcrs[1][3][0]);
+ // split the rows
+ $sdata = array_chunk($sdata, $rowlen);
+ // initialize decoded array
+ $ddata = array();
+ // initialize first row with zeros
+ $prev_row = array_fill (0, $rowlen, 0);
+ // for each row apply PNG unpredictor
+ foreach ($sdata as $k => $row) {
+ // initialize new row
+ $ddata[$k] = array();
+ // get PNG predictor value
+ if (empty($predictor)) {
+ $predictor = (10 + $row[0]);
+ }
+ // for each byte on the row
+ for ($i=1; $i<=$columns; ++$i) {
+ if (!isset($row[$i])) {
+ // No more data in this row - we're done here.
+ break;
+ }
+ // new index
+ $j = ($i - 1);
+ $row_up = $prev_row[$j];
+ if ($i == 1) {
+ $row_left = 0;
+ $row_upleft = 0;
+ } else {
+ $row_left = $row[($i - 1)];
+ $row_upleft = $prev_row[($j - 1)];
+ }
+ switch ($predictor) {
+ case 1: // No prediction (equivalent to PNG None)
+ case 10: { // PNG prediction (on encoding, PNG None on all rows)
+ $ddata[$k][$j] = $row[$i];
+ break;
+ }
+ case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
+ $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
+ break;
+ }
+ case 12: { // PNG prediction (on encoding, PNG Up on all rows)
+ $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
+ break;
+ }
+ case 13: { // PNG prediction (on encoding, PNG Average on all rows)
+ $ddata[$k][$j] = (($row[$i] + (($row_left + $row_up) / 2)) & 0xff);
+ break;
+ }
+ case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
+ // initial estimate
+ $p = ($row_left + $row_up - $row_upleft);
+ // distances
+ $pa = abs($p - $row_left);
+ $pb = abs($p - $row_up);
+ $pc = abs($p - $row_upleft);
+ $pmin = min($pa, $pb, $pc);
+ // return minimum distance
+ switch ($pmin) {
+ case $pa: {
+ $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
+ break;
+ }
+ case $pb: {
+ $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
+ break;
+ }
+ case $pc: {
+ $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xff);
+ break;
+ }
+ }
+ break;
+ }
+ default: { // PNG prediction (on encoding, PNG optimum)
+ $this->Error("Unknown PNG predictor $predictor");
+ break;
+ }
+ }
+ }
+ $prev_row = $ddata[$k];
+ } // end for each row
+ // complete decoding
+ unset($sdata);
+ $sdata = array();
+ // for every row
+ foreach ($ddata as $k => $row) {
+ // initialize new row
+ $sdata[$k] = array(0, 0, 0);
+ if ($wb[0] == 0) {
+ // default type field
+ $sdata[$k][0] = 1;
+ }
+ $i = 0; // count bytes on the row
+ // for every column
+ for ($c = 0; $c < 3; ++$c) {
+ // for every byte on the column
+ for ($b = 0; $b < $wb[$c]; ++$b) {
+ if (isset($row[$i])) {
+ $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
+ }
+ ++$i;
+ }
+ }
+ }
+ unset($ddata);
+ // fill xref
+ if (isset($index_first)) {
+ $obj_num = $index_first;
+ } else {
+ $obj_num = 0;
+ }
+ foreach ($sdata as $k => $row) {
+ switch ($row[0]) {
+ case 0: { // (f) linked list of free objects
+ ++$obj_num;
+ break;
+ }
+ case 1: { // (n) objects that are in use but are not compressed
+ // create unique object index: [object number]_[generation number]
+ $index = $obj_num.'_'.$row[2];
+ // check if object already exist
+ if (!isset($xref['xref'][$obj_num][$row[2]])) {
+ // store object offset position
+ $xref['xref'][$obj_num][$row[2]] = $row[1];
+ }
+ ++$obj_num;
+ break;
+ }
+ case 2: { // compressed objects
+ // $row[1] = object number of the object stream in which this object is stored
+ // $row[2] = index of this object within the object stream
+ /*$index = $row[1].'_0_'.$row[2];
+ $xref['xref'][$row[1]][0][$row[2]] = -1;*/
+ break;
+ }
+ default: { // null objects
+ break;
+ }
+ }
+ }
+ } // end decoding data
+ $xref['max_object'] = max($xref['max_object'], $obj_num);
+ if (isset($prevxref)) {
+ // get previous xref
+ $xref = $this->getXrefData($prevxref, $xref);
+ }
+ return $xref;
+ }
- /**
- * Get raw stream data
- * @param $offset (int) Stream offset.
- * @param $length (int) Stream length.
- * @return string Steam content
- * @protected
- */
- protected function getRawStream($offset, $length) {
- $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
- $offset += 6; // "stream"
- $offset += strspn($this->pdfdata, "\r\n", $offset);
+ /**
+ * Get raw stream data
+ * @param $offset (int) Stream offset.
+ * @param $length (int) Stream length.
+ * @return string Stream content
+ * @protected
+ */
+ protected function getRawStream($offset, $length) {
+ $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
+ $offset += 6; // "stream"
+ $offset += strspn($this->pdfdata, "\x20", $offset);
+ $offset += strspn($this->pdfdata, "\r\n", $offset);
- $obj = array();
- $obj[] = PDF_TYPE_STREAM;
- $obj[] = substr($this->pdfdata, $offset, $length);
+ $obj = array();
+ $obj[] = PDF_TYPE_STREAM;
+ $obj[] = substr($this->pdfdata, $offset, $length);
- return array($obj, $offset+$length);
- }
+ return array($obj, $offset+$length);
+ }
- /**
- * Get object type, raw value and offset to next object
- * @param $offset (int) Object offset.
- * @return array containing object type, raw value and offset to next object
- * @protected
- * @since 1.0.000 (2011-06-20)
- */
- protected function getRawObject($offset=0, $data=null) {
- if ($data == null) {
- $data =& $this->pdfdata;
- }
- $objtype = ''; // object type to be returned
- $objval = ''; // object value to be returned
- // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
+ /**
+ * Get object type, raw value and offset to next object
+ * @param $offset (int) Object offset.
+ * @return array containing object type, raw value and offset to next object
+ * @protected
+ * @since 1.0.000 (2011-06-20)
+ */
+ protected function getRawObject($offset=0, $data=null) {
+ if ($data == null) {
+ $data =& $this->pdfdata;
+ }
+ $objtype = ''; // object type to be returned
+ $objval = ''; // object value to be returned
+ // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
while (strspn($data[$offset], "\x00\x09\x0a\x0c\x0d\x20") == 1) {
- $offset++;
- }
- // get first char
+ $offset++;
+ }
+ // get first char
$char = $data[$offset];
- // get object type
- switch ($char) {
- case '%': { // \x25 PERCENT SIGN
- // skip comment and search for next token
- $next = strcspn($data, "\r\n", $offset);
- if ($next > 0) {
- $offset += $next;
- return $this->getRawObject($offset, $data);
- }
- break;
- }
- case '/': { // \x2F SOLIDUS
- // name object
- $objtype = PDF_TYPE_TOKEN;
- ++$offset;
- $length = strcspn($data, "\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset);
- $objval = substr($data, $offset, $length);
- $offset += $length;
- break;
- }
- case '(': // \x28 LEFT PARENTHESIS
- case ')': { // \x29 RIGHT PARENTHESIS
- // literal string object
- $objtype = PDF_TYPE_STRING;
- ++$offset;
- $strpos = $offset;
- if ($char == '(') {
- $open_bracket = 1;
- while ($open_bracket > 0) {
+ // get object type
+ switch ($char) {
+ case '%': { // \x25 PERCENT SIGN
+ // skip comment and search for next token
+ $next = strcspn($data, "\r\n", $offset);
+ if ($next > 0) {
+ $offset += $next;
+ list($obj, $unused) = $this->getRawObject($offset, $data);
+ return $obj;
+ }
+ break;
+ }
+ case '/': { // \x2F SOLIDUS
+ // name object
+ $objtype = PDF_TYPE_TOKEN;
+ ++$offset;
+ $length = strcspn($data, "\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset);
+ $objval = substr($data, $offset, $length);
+ $offset += $length;
+ break;
+ }
+ case '(': // \x28 LEFT PARENTHESIS
+ case ')': { // \x29 RIGHT PARENTHESIS
+ // literal string object
+ $objtype = PDF_TYPE_STRING;
+ ++$offset;
+ $strpos = $offset;
+ if ($char == '(') {
+ $open_bracket = 1;
+ while ($open_bracket > 0) {
if (!isset($data[$strpos])) {
- break;
- }
+ break;
+ }
$ch = $data[$strpos];
- switch ($ch) {
- case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
- // skip next character
- ++$strpos;
- break;
- }
- case '(': { // LEFT PARENHESIS (28h)
- ++$open_bracket;
- break;
- }
- case ')': { // RIGHT PARENTHESIS (29h)
- --$open_bracket;
- break;
- }
- }
- ++$strpos;
- }
- $objval = substr($data, $offset, ($strpos - $offset - 1));
- $offset = $strpos;
- }
- break;
- }
- case '[': // \x5B LEFT SQUARE BRACKET
- case ']': { // \x5D RIGHT SQUARE BRACKET
- // array object
- $objtype = PDF_TYPE_ARRAY;
- ++$offset;
- if ($char == '[') {
- // get array content
- $objval = array();
- do {
- // get element
- list($element, $offset) = $this->getRawObject($offset, $data);
- $objval[] = $element;
- } while ($element[0] !== ']');
- // remove closing delimiter
- array_pop($objval);
- } else {
- $objtype = ']';
- }
- break;
- }
- case '<': // \x3C LESS-THAN SIGN
- case '>': { // \x3E GREATER-THAN SIGN
+ switch ($ch) {
+ case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
+ // skip next character
+ ++$strpos;
+ break;
+ }
+ case '(': { // LEFT PARENTHESIS (28h)
+ ++$open_bracket;
+ break;
+ }
+ case ')': { // RIGHT PARENTHESIS (29h)
+ --$open_bracket;
+ break;
+ }
+ }
+ ++$strpos;
+ }
+ $objval = substr($data, $offset, ($strpos - $offset - 1));
+ $offset = $strpos;
+ }
+ break;
+ }
+ case '[': // \x5B LEFT SQUARE BRACKET
+ case ']': { // \x5D RIGHT SQUARE BRACKET
+ // array object
+ $objtype = PDF_TYPE_ARRAY;
+ ++$offset;
+ if ($char == '[') {
+ // get array content
+ $objval = array();
+ do {
+ // get element
+ list($element, $offset) = $this->getRawObject($offset, $data);
+ $objval[] = $element;
+ } while ($element[0] !== ']');
+ // remove closing delimiter
+ array_pop($objval);
+ } else {
+ $objtype = ']';
+ }
+ break;
+ }
+ case '<': // \x3C LESS-THAN SIGN
+ case '>': { // \x3E GREATER-THAN SIGN
if (isset($data[($offset + 1)]) AND ($data[($offset + 1)] == $char)) {
- // dictionary object
- $objtype = PDF_TYPE_DICTIONARY;
- if ($char == '<') {
- list ($objval, $offset) = $this->getDictValue($offset, $data);
- } else {
- $objtype = '>>';
- $offset += 2;
- }
- } else {
- // hexadecimal string object
- $objtype = PDF_TYPE_HEX;
- ++$offset;
- // The "Panose" entry in the FontDescriptor Style dict seems to have hex bytes separated by spaces.
- if (($char == '<') AND (preg_match('/^([0-9A-Fa-f ]+)[>]/iU', substr($data, $offset), $matches) == 1)) {
- $objval = $matches[1];
- $offset += strlen($matches[0]);
- unset($matches);
- }
- }
- break;
- }
- default: {
+ // dictionary object
+ $objtype = PDF_TYPE_DICTIONARY;
+ if ($char == '<') {
+ list ($objval, $offset) = $this->getDictValue($offset, $data);
+ } else {
+ $objtype = '>>';
+ $offset += 2;
+ }
+ } else {
+ // hexadecimal string object
+ $objtype = PDF_TYPE_HEX;
+ ++$offset;
+ // The "Panose" entry in the FontDescriptor Style dict seems to have hex bytes separated by spaces.
+ if (($char == '<') AND (preg_match('/^([0-9A-Fa-f ]+)[>]/iU', substr($data, $offset), $matches) == 1)) {
+ $objval = $matches[1];
+ $offset += strlen($matches[0]);
+ unset($matches);
+ }
+ }
+ break;
+ }
+ default: {
$frag = $data[$offset] . @$data[$offset+1] . @$data[$offset+2] . @$data[$offset+3];
- switch ($frag) {
- case 'endo':
- // indirect object
- $objtype = 'endobj';
- $offset += 6;
- break;
- case 'stre':
- // Streams should always be indirect objects, and thus processed by getRawStream().
- // If we get here, treat it as a null object as something has gone wrong.
- case 'null':
- // null object
- $objtype = PDF_TYPE_NULL;
- $offset += 4;
- $objval = 'null';
- break;
- case 'true':
- // boolean true object
- $objtype = PDF_TYPE_BOOLEAN;
- $offset += 4;
- $objval = true;
- break;
- case 'fals':
- // boolean false object
- $objtype = PDF_TYPE_BOOLEAN;
- $offset += 5;
- $objval = false;
- break;
- case 'ends':
- // end stream object
- $objtype = 'endstream';
- $offset += 9;
- break;
- default:
- if (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+([Robj]{1,3})/i', substr($data, $offset, 33), $matches) == 1) {
- if ($matches[3] == 'R') {
- // indirect object reference
- $objtype = PDF_TYPE_OBJREF;
- $offset += strlen($matches[0]);
- $objval = array(intval($matches[1]), intval($matches[2]));
- } elseif ($matches[3] == 'obj') {
- // object start
- $objtype = PDF_TYPE_OBJECT;
- $objval = intval($matches[1]).'_'.intval($matches[2]);
- $offset += strlen ($matches[0]);
- }
- } elseif (($numlen = strspn($data, '+-.0123456789', $offset)) > 0) {
- // numeric object
- $objval = substr($data, $offset, $numlen);
- $objtype = (intval($objval) != $objval) ? PDF_TYPE_REAL : PDF_TYPE_NUMERIC;
- $offset += $numlen;
- }
- unset($matches);
- break;
- }
- break;
- }
- }
- $obj = array();
- $obj[] = $objtype;
- if ($objtype == PDF_TYPE_OBJREF && is_array($objval)) {
- foreach ($objval as $val) {
- $obj[] = $val;
- }
- } else {
- $obj[] = $objval;
- }
- return array($obj, $offset);
- }
- private function getDictValue($offset, &$data) {
- $objval = array();
+ switch ($frag) {
+ case 'endo':
+ // indirect object
+ $objtype = 'endobj';
+ $offset += 6;
+ break;
+ case 'stre':
+ // Streams should always be indirect objects, and thus processed by getRawStream().
+ // If we get here, treat it as a null object as something has gone wrong.
+ case 'null':
+ // null object
+ $objtype = PDF_TYPE_NULL;
+ $offset += 4;
+ $objval = 'null';
+ break;
+ case 'true':
+ // boolean true object
+ $objtype = PDF_TYPE_BOOLEAN;
+ $offset += 4;
+ $objval = true;
+ break;
+ case 'fals':
+ // boolean false object
+ $objtype = PDF_TYPE_BOOLEAN;
+ $offset += 5;
+ $objval = false;
+ break;
+ case 'ends':
+ // end stream object
+ $objtype = 'endstream';
+ $offset += 9;
+ break;
+ default:
+ if (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+([Robj]{1,3})/i', substr($data, $offset, 33), $matches) == 1) {
+ if ($matches[3] == 'R') {
+ // indirect object reference
+ $objtype = PDF_TYPE_OBJREF;
+ $offset += strlen($matches[0]);
+ $objval = array(intval($matches[1]), intval($matches[2]));
+ } elseif ($matches[3] == 'obj') {
+ // object start
+ $objtype = PDF_TYPE_OBJECT;
+ $objval = intval($matches[1]).'_'.intval($matches[2]);
+ $offset += strlen ($matches[0]);
+ }
+ } elseif (($numlen = strspn($data, '+-.0123456789', $offset)) > 0) {
+ // numeric object
+ $objval = substr($data, $offset, $numlen);
+ $objtype = (intval($objval) != $objval) ? PDF_TYPE_REAL : PDF_TYPE_NUMERIC;
+ $offset += $numlen;
+ }
+ unset($matches);
+ break;
+ }
+ break;
+ }
+ }
+ $obj = array();
+ $obj[] = $objtype;
+ if ($objtype == PDF_TYPE_OBJREF && is_array($objval)) {
+ foreach ($objval as $val) {
+ $obj[] = $val;
+ }
+ } else {
+ $obj[] = $objval;
+ }
+ return array($obj, $offset);
+ }
+ private function getDictValue($offset, &$data) {
+ $objval = array();
- // Extract dict from data.
- $i=1;
- $dict = '';
- $offset += 2;
- do {
+ // Extract dict from data.
+ $i=1;
+ $dict = '';
+ $offset += 2;
+ do {
if ($data[$offset] == '>' && $data[$offset+1] == '>') {
- $i--;
- $dict .= '>>';
- $offset += 2;
+ $i--;
+ $dict .= '>>';
+ $offset += 2;
} else if ($data[$offset] == '<' && $data[$offset+1] == '<') {
- $i++;
- $dict .= '<<';
- $offset += 2;
- } else {
+ $i++;
+ $dict .= '<<';
+ $offset += 2;
+ } else {
$dict .= $data[$offset];
- $offset++;
- }
- } while ($i>0);
+ $offset++;
+ }
+ } while ($i>0);
- // Now that we have just the dict, parse it.
- $dictoffset = 0;
- do {
- // Get dict element.
- list($key, $eloffset) = $this->getRawObject($dictoffset, $dict);
- if ($key[0] == '>>') {
- break;
- }
- list($element, $dictoffset) = $this->getRawObject($eloffset, $dict);
- $objval['/'.$key[1]] = $element;
- unset($key);
- unset($element);
- } while (true);
+ // Now that we have just the dict, parse it.
+ $dictoffset = 0;
+ do {
+ // Get dict element.
+ list($key, $eloffset) = $this->getRawObject($dictoffset, $dict);
+ if ($key[0] == '>>') {
+ break;
+ }
+ list($element, $dictoffset) = $this->getRawObject($eloffset, $dict);
+ $objval['/'.$key[1]] = $element;
+ unset($key);
+ unset($element);
+ } while (true);
- return array($objval, $offset);
- }
+ return array($objval, $offset);
+ }
- /**
- * Get content of indirect object.
- * @param $obj_ref (string) Object number and generation number separated by underscore character.
- * @param $offset (int) Object offset.
- * @param $decoding (boolean) If true decode streams.
- * @return array containing object data.
- * @protected
- * @since 1.0.000 (2011-05-24)
- */
- protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
- $obj = explode('_', $obj_ref);
- if (($obj === false) OR (count($obj) != 2)) {
- $this->Error('Invalid object reference: '.$obj);
- return;
- }
- $objref = $obj[0].' '.$obj[1].' obj';
+ /**
+ * Get content of indirect object.
+ * @param $obj_ref (string) Object number and generation number separated by underscore character.
+ * @param $offset (int) Object offset.
+ * @param $decoding (boolean) If true decode streams.
+ * @return array containing object data.
+ * @protected
+ * @since 1.0.000 (2011-05-24)
+ */
+ protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
+ $obj = explode('_', $obj_ref);
+ if (($obj === false) OR (count($obj) != 2)) {
+ $this->Error('Invalid object reference: '.$obj);
+ return;
+ }
+ $objref = $obj[0].' '.$obj[1].' obj';
- if (strpos($this->pdfdata, $objref, $offset) != $offset) {
- // an indirect reference to an undefined object shall be considered a reference to the null object
- return array('null', 'null', $offset);
- }
- // starting position of object content
- $offset += strlen($objref);
- // get array of object content
- $objdata = array();
- $i = 0; // object main index
- do {
- if (($i > 0) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY) AND array_key_exists('/Length', $objdata[($i - 1)][1])) {
- // Stream - get using /Length in stream's dict
- $lengthobj = $objdata[($i-1)][1]['/Length'];
- if ($lengthobj[0] === PDF_TYPE_OBJREF) {
- $lengthobj = $this->getObjectVal($lengthobj);
- if ($lengthobj[0] === PDF_TYPE_OBJECT) {
- $lengthobj = $lengthobj[1];
- }
- }
- $streamlength = $lengthobj[1];
- list($element, $offset) = $this->getRawStream($offset, $streamlength);
- } else {
- // get element
- list($element, $offset) = $this->getRawObject($offset);
- }
- // decode stream using stream's dictionary information
- if ($decoding AND ($element[0] == PDF_TYPE_STREAM) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY)) {
- $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
- }
- $objdata[$i] = $element;
- ++$i;
- } while ($element[0] != 'endobj');
- // remove closing delimiter
- array_pop($objdata);
- // return raw object content
- return $objdata;
- }
+ if (strpos($this->pdfdata, $objref, $offset) != $offset) {
+ // an indirect reference to an undefined object shall be considered a reference to the null object
+ return array('null', 'null', $offset);
+ }
+ // starting position of object content
+ $offset += strlen($objref);
+ // get array of object content
+ $objdata = array();
+ $i = 0; // object main index
+ do {
+ if (($i > 0) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY) AND array_key_exists('/Length', $objdata[($i - 1)][1])) {
+ // Stream - get using /Length in stream's dict
+ $lengthobj = $objdata[($i-1)][1]['/Length'];
+ if ($lengthobj[0] === PDF_TYPE_OBJREF) {
+ $lengthobj = $this->getObjectVal($lengthobj);
+ if ($lengthobj[0] === PDF_TYPE_OBJECT) {
+ $lengthobj = $lengthobj[1];
+ }
+ }
+ $streamlength = $lengthobj[1];
+ list($element, $offset) = $this->getRawStream($offset, $streamlength);
+ } else {
+ // get element
+ list($element, $offset) = $this->getRawObject($offset);
+ }
+ // decode stream using stream's dictionary information
+ if ($decoding AND ($element[0] == PDF_TYPE_STREAM) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY)) {
+ $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
+ }
+ $objdata[$i] = $element;
+ ++$i;
+ } while ($element[0] != 'endobj');
+ // remove closing delimiter
+ array_pop($objdata);
+ // return raw object content
+ return $objdata;
+ }
- /**
- * Get the content of object, resolving indect object reference if necessary.
- * @param $obj (string) Object value.
- * @return array containing object data.
- * @public
- * @since 1.0.000 (2011-06-26)
- */
- public function getObjectVal($obj) {
- if ($obj[0] == PDF_TYPE_OBJREF) {
- if (strpos($obj[1], '_') !== false) {
- $key = explode('_', $obj[1]);
- } else {
- $key = array($obj[1], $obj[2]);
- }
+ /**
+ * Get the content of object, resolving indirect object reference if necessary.
+ * @param $obj (string) Object value.
+ * @return array containing object data.
+ * @public
+ * @since 1.0.000 (2011-06-26)
+ */
+ public function getObjectVal($obj) {
+ if ($obj[0] == PDF_TYPE_OBJREF) {
+ if (strpos($obj[1], '_') !== false) {
+ $key = explode('_', $obj[1]);
+ } else {
+ $key = array($obj[1], $obj[2]);
+ }
- $ret = array(0=>PDF_TYPE_OBJECT, 'obj'=>$key[0], 'gen'=>$key[1]);
+ $ret = array(0=>PDF_TYPE_OBJECT, 'obj'=>$key[0], 'gen'=>$key[1]);
- // reference to indirect object
- $object = null;
- if (isset($this->objects[$key[0]][$key[1]])) {
- // this object has been already parsed
- $object = $this->objects[$key[0]][$key[1]];
- } elseif (($offset = $this->findObjectOffset($key)) !== false) {
- // parse new object
- $this->objects[$key[0]][$key[1]] = $this->getIndirectObject($key[0].'_'.$key[1], $offset, false);
- $object = $this->objects[$key[0]][$key[1]];
- } elseif (($key[1] == 0) && isset($this->objstreamobjs[$key[0]])) {
- // Object is in an object stream
- $streaminfo = $this->objstreamobjs[$key[0]];
- $objs = $streaminfo[0];
- if (!isset($this->objstreams[$objs[0]][$objs[1]])) {
- // Fetch and decode object stream
- $offset = $this->findObjectOffset($objs);;
- $objstream = $this->getObjectVal(array(PDF_TYPE_OBJREF, $objs[0], $objs[1]));
- $decoded = $this->decodeStream($objstream[1][1], $objstream[2][1]);
- $this->objstreams[$objs[0]][$objs[1]] = $decoded[0]; // Store just the data, in case we need more from this objstream
- // Free memory
- unset($objstream);
- unset($decoded);
- }
- $this->objects[$key[0]][$key[1]] = $this->getRawObject($streaminfo[1], $this->objstreams[$objs[0]][$objs[1]]);
- $object = $this->objects[$key[0]][$key[1]];
- }
- if (!is_null($object)) {
- $ret[1] = $object[0];
- if (isset($object[1][0]) && $object[1][0] == PDF_TYPE_STREAM) {
- $ret[0] = PDF_TYPE_STREAM;
- $ret[2] = $object[1];
- }
- return $ret;
- }
- }
- return $obj;
- }
+ // reference to indirect object
+ $object = null;
+ if (isset($this->objects[$key[0]][$key[1]])) {
+ // this object has been already parsed
+ $object = $this->objects[$key[0]][$key[1]];
+ } elseif (($offset = $this->findObjectOffset($key)) !== false) {
+ // parse new object
+ $this->objects[$key[0]][$key[1]] = $this->getIndirectObject($key[0].'_'.$key[1], $offset, false);
+ $object = $this->objects[$key[0]][$key[1]];
+ } elseif (($key[1] == 0) && isset($this->objstreamobjs[$key[0]])) {
+ // Object is in an object stream
+ $streaminfo = $this->objstreamobjs[$key[0]];
+ $objs = $streaminfo[0];
+ if (!isset($this->objstreams[$objs[0]][$objs[1]])) {
+ // Fetch and decode object stream
+ $offset = $this->findObjectOffset($objs);;
+ $objstream = $this->getObjectVal(array(PDF_TYPE_OBJREF, $objs[0], $objs[1]));
+ $decoded = $this->decodeStream($objstream[1][1], $objstream[2][1]);
+ $this->objstreams[$objs[0]][$objs[1]] = $decoded[0]; // Store just the data, in case we need more from this objstream
+ // Free memory
+ unset($objstream);
+ unset($decoded);
+ }
+ $this->objects[$key[0]][$key[1]] = $this->getRawObject($streaminfo[1], $this->objstreams[$objs[0]][$objs[1]]);
+ $object = $this->objects[$key[0]][$key[1]];
+ }
+ if (!is_null($object)) {
+ $ret[1] = $object[0];
+ if (isset($object[1][0]) && $object[1][0] == PDF_TYPE_STREAM) {
+ $ret[0] = PDF_TYPE_STREAM;
+ $ret[2] = $object[1];
+ }
+ return $ret;
+ }
+ }
+ return $obj;
+ }
/**
* Extract object stream to find out what it contains.
*
*/
function extractObjectStream($key) {
- $objref = array(PDF_TYPE_OBJREF, $key[0], $key[1]);
- $obj = $this->getObjectVal($objref);
- if ($obj[0] !== PDF_TYPE_STREAM || !isset($obj[1][1]['/First'][1])) {
- // Not a valid object stream dictionary - skip it.
- return;
- }
- $stream = $this->decodeStream($obj[1][1], $obj[2][1]);// Decode object stream, as we need the first bit
- $first = intval($obj[1][1]['/First'][1]);
- $ints = explode(' ', substr($stream[0], 0, $first)); // Get list of object / offset pairs
- for ($j=1; $j<count($ints); $j++) {
- if (($j % 2) == 1) {
- $this->objstreamobjs[$ints[$j-1]] = array($key, $ints[$j]+$first);
- }
- }
+ $objref = array(PDF_TYPE_OBJREF, $key[0], $key[1]);
+ $obj = $this->getObjectVal($objref);
+ if ($obj[0] !== PDF_TYPE_STREAM || !isset($obj[1][1]['/First'][1])) {
+ // Not a valid object stream dictionary - skip it.
+ return;
+ }
+ $stream = $this->decodeStream($obj[1][1], $obj[2][1]);// Decode object stream, as we need the first bit
+ $first = intval($obj[1][1]['/First'][1]);
+ $ints = preg_split('/\s/', substr($stream[0], 0, $first)); // Get list of object / offset pairs
+ for ($j=1; $j<count($ints); $j++) {
+ if (($j % 2) == 1) {
+ $this->objstreamobjs[$ints[$j-1]] = array($key, $ints[$j]+$first);
+ }
+ }
- // Free memory - we may not need this at all.
- unset($obj);
- unset($stream);
+ // Free memory - we may not need this at all.
+ unset($obj);
+ unset($stream);
}
- /**
- * Find all object offsets. Saves having to scour the file multiple times.
- * @private
- */
- private function findObjectOffsets() {
- $this->objoffsets = array();
- if (preg_match_all('/(*ANYCRLF)^[\s]*([0-9]+)[\s]+([0-9]+)[\s]+obj/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE) >= 1) {
- $i = 0;
- foreach($matches[0] as $match) {
- $offset = $match[1] + strspn($match[0], "\x00\x09\x0a\x0c\x0d\x20");
- $this->objoffsets[trim($match[0])] = $offset;
- $dictoffset = $match[1] + strlen($match[0]);
- if (preg_match('|^\s+<<[^>]+/ObjStm|', substr($this->pdfdata, $dictoffset, 256), $objstm) == 1) {
- $this->extractObjectStream(array($matches[1][$i][0], $matches[2][$i][0]));
- }
- $i++;
- }
- }
- unset($matches);
- }
+ /**
+ * Find all object offsets. Saves having to scour the file multiple times.
+ * @private
+ */
+ private function findObjectOffsets() {
+ $this->objoffsets = array();
+ if (preg_match_all('/(*ANYCRLF)^[\s]*([0-9]+)[\s]+([0-9]+)[\s]+obj/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE) >= 1) {
+ $i = 0;
+ $laststreamend = 0;
+ foreach($matches[0] as $match) {
+ $offset = $match[1] + strspn($match[0], "\x00\x09\x0a\x0c\x0d\x20");
+ if ($offset < $laststreamend) {
+ // Contained within another stream, skip it.
+ continue;
+ }
+ $this->objoffsets[trim($match[0])] = $offset;
+ $dictoffset = $match[1] + strlen($match[0]);
+ $dictfrag = substr($this->pdfdata, $dictoffset, 256);
+ if (preg_match('|^\s+<<[^>]+/Length\s+(\d+)|', $dictfrag, $lengthmatch, PREG_OFFSET_CAPTURE) == 1) {
+ $laststreamend += intval($lengthmatch[1][0]);
+ }
+ if (preg_match('|^\s+<<[^>]+/ObjStm|', $dictfrag, $objstm) == 1) {
+ $this->extractObjectStream(array($matches[1][$i][0], $matches[2][$i][0]));
+ }
+ $i++;
+ }
+ }
+ unset($lengthmatch);
+ unset($dictfrag);
+ unset($matches);
+ }
- /**
- * Get offset of an object. Checks xref first, then offsets found by scouring the file.
- * @param $key (array) Object key to find (obj, gen).
- * @return int Offset of the object in $this->pdfdata.
- * @private
- */
- private function findObjectOffset($key) {
- $objref = $key[0].' '.$key[1].' obj';
- if (isset($this->xref['xref'][$key[0]][$key[1]])) {
- $offset = $this->xref['xref'][$key[0]][$key[1]];
- if (strpos($this->pdfdata, $objref, $offset) === $offset) {
- // Offset is in xref table and matches actual position in file
- //echo "Offset in XREF is correct, returning<br>";
- return $this->xref['xref'][$key[0]][$key[1]];
- }
- }
- if (array_key_exists($objref, $this->objoffsets)) {
- //echo "Offset found in internal reftable<br>";
- return $this->objoffsets[$objref];
- }
- return false;
- }
+ /**
+ * Get offset of an object. Checks xref first, then offsets found by scouring the file.
+ * @param $key (array) Object key to find (obj, gen).
+ * @return int Offset of the object in $this->pdfdata.
+ * @private
+ */
+ private function findObjectOffset($key) {
+ $objref = $key[0].' '.$key[1].' obj';
+ if (isset($this->xref['xref'][$key[0]][$key[1]])) {
+ $offset = $this->xref['xref'][$key[0]][$key[1]];
+ if (strpos($this->pdfdata, $objref, $offset) === $offset) {
+ // Offset is in xref table and matches actual position in file
+ //echo "Offset in XREF is correct, returning<br>";
+ return $this->xref['xref'][$key[0]][$key[1]];
+ }
+ }
+ if (array_key_exists($objref, $this->objoffsets)) {
+ //echo "Offset found in internal reftable<br>";
+ return $this->objoffsets[$objref];
+ }
+ return false;
+ }
- /**
- * Decode the specified stream.
- * @param $sdic (array) Stream's dictionary array.
- * @param $stream (string) Stream to decode.
- * @return array containing decoded stream data and remaining filters.
- * @protected
- * @since 1.0.000 (2011-06-22)
- */
- protected function decodeStream($sdic, $stream) {
- // get stream lenght and filters
- $slength = strlen($stream);
- if ($slength <= 0) {
- return array('', array());
- }
- $filters = array();
- foreach ($sdic as $k => $v) {
- if ($v[0] == PDF_TYPE_TOKEN) {
- if (($k == '/Length') AND ($v[0] == PDF_TYPE_NUMERIC)) {
- // get declared stream lenght
- $declength = intval($v[1]);
- if ($declength < $slength) {
- $stream = substr($stream, 0, $declength);
- $slength = $declength;
- }
- } elseif ($k == '/Filter') {
- if ($v[0] == PDF_TYPE_TOKEN) {
- // single filter
- $filters[] = $v[1];
- } elseif ($v[0] == PDF_TYPE_ARRAY) {
- // array of filters
- foreach ($v[1] as $flt) {
- if ($flt[0] == PDF_TYPE_TOKEN) {
- $filters[] = $flt[1];
- }
- }
- }
- }
- }
- }
- // decode the stream
- $remaining_filters = array();
- foreach ($filters as $filter) {
- if (in_array($filter, $this->FilterDecoders->getAvailableFilters())) {
- $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
- } else {
- // add missing filter to array
- $remaining_filters[] = $filter;
- }
- }
- return array($stream, $remaining_filters);
- }
+ /**
+ * Decode the specified stream.
+ * @param $sdic (array) Stream's dictionary array.
+ * @param $stream (string) Stream to decode.
+ * @return array containing decoded stream data and remaining filters.
+ * @protected
+ * @since 1.0.000 (2011-06-22)
+ */
+ protected function decodeStream($sdic, $stream) {
+ // get stream length and filters
+ $slength = strlen($stream);
+ if ($slength <= 0) {
+ return array('', array());
+ }
+ $filters = array();
+ foreach ($sdic as $k => $v) {
+ if ($v[0] == PDF_TYPE_TOKEN) {
+ if (($k == '/Length') AND ($v[0] == PDF_TYPE_NUMERIC)) {
+ // get declared stream length
+ $declength = intval($v[1]);
+ if ($declength < $slength) {
+ $stream = substr($stream, 0, $declength);
+ $slength = $declength;
+ }
+ } elseif ($k == '/Filter') {
+ if ($v[0] == PDF_TYPE_TOKEN) {
+ // single filter
+ $filters[] = $v[1];
+ } elseif ($v[0] == PDF_TYPE_ARRAY) {
+ // array of filters
+ foreach ($v[1] as $flt) {
+ if ($flt[0] == PDF_TYPE_TOKEN) {
+ $filters[] = $flt[1];
+ }
+ }
+ }
+ }
+ }
+ }
+ // decode the stream
+ $remaining_filters = array();
+ foreach ($filters as $filter) {
+ if (in_array($filter, $this->FilterDecoders->getAvailableFilters())) {
+ $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
+ } else {
+ // add missing filter to array
+ $remaining_filters[] = $filter;
+ }
+ }
+ return array($stream, $remaining_filters);
+ }
/**
@@ -1178,27 +1197,63 @@ class tcpdi_parser {
* @param array $obj Array of pdf-data
*/
private function _getPageResources ($obj) { // $obj = /Page
- $obj = $this->getObjectVal($obj);
+ $obj = $this->getObjectVal($obj);
// If the current object has a resources
- // dictionary associated with it, we use
- // it. Otherwise, we move back to its
- // parent object.
+ // dictionary associated with it, we use
+ // it. Otherwise, we move back to its
+ // parent object.
if (isset ($obj[1][1]['/Resources'])) {
- $res = $obj[1][1]['/Resources'];
- if ($res[0] == PDF_TYPE_OBJECT)
+ $res = $obj[1][1]['/Resources'];
+ if ($res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
- } else {
- if (!isset ($obj[1][1]['/Parent'])) {
- return false;
- } else {
+ } else {
+ if (!isset ($obj[1][1]['/Parent'])) {
+ return false;
+ } else {
$res = $this->_getPageResources($obj[1][1]['/Parent']);
if ($res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
- }
- }
+ }
+ }
+ }
+
+ /**
+ * Get annotations from current page
+ *
+ * @return array
+ */
+ public function getPageAnnotations() {
+ return $this->_getPageAnnotations($this->pages[$this->pageno]);
+ }
+
+ /**
+ * Get annotations from /Page
+ *
+ * @param array $obj Array of pdf-data
+ */
+ private function _getPageAnnotations ($obj) { // $obj = /Page
+ $obj = $this->getObjectVal($obj);
+
+ // If the current object has an annotations
+ // dictionary associated with it, we use
+ // it. Otherwise, we move back to its
+ // parent object.
+ if (isset ($obj[1][1]['/Annots'])) {
+ $annots = $obj[1][1]['/Annots'];
+ } else {
+ if (!isset ($obj[1][1]['/Parent'])) {
+ return false;
+ } else {
+ $annots = $this->_getPageAnnotations($obj[1][1]['/Parent']);
+ }
+ }
+
+ if ($annots[0] == PDF_TYPE_OBJREF)
+ return $this->getObjectVal($annots);
+ return $annots;
}
@@ -1276,7 +1331,7 @@ class tcpdi_parser {
$stream = $obj[2][1];
foreach ($filters AS $_filter) {
- $stream = $this->FilterDecoders->decodeFilter($_filter[1], $stream);
+ $stream = $this->FilterDecoders->decodeFilter($_filter[1], $stream);
}
return $stream;
@@ -1361,34 +1416,34 @@ class tcpdi_parser {
}
private function _getPageRotation($obj) { // $obj = /Page
- $obj = $this->getObjectVal($obj);
- if (isset ($obj[1][1]['/Rotate'])) {
- $res = $this->getObjectVal($obj[1][1]['/Rotate']);
+ $obj = $this->getObjectVal($obj);
+ if (isset ($obj[1][1]['/Rotate'])) {
+ $res = $this->getObjectVal($obj[1][1]['/Rotate']);
if (isset($res[0]) && $res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
- } else {
- if (!isset ($obj[1][1]['/Parent'])) {
- return false;
- } else {
+ } else {
+ if (!isset ($obj[1][1]['/Parent'])) {
+ return false;
+ } else {
$res = $this->_getPageRotation($obj[1][1]['/Parent']);
if (isset($res[0]) && $res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
- }
- }
+ }
+ }
}
- /**
- * This method is automatically called in case of fatal error; it simply outputs the message and halts the execution.
- * @param $msg (string) The error message
- * @public
- * @since 1.0.000 (2011-05-23)
- */
- public function Error($msg) {
- // exit program and print error
- die('TCPDF_PARSER ERROR: '.$msg);
- }
+ /**
+ * This method is automatically called in case of fatal error; it simply outputs the message and halts the execution.
+ * @param $msg (string) The error message
+ * @public
+ * @since 1.0.000 (2011-05-23)
+ */
+ public function Error($msg) {
+ // exit program and print error
+ die("TCPDI_PARSER ERROR [{$this->uniqueid}]: ".$msg);
+ }
} // END OF TCPDF_PARSER CLASS