w:@Cz薺\ڰ fFl9=.Y0t`𕔓Eh/&\6\8 #DUab(XQ2X޳3{6#Jefϖ#y/nZN5H1} 0Q0^_Rcʜ}xafVg[  )v'o aȽ,4I :\$Pd[+{5ǬD** g\FouM(|ةDHTtJruŒ;m6d_ cܹ5p"şSJbuŸu%c5-턓nZpL?κw@cQDZ4rǣ_0Ee m'I @ įrVA/rzZyTąDU*lٺU ޴ӣ7;02w LGضyD,ʞ¢ x {ڽŒQo5w2WKYwăE?]S[mt&tqyڜo;㓨-煺N/庖MrL>}ޅa,ZdU3C4Z_t|Iw^w=<,ā'&kO2=+~GD5tPbXaɞԼ#g".-o8E&>+Nwl?Ajt.D.n!apV@н/W63iM7rI'z;wZDnc)f'r<NAjQҝN,<ӯQ]$[XuX]̸VUF*I^|X=<Dj"!{bǷMVa @䰅,pV頑!HO<5-ʎߓMpr"۰n2Hm.qC/'  .ORFԀvwS!vg@4W2kʙ3]ʸT1 ZQg I jNl.*y^BinHx1~cecMt}?%:Szi//Yҡ.ą##Y3ѫ;=2aEڼedtMxF:U RɀG54="@o3DR-0Ȝ6+𦟠X,_.$b,Du~@RD̴oORZfXsZ޴q&JE}~_lgO|Φ= $9ohl?ċE{[#O#+~ 6ӟfȇ1lp9E]FR)xY7mJ|Ġ2JQy s@DpvKw,@dPsF4X)6k+|QAl]aOI6ML6ݷ88%TFӀX̲!~¶]9ĵo.~Ckg$ >: 'rFK67ZyȂ ŧj q1H#aW8!K$xuWg&+-N ET=%֣Cޡ0%moWY,BXG:6k%#(j$'W);6x|43]ho{2F44 bE&m%ph3q%1b{Z=f62., 7 _OE=o#yu{ l'(c ԉBlDQ[:-lPDQ#B1C.<6T?]sB:nmyrk{OO=CDD%VH=@5.o~."bj /ruBxVJUMDQ ǪBgt</6ܵ$zfSoS铘{*0AbN%s, sM8+-2ߺ*sdu?a3|bgoN(C8DkT27apX]{Ѻ}?KTT/A'O`0`SwFxb4„QDZ*>U_te.\ sd1* !>zߤparseError('Received null character.'); $this->text .= $tok; $this->scanner->consume(); break; } $this->text .= $this->scanner->charsUntil("<&\0"); } } return $this->carryOn; } /** * Parse anything that looks like character data. * * Different rules apply based on the current text mode. * * @see Elements::TEXT_RAW Elements::TEXT_RCDATA. */ protected function characterData() { $tok = $this->scanner->current(); if (false === $tok) { return false; } switch ($this->textMode) { case Elements::TEXT_RAW: return $this->rawText($tok); case Elements::TEXT_RCDATA: return $this->rcdata($tok); default: if ('<' === $tok || '&' === $tok) { return false; } return $this->text($tok); } } /** * This buffers the current token as character data. * * @param string $tok The current token. * * @return bool */ protected function text($tok) { // This should never happen... if (false === $tok) { return false; } // NULL character if ("\00" === $tok) { $this->parseError('Received null character.'); } $this->buffer($tok); $this->scanner->consume(); return true; } /** * Read text in RAW mode. 
* * @param string $tok The current token. * * @return bool */ protected function rawText($tok) { if (is_null($this->untilTag)) { return $this->text($tok); } $sequence = 'untilTag . '>'; $txt = $this->readUntilSequence($sequence); $this->events->text($txt); $this->setTextMode(0); return $this->endTag(); } /** * Read text in RCDATA mode. * * @param string $tok The current token. * * @return bool */ protected function rcdata($tok) { if (is_null($this->untilTag)) { return $this->text($tok); } $sequence = 'untilTag; $txt = ''; $caseSensitive = !Elements::isHtml5Element($this->untilTag); while (false !== $tok && !('<' == $tok && ($this->scanner->sequenceMatches($sequence, $caseSensitive)))) { if ('&' == $tok) { $txt .= $this->decodeCharacterReference(); $tok = $this->scanner->current(); } else { $txt .= $tok; $tok = $this->scanner->next(); } } $len = strlen($sequence); $this->scanner->consume($len); $len += $this->scanner->whitespace(); if ('>' !== $this->scanner->current()) { $this->parseError('Unclosed RCDATA end tag'); } $this->scanner->unconsume($len); $this->events->text($txt); $this->setTextMode(0); return $this->endTag(); } /** * If the document is read, emit an EOF event. */ protected function eof() { // fprintf(STDOUT, "EOF"); $this->flushBuffer(); $this->events->eof(); $this->carryOn = false; } /** * Look for markup. */ protected function markupDeclaration() { $tok = $this->scanner->next(); // Comment: if ('-' == $tok && '-' == $this->scanner->peek()) { $this->scanner->consume(2); return $this->comment(); } elseif ('D' == $tok || 'd' == $tok) { // Doctype return $this->doctype(); } elseif ('[' == $tok) { // CDATA section return $this->cdataSection(); } // FINISH $this->parseError('Expected . Emit an empty comment because 8.2.4.46 says to. if ('>' == $tok) { // Parse error. Emit the comment token. $this->parseError("Expected comment data, got '>'"); $this->events->comment(''); $this->scanner->consume(); return true; } // Replace NULL with the replacement char. 
if ("\0" == $tok) { $tok = UTF8Utils::FFFD; } while (!$this->isCommentEnd()) { $comment .= $tok; $tok = $this->scanner->next(); } $this->events->comment($comment); $this->scanner->consume(); return true; } /** * Check if the scanner has reached the end of a comment. * * @return bool */ protected function isCommentEnd() { $tok = $this->scanner->current(); // EOF if (false === $tok) { // Hit the end. $this->parseError('Unexpected EOF in a comment.'); return true; } // If next two tokens are not '--', not the end. if ('-' != $tok || '-' != $this->scanner->peek()) { return false; } $this->scanner->consume(2); // Consume '-' and one of '!' or '>' // Test for '>' if ('>' == $this->scanner->current()) { return true; } // Test for '!>' if ('!' == $this->scanner->current() && '>' == $this->scanner->peek()) { $this->scanner->consume(); // Consume the last '>' return true; } // Unread '-' and one of '!' or '>'; $this->scanner->unconsume(2); return false; } /** * Parse a DOCTYPE. * * Parse a DOCTYPE declaration. This method has strong bearing on whether or * not Quirksmode is enabled on the event handler. * * @todo This method is a little long. Should probably refactor. * * @return bool */ protected function doctype() { // Check that string is DOCTYPE. if ($this->scanner->sequenceMatches('DOCTYPE', false)) { $this->scanner->consume(7); } else { $chars = $this->scanner->charsWhile('DOCTYPEdoctype'); $this->parseError('Expected DOCTYPE, got %s', $chars); return $this->bogusComment('scanner->whitespace(); $tok = $this->scanner->current(); // EOF: die. if (false === $tok) { $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true); $this->eof(); return true; } // NULL char: convert. 
if ("\0" === $tok) { $this->parseError('Unexpected null character in DOCTYPE.'); } $stop = " \n\f>"; $doctypeName = $this->scanner->charsUntil($stop); // Lowercase ASCII, replace \0 with FFFD $doctypeName = strtolower(strtr($doctypeName, "\0", UTF8Utils::FFFD)); $tok = $this->scanner->current(); // If false, emit a parse error, DOCTYPE, and return. if (false === $tok) { $this->parseError('Unexpected EOF in DOCTYPE declaration.'); $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, null, true); return true; } // Short DOCTYPE, like if ('>' == $tok) { // DOCTYPE without a name. if (0 == strlen($doctypeName)) { $this->parseError('Expected a DOCTYPE name. Got nothing.'); $this->events->doctype($doctypeName, 0, null, true); $this->scanner->consume(); return true; } $this->events->doctype($doctypeName); $this->scanner->consume(); return true; } $this->scanner->whitespace(); $pub = strtoupper($this->scanner->getAsciiAlpha()); $white = $this->scanner->whitespace(); // Get ID, and flag it as pub or system. if (('PUBLIC' == $pub || 'SYSTEM' == $pub) && $white > 0) { // Get the sys ID. $type = 'PUBLIC' == $pub ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM; $id = $this->quotedString("\0>"); if (false === $id) { $this->events->doctype($doctypeName, $type, $pub, false); return true; } // Premature EOF. if (false === $this->scanner->current()) { $this->parseError('Unexpected EOF in DOCTYPE'); $this->events->doctype($doctypeName, $type, $id, true); return true; } // Well-formed complete DOCTYPE. $this->scanner->whitespace(); if ('>' == $this->scanner->current()) { $this->events->doctype($doctypeName, $type, $id, false); $this->scanner->consume(); return true; } // If we get here, we have scanner->charsUntil('>'); $this->parseError('Malformed DOCTYPE.'); $this->events->doctype($doctypeName, $type, $id, true); $this->scanner->consume(); return true; } // Else it's a bogus DOCTYPE. // Consume to > and trash. 
$this->scanner->charsUntil('>'); $this->parseError('Expected PUBLIC or SYSTEM. Got %s.', $pub); $this->events->doctype($doctypeName, 0, null, true); $this->scanner->consume(); return true; } /** * Utility for reading a quoted string. * * @param string $stopchars Characters (in addition to a close-quote) that should stop the string. * E.g. sometimes '>' is higher precedence than '"' or "'". * * @return mixed String if one is found (quotations omitted). */ protected function quotedString($stopchars) { $tok = $this->scanner->current(); if ('"' == $tok || "'" == $tok) { $this->scanner->consume(); $ret = $this->scanner->charsUntil($tok . $stopchars); if ($this->scanner->current() == $tok) { $this->scanner->consume(); } else { // Parse error because no close quote. $this->parseError('Expected %s, got %s', $tok, $this->scanner->current()); } return $ret; } return false; } /** * Handle a CDATA section. * * @return bool */ protected function cdataSection() { $cdata = ''; $this->scanner->consume(); $chars = $this->scanner->charsWhile('CDAT'); if ('CDATA' != $chars || '[' != $this->scanner->current()) { $this->parseError('Expected [CDATA[, got %s', $chars); return $this->bogusComment('scanner->next(); do { if (false === $tok) { $this->parseError('Unexpected EOF inside CDATA.'); $this->bogusComment('scanner->next(); } while (!$this->scanner->sequenceMatches(']]>')); // Consume ]]> $this->scanner->consume(3); $this->events->cdata($cdata); return true; } // ================================================================ // Non-HTML5 // ================================================================ /** * Handle a processing instruction. * * XML processing instructions are supposed to be ignored in HTML5, * treated as "bogus comments". However, since we're not a user * agent, we allow them. We consume until ?> and then issue a * EventListener::processingInstruction() event. * * @return bool */ protected function processingInstruction() { if ('?' 
!= $this->scanner->current()) { return false; } $tok = $this->scanner->next(); $procName = $this->scanner->getAsciiAlpha(); $white = $this->scanner->whitespace(); // If not a PI, send to bogusComment. if (0 == strlen($procName) || 0 == $white || false == $this->scanner->current()) { $this->parseError("Expected processing instruction name, got $tok"); $this->bogusComment('. while (!('?' == $this->scanner->current() && '>' == $this->scanner->peek())) { $data .= $this->scanner->current(); $tok = $this->scanner->next(); if (false === $tok) { $this->parseError('Unexpected EOF in processing instruction.'); $this->events->processingInstruction($procName, $data); return true; } } $this->scanner->consume(2); // Consume the closing tag $this->events->processingInstruction($procName, $data); return true; } // ================================================================ // UTILITY FUNCTIONS // ================================================================ /** * Read from the input stream until we get to the desired sequene * or hit the end of the input stream. * * @param string $sequence * * @return string */ protected function readUntilSequence($sequence) { $buffer = ''; // Optimization for reading larger blocks faster. $first = substr($sequence, 0, 1); while (false !== $this->scanner->current()) { $buffer .= $this->scanner->charsUntil($first); // Stop as soon as we hit the stopping condition. if ($this->scanner->sequenceMatches($sequence, false)) { return $buffer; } $buffer .= $this->scanner->current(); $this->scanner->consume(); } // If we get here, we hit the EOF. $this->parseError('Unexpected EOF during text read.'); return $buffer; } /** * Check if upcomming chars match the given sequence. * * This will read the stream for the $sequence. If it's * found, this will return true. If not, return false. * Since this unconsumes any chars it reads, the caller * will still need to read the next sequence, even if * this returns true. 
*
     * Example: $this->scanner->sequenceMatches(']]>') will
     * see if the input stream is at the start of a
     * ']]>' string.
     *
     * @deprecated since version 2.4, to be removed in 3.0. Use Scanner::sequenceMatches() instead.
     *
     * @param string $sequence      The character sequence to look for.
     * @param bool   $caseSensitive Whether the comparison is case sensitive.
     *
     * @return bool Whatever Scanner::sequenceMatches() reports for the sequence.
     */
    protected function sequenceMatches($sequence, $caseSensitive = true)
    {
        // The notice is silenced with @ so that only error handlers which opt in
        // to E_USER_DEPRECATED see it.
        @trigger_error(__METHOD__ . ' method is deprecated since version 2.4 and will be removed in 3.0. Use Scanner::sequenceMatches() instead.', E_USER_DEPRECATED);

        return $this->scanner->sequenceMatches($sequence, $caseSensitive);
    }

    /**
     * Send a TEXT event with the contents of the text buffer.
     *
     * This emits an EventHandler::text() event with the current contents of the
     * temporary text buffer. (The buffer is used to group as much PCDATA
     * as we can instead of emitting lots and lots of TEXT events.)
     *
     * Nothing is emitted when the buffer is empty. The buffer is reset to an
     * empty string after the event has been sent.
     */
    protected function flushBuffer()
    {
        if ('' === $this->text) {
            return;
        }

        $this->events->text($this->text);
        $this->text = '';
    }

    /**
     * Add text to the temporary buffer.
     *
     * @see flushBuffer()
     *
     * @param string $str The text to append.
     */
    protected function buffer($str)
    {
        $this->text .= $str;
    }

    /**
     * Emit a parse error.
     *
     * A parse error always returns false because it never consumes any
     * characters.
     *
     * Any arguments passed after $msg are used as vsprintf() values for $msg.
     * When no extra arguments are given, $msg is reported verbatim (it is not
     * run through vsprintf(), so a literal '%' is safe in that case).
     *
     * @param string $msg The message, optionally a vsprintf() format string.
     *
     * @return bool Always false.
     */
    protected function parseError($msg)
    {
        // Everything after the first argument is a format value.
        $args = func_get_args();

        if (count($args) > 1) {
            array_shift($args);
            $msg = vsprintf($msg, $args);
        }

        $line = $this->scanner->currentLine();
        $col = $this->scanner->columnOffset();
        $this->events->parseError($msg, $line, $col);

        return false;
    }

    /**
     * Decode a character reference and return the string.
     *
     * The scanner is expected to be positioned on the '&' when this is called.
     * Whenever the input turns out not to be a complete reference, a literal
     * '&' is returned and the scanner is rewound so that the characters after
     * the '&' are read again by the caller.
     *
     * If $inAttribute is set to true, a bare & will be returned as-is.
     *
     * @param bool $inAttribute Set to true if the text is inside of an attribute value.
     *                          false otherwise.
     *
     * @return string The decoded entity, or '&' when nothing could be decoded.
     */
    protected function decodeCharacterReference($inAttribute = false)
    {
        // Next char after &.
        $tok = $this->scanner->next();
        $start = $this->scanner->position();

        if (false === $tok) {
            return '&';
        }

        // These indicate not an entity. We return just
        // the &.
        if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
            return '&';
        }

        // Numeric entity
        if ('#' === $tok) {
            $tok = $this->scanner->next();

            if (false === $tok) {
                $this->parseError('Expected &#DEC; &#HEX;, got EOF');
                $this->scanner->unconsume(1);

                return '&';
            }

            // Hexidecimal encoding.
            // X[0-9a-fA-F]+;
            // x[0-9a-fA-F]+;
            if ('x' === $tok || 'X' === $tok) {
                $tok = $this->scanner->next(); // Consume x

                // Convert from hex code to char.
                $hex = $this->scanner->getHex();

                // Only a missing digit string means "no hex digits". empty()
                // must not be used here: it is also true for the string '0',
                // which made '&#x0;' take this branch (and rewind by a distance
                // that is only right when no digit was consumed) while
                // '&#x00;' was looked up normally.
                if (!is_string($hex) || '' === $hex) {
                    $this->parseError('Expected &#xHEX;, got &#x%s', $tok);
                    // We unconsume because we don't know what parser rules might
                    // be in effect for the remaining chars. For example. '&#>'
                    // might result in a specific parsing rule inside of tag
                    // contexts, while not inside of pcdata context.
                    $this->scanner->unconsume(2);

                    return '&';
                }

                $entity = CharacterReference::lookupHex($hex);
            } else {
                // Decimal encoding.
                // [0-9]+;

                // Convert from decimal to char.
                $numeric = $this->scanner->getNumeric();

                if (false === $numeric) {
                    $this->parseError('Expected &#DIGITS;, got &#%s', $tok);
                    $this->scanner->unconsume(2);

                    return '&';
                }

                $entity = CharacterReference::lookupDecimal($numeric);
            }
        } elseif ('=' === $tok && $inAttribute) {
            return '&';
        } else {
            // String entity.

            // Attempt to consume a string up to a ';'.
            // [a-zA-Z0-9]+;
            $cname = $this->scanner->getAsciiAlphaNum();
            $entity = CharacterReference::lookupName($cname);

            // When no entity is found, report the unmatched name (inside an
            // attribute only when the name is empty), rewind to the character
            // after the '&' and hand back a literal '&'.
            if (null === $entity) {
                if (!$inAttribute || '' === $cname) {
                    $this->parseError("No match in entity table for '%s'", $cname);
                }

                $this->scanner->unconsume($this->scanner->position() - $start);

                return '&';
            }
        }

        // The scanner has advanced the cursor for us.
        $tok = $this->scanner->current();

        // We have an entity. We're done here.
        if (';' === $tok) {
            $this->scanner->consume();

            return $entity;
        }

        // Failing to match ; means unconsume the entire string.
        $this->scanner->unconsume($this->scanner->position() - $start);

        $this->parseError('Expected &ENTITY;, got &ENTITY%s (no trailing ;) ', $tok);

        return '&';
    }
}