From 483c845a274b86b3b71663c600ab3fc75ae0dc83 Mon Sep 17 00:00:00 2001 From: Latif Khalifa Date: Thu, 3 Oct 2013 16:46:20 +0200 Subject: [PATCH] Added report parser and LLSD classes --- htdocs/lib/ReportParser.php | 50 +++++++ htdocs/lib/llsd_classes.php | 213 +++++++++++++++++++++++++++ htdocs/lib/llsd_decode.php | 277 ++++++++++++++++++++++++++++++++++++ htdocs/lib/llsd_encode.php | 253 ++++++++++++++++++++++++++++++++ 4 files changed, 793 insertions(+) create mode 100644 htdocs/lib/ReportParser.php create mode 100644 htdocs/lib/llsd_classes.php create mode 100644 htdocs/lib/llsd_decode.php create mode 100644 htdocs/lib/llsd_encode.php diff --git a/htdocs/lib/ReportParser.php b/htdocs/lib/ReportParser.php new file mode 100644 index 0000000..90b0202 --- /dev/null +++ b/htdocs/lib/ReportParser.php @@ -0,0 +1,50 @@ + +* @copyright Copyright © 2012, Latif Khalifa +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files +* (the "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to permit +* persons to whom the Software is furnished to do so, subject to the +* following conditions: +* +* - The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+* +*/ + +require_once SITE_ROOT.'/lib/llsd_classes.php'; +require_once SITE_ROOT.'/lib/llsd_decode.php'; + +class ReportParser +{ + function parse($id) + { + global $DB; + $q = kl_str_sql("select * from raw_reports where report_id=!i", $id); + if (!$res = $DB->query($q) OR !$row = $DB->fetchRow($res)) + { + return; + } + $data = new stdClass; + $DB->loadFromDbRow($data, $res, $row); + $data->report = llsd_decode($data->raw_data); + unset($data->raw_data); + return $data; + } +} \ No newline at end of file diff --git a/htdocs/lib/llsd_classes.php b/htdocs/lib/llsd_classes.php new file mode 100644 index 0000000..632e620 --- /dev/null +++ b/htdocs/lib/llsd_classes.php @@ -0,0 +1,213 @@ +Value = self::$_NULL_UUID; + } + + function CheckUUID($UUID) + { + $uuidlen = strlen($UUID); + if (32 === $uuidlen) + { + return ctype_xdigit($UUID); + } + elseif (36 === $uuidlen) + { + // We support UUIDs in 8-4-4-4-12 form + return ctype_xdigit(substr($UUID, 0, 8)) && + ($UUID[8] == '-') && + ctype_xdigit(substr($UUID, 9, 4)) && + ($UUID[13] == '-') && + ctype_xdigit(substr($UUID, 14, 4)) && + ($UUID[18] == '-') && + ctype_xdigit(substr($UUID, 19, 4)) && + ($UUID[23] == '-') && + ctype_xdigit(substr($UUID, 24, 12)); + } + return False; + } + + function Set($UUID) + { + if ($UUID == '') + { + $this->Value = self::$_NULL_UUID; + return true; + } + + if ($this->CheckUUID($UUID)) + { + $this->Value = $UUID; + return true; + } + else + { + unset($this->Value); + throw new ImproperInvocationException('Invalid UUID string passed to class'); + } + } + + function Get() + { + if (isset($this->Value)) + { + // Return actual UUID + return $this->Value; + } + else + { + // Null UUID - Mimics Python's UUID class + return self::$_NULL_UUID; + } + } + + function __toString() + { + return $this->Get(); + } + +}; + +class llsd_URI +{ + var $Value; + + function llsd_URI() + { + $this->Value = ''; + } + + function Set($URI) + { + $this->Value = $URI; + return true; + } + + function Get() + { + return 
$this->Value; + } +}; + +class llsd_Date +{ + var $Value; + + function llsd_Date() + { + $this->Value = ''; + } + + function Set($Date) + { + $this->Value = $Date; + return true; + } + + function Get() + { + return $this->Value; + } +}; + +class llsd_Undef +{ + function __toString() + { + return undef; + } +}; + +class llsd_Binary +{ + var $Value; + var $Encoding; + + function Set($EncodedData = '', $Encoding = 'base64') + { + switch ($Encoding) + { + case 'base64': + $this->Value = base64_decode($EncodedData); + + if ($this->Value === FALSE) + { + // Decode failed + unset($this->Value); + return false; + } + + break; + + default: + break; + } + + if (isset($this->Value)) + { + // Decode successful + $this->Encoding = $Encoding; + return true; + } + else + { + // Data invalid + return false; + } + } + + function GetData() + { + if (isset($this->Value)) + return $this->Value; + else + return false; + } + + function GetEncoding() + { + if (isset($this->Encoding)) + return $this->Encoding; + else + return false; + } + + public function __toString() { + return $this->GetData(); + } +}; + +?> diff --git a/htdocs/lib/llsd_decode.php b/htdocs/lib/llsd_decode.php new file mode 100644 index 0000000..ccf4604 --- /dev/null +++ b/htdocs/lib/llsd_decode.php @@ -0,0 +1,277 @@ +parser = xml_parser_create(); + $this->result = null; + $this->inLLSDElement = false; + + $this->stack = array(); + $this->keyStack = array(); + + $this->depth = 0; + $this->skipping = false; + $this->skipThrough; + + $this->currentContent = ''; + + xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, False); + xml_set_object($this->parser, $this); + xml_set_element_handler($this->parser, 'tag_open', 'tag_close'); + xml_set_character_data_handler($this->parser, 'cdata'); + } + + function GetLLSDObject() + { + return $this->result; + } + + function parse($data) + { + $result = xml_parse($this->parser, $data); + + if( $result == 0 ) + { + $errno = xml_get_error_code( $this->parser ); + $errstr = 
xml_error_string( $errno ); + + $line = xml_get_current_line_number( $this->parser ); + $col = xml_get_current_column_number( $this->parser ); + + $msg = "$errstr (line $line, col $col)"; + + throw new Exception( $msg, $errno ); + } + } + + function startSkipping() + { + $this->skipping = true; + $this->skipThrough = $this->depth; + } + + function tag_open($parser, $tag, $attributes) + { + $this->depth += 1; + if ($this->skipping) + return; + + $this->currentContent = ''; + + switch ($tag) + { + case 'llsd': + if ($this->inLLSDElement) + return $this->startSkipping(); + $this->inLLSDElement = true; + return; + + case 'key': + if (empty($this->keyStack) or end($this->keyStack) === false) + return $this->startSkipping(); + return; + } + + if (!$this->inLLSDElement) + return $this->startSkipping(); + + switch ($tag) + { + case 'binary': + $this->currentEncoding = $attributes['encoding']; + break; + + case 'map': + $this->stack[] = array(); + $this->keyStack[] = true; + break; + + case 'array': + $this->stack[] = array(); + $this->keyStack[] = false; + break; + } + } + + function tag_close($parser, $tag) + { + $this->depth -= 1; + if ($this->skipping) + { + if ($this->depth < $this->skipThrough) + { + $this->skipping = false; + } + return; + } + switch ($tag) + { + case 'llsd': + $this->inLLSDElement = false; + return; + + case 'key': + array_pop($this->keyStack); + $this->keyStack[] = $this->currentContent; + return; + } + if (!$this->inLLSDElement) return; + + $content = $this->currentContent; + $value = null; + switch ($tag) + { + case 'undef': + $value = null; + break; + + case 'boolean': + $value = $content == 'true' || $content == '1'; + break; + + case 'integer': + $value = (int)$content; + break; + + case 'real': + $value = (float)$content; + break; + + case 'string': + $value = (string)$content; + break; + + case 'uuid': + $value = new llsd_UUID; + $value->Set($content); + break; + + case 'date': + $value = new llsd_Date; + $value->Set($content); + break; + + 
case 'uri': + $value = new llsd_URI; + $value->Set($content); + break; + + case 'binary': + $value = new llsd_Binary; + $value->Set($content, $this->currentEncoding); + break; + + case 'array': + case 'map': + $value = array_pop($this->stack); + array_pop($this->keyStack); + break; + + default: + $value = null; + break; + } + if (empty($this->stack)) + { + $this->result = $value; + } + else + { + $n = count($this->stack) - 1; + $struct = &$this->stack[$n]; + $key = $this->keyStack[$n]; + if ($key === false) + { + $struct[] = $value; + } + else + { + $struct[$key] = $value; + } + } + } + + function cdata($parser, $cdata) + { + if ($this->skipping) + return; + $this->currentContent .= $cdata; + } +} + +function llsd_decode($str) +{ + try + { + $LLSDParser = new LLSDParser(); + $LLSDParser->parse($str); + } + catch (Exception $e) + { + return array(); + } + + return $LLSDParser->GetLLSDObject(); +} + +?> diff --git a/htdocs/lib/llsd_encode.php b/htdocs/lib/llsd_encode.php new file mode 100644 index 0000000..51a9480 --- /dev/null +++ b/htdocs/lib/llsd_encode.php @@ -0,0 +1,253 @@ +'; + $this->encode_node($node); + echo ''; + return ob_get_clean(); + } + + function encode_node(&$node) + { + switch (gettype($node)) + { + case 'array': // if (is_array($node)) + if ($this->detect_map($node)) + { + echo ''; + foreach ($node as $key => &$value) + { + echo '', + htmlspecialchars($key, ENT_NOQUOTES), + ''; + $this->encode_node($value); + } + echo ''; + } + else + { + echo ''; + foreach ($node as &$value) + { + $this->encode_node($value); + } + echo ''; + } + break; + + case 'integer': // else if (is_int($node)) + echo '', + htmlspecialchars($node, ENT_NOQUOTES), + ''; + break; + + case 'double': // else if (is_float($node)) + echo '', + htmlspecialchars($node, ENT_NOQUOTES), + ''; + break; + + case 'boolean': // else if (is_bool($node)) + if ($node) echo 'true'; + else echo 'false'; + break; + + case 'object': // else if (is_object($node)) + switch (get_class($node)) + { + 
case "llsd_UUID": + echo '', + htmlspecialchars($node->Get(), ENT_NOQUOTES), + ''; + break; + + case "llsd_URI": + echo '', + htmlspecialchars($node->Get(), ENT_NOQUOTES), + ''; + break; + + case "llsd_Date": + echo '', + htmlspecialchars($node->Get(), ENT_NOQUOTES), + ''; + break; + + case "llsd_Binary": + echo ''; + $this->encode_binary($node); + echo ''; + break; + + default: + echo '', + htmlspecialchars($node, ENT_NOQUOTES), + ''; + break; + } + break; + + case 'NULL': // else if ($node === null) + echo ''; + break; + + default: //else + echo '', + htmlspecialchars($node, ENT_NOQUOTES), + ''; + } + } + + + function detect_map(&$node) + { + // This routine accounts for only about 10% of the time + $index = 0; + foreach ($node as $key => &$value) + { + if ($key !== $index) return true; + ++$index; + } + return false; + } + + + function encode_string(&$node) + { + # NB: This function has been in-lined into encode_node() + echo htmlspecialchars($node, ENT_NOQUOTES); + # NB: DO NOT add a charset argument ('UTF-8') + # In that case, PHP only supports 16-bit Unicode chars. + # Which is horribly, horribly broken. + } + + function encode_attribute(&$node) + { + # NB: This function has been in-lined into encode_node() + echo htmlspecialchars($node, ENT_QUOTES); + # NB: DO NOT add a charset argument ('UTF-8') + # In that case, PHP only supports 16-bit Unicode chars. + # Which is horribly, horribly broken. + } + + function encode_binary(&$node) + { + $encoding = $node->GetEncoding(); + + if ($encoding == "base64") + { + echo base64_encode($node->Value); + } + else + { + echo ''; + } + } + +} + +function llsd_encode(&$node) +{ + $encoder = new LLSD_Encoder(); + return $encoder->encode($node); +} + +/* OPTIMIZATION + +This file has been heavily optimized as it has been shown to be one of the +significant bottlenecks in the system. Due to the vagaries of PHP, many of +these optimizations are counter to good coding practice. 
Do not undo these +changes without careful profiling and understanding. All of these methods +were confirmed by careful timing tests. Many are listed among common PHP +optimizations by other programmers on the Internet. + +1) Generating the string. +Three output methods were compared, and are shown with relative timing: + concatenating strings 1.00 (baseline) + writing to php://temp 1.14 + output buffering 0.90 + +Savings: 10%. + + +2) Using switch(gettype($node)) +The PHP manual cautions against relying on gettype() because it does not guarantee that +the returned strings won't change in future versions of PHP. The unit tests +will catch if this changes in a way that breaks this code. + +The recommended way of using a cascade of if tests on is_array(), is_int(), etc. +is retained in the comments here for reference, but should not be used. + +Savings: 3.5% + + +3) Inlining calls +Function call overhead, even to compiled-in library functions, is very +expensive in PHP. Calls to encode_string and encode_attribute were inlined +into encode_node. The functions were left in, as the comments therein are +extremely important for future programmers. + +Savings: 10% + +4) Combining echo statements +Echo in PHP is not a function, and can take a number of values in a single +statement. Though the code would be clearer with multiple echo statements in +many places, they have been combined into one statement. Note that it is not +worth returning a string from a called function just so that it can be combined +into a single echo statement of the caller. In those cases, it is best to just +echo from within the called function. + +Savings: 6% + +5) Using foreach +While the PHP manual states that foreach($a as $k => $v) is the same as +reset($a); while (list($k, $v) = each($a)), the foreach version is faster. + +Savings: 24% + +6) Using & in foreach +Coding foreach with a reference on the value yields an improvement. 
+ +Savings: 6% +----------- +Total Savings after all optimizations: 46%, or 1.8x faster! + +*/ + +?>