/*
////////////////////////////////////////////////////////////////////////////////
// Jazarsoft HTML Parser                                                      //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
// VERSION    : 1.0                                                           //
// AUTHOR     : James Azarja                                                  //
// CREATED    : 2 May 2001                                                    //
// WEBSITE    : http://www.jazarsoft.com/                                     //
// SUPPORT    : support@jazarsoft.com                                         //
// BUG-REPORT : bugreport@jazarsoft.com                                       //
// COMMENT    : comment@jazarsoft.com                                         //
// LEGAL      : Copyright (C) 2001 Jazarsoft.                                 //
//                                                                            //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
// This code may be used and modified by anyone so long as this header and    //
// copyright information remains intact.                                      //
//                                                                            //
// The code is provided "as-is" and without warranty of any kind,             //
// expressed, implied or otherwise, including and without limitation, any     //
// warranty of merchantability or fitness for a particular purpose.           //
//                                                                            //
// In no event shall the author be liable for any special, incidental,        //
// indirect or consequential damages whatsoever (including, without           //
// limitation, damages for loss of profits, business interruption, loss       //
// of information, or any other loss), whether or not advised of the          //
// possibility of damage, and on any theory of liability, arising out of      //
// or in connection with the use or inability to use this software.           //
//                                                                            //
////////////////////////////////////////////////////////////////////////////////
// HISTORY :                                                                  //
////////////////////////////////////////////////////////////////////////////////
//                                                                            //
// 1.0, May 2001                                                              //
// - Initial Development (Convert from Pascal/Delphi)                         //
//                                                                            //
////////////////////////////////////////////////////////////////////////////////
*/

/**
 * HTML parser object: keeps an HTML document in $html and a collection of
 * markup entries in $elements that the lookup methods work on.
 */
class htmlparser_class {
    // HTML text currently held by the object; overwritten by InsertHTML()
    // and cleared at the start of LoadHTML().
    var $html="";
    // NOTE(review): presumably the name of a callback fired for each tag;
    // nothing in the visible code reads it - confirm before relying on it.
    var $ontagfound="";
    // NOTE(review): presumably the name of a callback fired for each text
    // run; nothing in the visible code reads it - confirm.
    var $ontextfound="";
    // Array walked by getTagResource(), which compares the beginning of each
    // entry against "<" followed by a tag name.
    var $elements=array();

    /**
     * Replaces the stored HTML with the supplied value.
     *
     * @param mixed $htmlcode Value stored as-is in $this->html (expected to
     *                        be HTML text).
     * @return bool Always true.
     */
    function InsertHTML($htmlcode) {
        // This reset is immediately superseded by the assignment below.
        $this->html = "";
        $this->html=$htmlcode;
        return true;
    }

    // LoadHTML begins by clearing the stored HTML and testing whether
    // $filename exists; the rest of the method continues past this point.
    function LoadHTML($filename) {
        $this->html = "";
        if (!file_exists ($filename)) {
//return false; } //$filename="http://www.dynamick.it"; //echo $filename."
";print_r($attr);echo ""; if (is_array($attr)) foreach ($attr as $count=>$attrArr) { if (is_array($attrArr)) foreach ($attrArr as $i=>$a) { if ($a!="" and $count==2) $res[$a]=$attr[3][$i]; if ($a!="" and $count==5) $res[$a]=$attr[6][$i]; if ($a!="" and $count==8) $res[$a]=$attr[9][$i]; } } return $res; }

    /**
     * Splits a URL into scheme, authority, path, query and fragment.
     *
     * The expression is the one this method has always used (it is the
     * generic URI pattern from RFC 3986, appendix B, without the leading
     * anchor). It is now evaluated with preg_match() because the ereg
     * family was removed from PHP in 7.0.
     *
     * @param mixed $url URL to analyse; it is cast to string for matching.
     * @return array Keys "url" (the argument, untouched), "scheme",
     *               "authority", "path", "query" and "fragment". A component
     *               that is absent or empty is reported as false.
     */
    function linkAnalyzer($url) {
        // "~" is the delimiter because the pattern itself contains "/" and
        // "#". The "s" modifier lets "." match newlines, which is how POSIX
        // "." behaved in the ereg version.
        $pattern = '~(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?~s';
        $groups = array();
        preg_match($pattern, (string)$url, $groups);

        // Result key => capture group holding that component.
        $groupOf = array(
            "scheme"    => 2,
            "authority" => 4,
            "path"      => 5,
            "query"     => 7,
            "fragment"  => 9,
        );

        $r = array("url" => $url);
        foreach ($groupOf as $name => $index) {
            // preg_match() omits trailing groups that did not participate
            // and yields "" for empty ones; both cases are reported as false.
            if (isset($groups[$index]) && $groups[$index] !== "") {
                $r[$name] = $groups[$index];
            } else {
                $r[$name] = false;
            }
        }
        return $r;
    }

    /**
     * Collects the attributes of every stored element that starts with the
     * given tag.
     *
     * Every entry of $this->elements whose lower-cased beginning equals
     * "<" followed by the lower-cased tag name is handed to getAttributes(),
     * and the results are gathered in order.
     *
     * NOTE(review): the comparison is a plain prefix test, so "a" also
     * matches entries such as "<abbr ...>" or "<area ...>". That behaviour
     * is unchanged here; tighten it only after checking the callers.
     *
     * @param string $tag Tag name without the angle bracket (default "a").
     * @return array One getAttributes() result per matching element; an
     *               empty array when nothing matches.
     */
    function getTagResource($tag="a") {
        // The element side is lower-cased below, so normalise the tag as
        // well; otherwise an upper-case argument could never match.
        $prefix = "<" . strtolower($tag);
        $prefixLength = strlen($prefix);

        // Initialised up front so that "no match" yields an empty array
        // rather than an undefined variable.
        $attribArr = array();

        if (is_array($this->elements)) {
            // foreach always starts from the first entry. The former each()
            // loop relied on the array's internal pointer, was never rewound
            // (so a second call found nothing), and each() itself no longer
            // exists in PHP 8.
            foreach ($this->elements as $code) {
                if (strtolower(substr($code, 0, $prefixLength)) === $prefix) {
                    $attribArr[] = $this->getAttributes($code);
                }
            }
        }
        return $attribArr;
    }

function includeImportCss($html,$path="",$level=1) { global $urlToGrabArr; preg_match_all('/@import\s[\"]*((http:\/\/[^\/]*){0,1}(.*?))[\"]{0,1};/i', $html,$result); //echo "
";print_r($result);echo "";die; if (is_array($result[3])) foreach ($result[3] as $k=>$v) { $url=dirname($path)."/".$result[3][$k]; //echo "url ricavato: $url