<?php

/////////////////////////////////////////////////////////
//
//  AHNAR CSA3004 APT University of Malta
//  Ian Bugeja 2005
//
//////////////////////////////////////////////////////////

// Entry script: fetches the page named by the "ahnarurl" request parameter over
// plain HTTP, follows redirects, and hands the response to HTML_Parser.
// Variables set here and read further down the file:
//   $url, $httpcon, $starttime  - always
//   $newcontents                - on success and on the 'nocontent' path
//   $htmlparser                 - on success only
// Everything echoed while fetching is wrapped in an HTML comment opened here
// ("<!--") and closed by the literal "-->" that follows this PHP section.

require_once 'HTML_Parser.php';
require_once 'HTTP_Connect.php';
require_once 'AHNAR_Tools.php';

session_start();
header("Cache-control: private"); // IE 6 Fix.

$starttime = microtime(true); // start mark for the execution-time report at the end of the page

// The target URL is mandatory.
// NOTE(review): redError()/stopError() presumably terminate the script - confirm in AHNAR_Tools.
$url = isset($_REQUEST["ahnarurl"]) ? $_REQUEST["ahnarurl"] : '';
if (!$url) {
    AHNAR_Tools::redError(0, "Missing url...please specify ?ahnarurl= parameter with URL to script.");
}
if (stripos($url, 'ahnar.iannet.org') !== false) {
    AHNAR_Tools::redError(0, " Sorry you cannot visit AHNAR from within");
}

// NOTE(review): sort_query() is assumed to return a query string - it is used
// below with strlen() and compared against "".
$queryget  = HTTP_Connect::sort_query($_GET);
$querypost = trim((string) HTTP_Connect::sort_query($_POST)); // the trimmed value was previously thrown away

echo "<!--";

$redirects    = 0;
$maxredirects = 10; // stop runaway redirect chains instead of looping until the time limit

do {
    // ---------------------------------------------------------------
    // Normalise and split the URL
    // ---------------------------------------------------------------
    $url = trim(str_replace('*amp;', '&', $url));
    // Only a leading scheme counts; "http://" inside a query string must not suppress the prefix.
    if ((stripos($url, 'http://') !== 0) && (stripos($url, 'https://') !== 0)) {
        $url = 'http://' . $url;
    }

    $urlparsed = parse_url($url);
    if (!is_array($urlparsed)) {
        $urlparsed = array(); // parse_url() returns false on seriously malformed input
    }

    $scheme = isset($urlparsed["scheme"]) ? $urlparsed["scheme"] : '';
    if ($scheme == '') {
        $scheme = 'http';
    } elseif (strtolower($scheme) == 'https') {
        AHNAR_Tools::stopError(1, "Sorry https not allowed.");
    }

    $host = isset($urlparsed["host"]) ? $urlparsed["host"] : '';
    if ($host == '') {
        AHNAR_Tools::stopError(1, "$host incorrect host");
    }

    $port = empty($urlparsed["port"]) ? 80 : $urlparsed["port"];

    $path = isset($urlparsed["path"]) ? $urlparsed["path"] : '';
    if ($path == '') {
        $path = '/';
    }

    // Host plus explicit port (if any), used to rebuild absolute URLs.
    $hostport = $host . (empty($urlparsed["port"]) ? '' : ':' . $urlparsed["port"]);

    // ---------------------------------------------------------------
    // Request target: the caller's GET parameters win; otherwise reuse a
    // query string embedded in the URL itself (e.g. after a redirect).
    // NOTE(review): $queryget is not cleared on redirect, so a redirect
    // target's own query is ignored whenever the original request carried
    // parameters - confirm whether that is intended.
    // ---------------------------------------------------------------
    $query = $queryget; //$urlparsed["query"];
    if ($query != '') {
        $retrieve = $path . '?' . $query;
    } else {
        $retrieve = $path;
        $mp = strpos($url, "?");
        if ($mp !== false) { //when moved
            $retrieve .= substr($url, $mp);
        }
    }
    $retrieve = str_replace(" ", "%20", $retrieve);
    // The target is written straight into the request line: never let CR/LF through.
    $retrieve = str_replace(array("\r", "\n"), '', $retrieve);

    // Directory part of the path (heuristic: a '.' in the path means it names a file).
    if (strpos($path, '.') !== false) {
        $path1 = substr($path, 0, strrpos($path, '/'));
    } else {
        $path1 = $path;
    }
    // Base URL handed to the HTML parser; the port needs its ':' separator.
    $nurl = 'http://' . $hostport . $path1;

    // ---------------------------------------------------------------
    // Cookie handling: cookies are stored in the session keyed by host
    // ---------------------------------------------------------------
    $cookiestring = "";
    if (isset($_SESSION['cookies'][$host])) {
        $cookiestring = HTTP_Connect::sort_cookie($_SESSION['cookies'][$host]);
    }

    $loopflag = false;
    $httpcon  = new HTTP_Connect();

    /////////////////////////////////////////////////////////////
    //Construct HTTP GET/POST Header----------------------------
    //===========================================================
    $ispost    = ($querypost != "");
    $reqheader = ($ispost ? "POST" : "GET") . " $retrieve HTTP/1.1\r\n";
    $reqheader .= "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*\r\n";
    $reqheader .= "Accept-Language: en-us\r\n";
    //$reqheader .= "Accept-Encoding: deflate\r\n";
    $reqheader .= "Host: $host\r\n";
    if ($cookiestring != "") {
        $reqheader .= "Cookie: " . $cookiestring . "\r\n";
    }
    $reqheader .= "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)\r\n";
    if ($ispost) {
        $reqheader .= "Content-Length: " . strlen($querypost) . "\r\n";
        $reqheader .= "Content-Type: application/x-www-form-urlencoded\r\n";
    }
    $reqheader .= "Connection: Close\r\n\r\n";
    if ($ispost) {
        $reqheader .= $querypost; // POST body
    }

    $contents = $httpcon->http_request($host, $port, $reqheader); //send request and get response

    if (isset($httpcon->resheader['set-cookie'])) { //Add Cookie to SESSION
        $_SESSION['cookies'][$host] = $httpcon->resheader['set-cookie'];
    }

    // ---------------------------------------------------------------
    // Dispatch on the outcome reported by HTTP_Connect
    // ---------------------------------------------------------------
    $errcode = isset($httpcon->error[0]) ? $httpcon->error[0] : '';
    $safeurl = htmlspecialchars($url, ENT_QUOTES); // $url is user/remote controlled

    if (($errcode == 'nohost') || ($errcode == 'connerr')) {
        echo "--><html><head><link rel='stylesheet' href='ahnarstyle.css'></head><body class='ahnarbody'>";
        if ($errcode == 'nohost') {
            echo "Host not found...Please specify a correct host next time. Go [<a href='javascript:history.back()'>Back</a>]<br>";
        } else {
            echo "Error in connection...please try again... Go [<a href='javascript:history.back()'>Back</a>]<br>";
        }
        echo "<a href='http://ahnar.dyndns.org/'>AHNAR</a><br>";
        die("Error: " . $httpcon->error[1] . " Error Number: " . $httpcon->error[2] . "</body></html>");
    } elseif ($errcode == 'nocontent') { //content other than text/html, text/plain
        $ctype = isset($httpcon->resheader['content-type'])
            ? htmlspecialchars($httpcon->resheader['content-type'], ENT_QUOTES)
            : '';
        $newcontents  = "<body class='ahnarbody'>";
        $newcontents .= "<h1 class='ahnarlarge'>AHNAR&nbsp;&nbsp;&nbsp;&nbsp;<span class='ahnarmedium'>[<a href='javascript:history.back()'>Back</a>]</span><hr style='color:white'></h1>";
        $newcontents .= "Incorrect Content Type.. " . $ctype . " not supported by AHNAR<br>";
        $newcontents .= "Contents of the <a href='$safeurl' target='_blank'>URL</a> is also below:<br>";
        $newcontents .= "<iframe src='$safeurl' width='95%' height='80%'>IFrame not supported</iframe>";
        $newcontents .= "<br><br><div class='ahnarsmall'>Ian Bugeja 2005, University of Malta, CSA3004 APT</div>";
    } elseif ($errcode == 'moved') {
        $querypost = ""; // the redirect target is fetched with GET
        $moved = isset($httpcon->resheader['location']) ? trim($httpcon->resheader['location']) : '';

        if ($moved == '') {
            // A redirect without a Location header cannot be followed.
            die("-->$host Server error..Page Cannot be retrieved");
        }
        if (++$redirects > $maxredirects) {
            die("-->$host Too many redirects..Page Cannot be retrieved");
        }

        if ((stripos($moved, 'http://') === 0) || (stripos($moved, 'https://') === 0)) {
            $url = $moved;                       // absolute (https is rejected on the next pass)
        } elseif (strpos($moved, '//') === 0) {
            $url = 'http:' . $moved;             // protocol-relative
        } elseif ($moved[0] == '/') {
            $url = $hostport . $moved;           // host-relative
        } else {
            // Relative to the directory of the current document.
            if (strpos($moved, './') === 0) {
                $moved = substr($moved, 2);
            }
            $url = $hostport . rtrim($path1, '/') . '/' . $moved;
        }

        $loopflag = true;
    } elseif ($errcode == 'server') {
        die("-->$host Server error..Page Cannot be retrieved");
    } else {
        // Record the visit unless the user is navigating back or merely reloading.
        $isback = isset($_REQUEST['ahnarback']) && ($_REQUEST['ahnarback'] == 'true');
        if (!$isback) {
            if (!isset($_SESSION['visited']) || (end($_SESSION['visited']) != $url)) {
                $_SESSION['visited'][] = $url; //store visited URL in Session
            }
        }

        //
        //HTML PARSING-------------------------------------------
        //============
        $htmlparser  = new HTML_Parser('', $nurl);
        $newcontents = $htmlparser->html_parse($contents);

        $_SESSION['titles'][] = trim($htmlparser->pagetitle); //store page title in Session
    }
} while ($loopflag == true);

?>
-->
<?php
// Emit the DOCTYPE captured from the fetched page, or a default one.
// $htmlparser does not exist when the fetch ended on the unsupported-content
// path, so test for it before reading its properties.
if (isset($htmlparser) && ($htmlparser->docutype != '')) { //<!DOCTYPE tag
    echo $htmlparser->docutype;
} else {
    echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">', "\n";
}
?>

<?php /* "o" is a date() format character (ISO-8601 year), so the literal word "of" is escaped. */ ?>
<!-- Page Processed on:<?php echo date('l dS \o\f F Y h:i:s A'); ?>-->
<?php /* The URL is request-controlled: escape it so it cannot terminate the HTML comment. */ ?>
<!-- URL Retrieved: <?php echo htmlspecialchars($url, ENT_QUOTES); ?>-->
<html>

   <head>
      <title>AHNAR <?php if ($htmlparser->pagetitle != "") echo '[ '.$htmlparser->pagetitle.' ]'?></title>
      <meta http-equiv="generator" content="AHNAR">
      <?php if ($htmlparser->stshlink != '') echo $htmlparser->stshlink;
            if (
$httpcon->error[0] == 'nocontent') echo "<link rel='stylesheet' href='ahnarstyle.css'>";
      
?>
   </head>

<?php
// Opening <body> tag plus a temporary "header loading" strip.
// - Framed pages get nothing here.
// - Pages with fewer than 50 characters of text get only their own body tag.
// - On the unsupported-content path $htmlparser does not exist and nothing is
//   emitted (the replacement markup in $newcontents carries its own <body>).
if (isset($htmlparser) && ($htmlparser->conframes == false)) { //page does not contain frames
    if (strlen($htmlparser->words) >= 50) { //page contents >= 50
        $bodyerror = isset($httpcon->error[0]) ? $httpcon->error[0] : '';
        if ($bodyerror != '') {
            echo "<body class='ahnarbody'>";
        } elseif ($htmlparser->bodytag != "") {
            echo $htmlparser->bodytag;
        } else {
            echo "<body style='text-align: left'>";
        }

        //Leave empty line for header
        echo "&nbsp;<br>";
        echo "<div style='position:absolute; top:0px; left:0px'><nobr><span style=\"background-color: Background; color: white; font-family: Tahoma; font-size: x-small\">";
        echo "&nbsp;<strong>AHNAR</strong> ---- (header loading....)&nbsp;</span></nobr></div>";
    } else {
        echo $htmlparser->bodytag;
    }
}
?>

<!--
<?php
// KEYWORD PROCESSING (common words) and LINK SUGGESTION
//
// Skipped when the page was requested as "back", when no parsed page exists
// (unsupported content type), or when the tag-stripped text is shorter than
// 100 characters.
//
// Session keyword record, as used by the code below:
//   [0] keyword text   [1] occurrence count   [2] 1 if updated by this page
//   [3] age in pages   [4] number of updates
// NOTE(review): the record layout is inferred from the reads/writes below;
// confirm against AdaptiveHyp.
$ahnarback = isset($_REQUEST['ahnarback']) && ($_REQUEST['ahnarback'] == 'true');

if (!$ahnarback && isset($htmlparser) && (strlen($htmlparser->words) >= 100)) {
    require_once 'AdaptiveHyp.php';
    require_once 'AHNAR_Tools.php';
    $adapt = new AdaptiveHyp();

    $visitedcount = isset($_SESSION['visited']) ? count($_SESSION['visited']) : 0;
    // Second-to-last visited URL; absent until two pages have been recorded.
    // NOTE(review): the original comment calls this a page-refresh check, but it
    // compares against the entry before the current one - confirm the intent.
    $prevurl = ($visitedcount >= 2) ? $_SESSION['visited'][$visitedcount - 2] : null;

    if ($url != $prevurl) { //do not age/reupdate keyword if page refresh
        if (isset($_SESSION['keywords'])) {
            //Reset Keywords in Session
            foreach ($_SESSION['keywords'] as &$okey) {
                $okey[3] = $okey[3] + 1; //age the keyword
                $okey[2] = 0;            //updated last is 0
            }
            unset($okey); // drop the dangling reference

            //Remove old keywords with no importance: seen fewer times than pages
            //visited AND older than half the pages visited. Rebuilding the list
            //(rather than splicing inside an index loop) checks every entry.
            $visit = $visitedcount;
            $kept  = array();
            foreach ($_SESSION['keywords'] as $kw) {
                if (($kw[1] < $visit) && ($kw[3] > $visit / 2)) {
                    continue; //REMOVE
                }
                $kept[] = $kw;
            }
            $_SESSION['keywords'] = $kept;

            if (count($_SESSION['keywords']) > 25) { //keep below 25 approx
                $ahtools = new AHNAR_Tools();
                // NOTE(review): assumes KeywordSort() returns the list with the
                // least important keywords last - confirm.
                $sorted = $ahtools->KeywordSort($_SESSION['keywords']);
                while (count($sorted) >= 25) {
                    array_pop($sorted); // array_pop() returns the removed element, not the array
                }
                $_SESSION['keywords'] = $sorted;
            }
        }

        //get popular keywords
        $wordcount = substr_count($htmlparser->words, " "); //approx number of words present
        if ($wordcount >= 1000) {
            $maxa = 8; //how many popular keywords to get
        } elseif ($wordcount >= 800) {
            $maxa = 7;
        } elseif ($wordcount >= 500) {
            $maxa = 5;
        } elseif ($wordcount >= 200) {
            $maxa = 3;
        } else {
            $maxa = 0;
        }

        $found = null;
        if ($maxa > 0) {
            $found = $adapt->popular($htmlparser->words, $_SESSION['keywords'], $maxa);
        }
        unset($_SESSION['latestkeys']); //empty latest keys

        //Merge found keywords into Session
        if (is_array($found)) {
            foreach ($found as $k) {
                $kflag = false;

                if (isset($_SESSION['keywords'])) {
                    foreach ($_SESSION['keywords'] as &$okey) {
                        if (AdaptiveHyp::equal($okey[0], $k[0])) {
                            //MERGE into SESSION
                            $kflag   = true;
                            $okey[1] = $okey[1] + $k[1];
                            $okey[2] = 1;            //marks that updated last
                            $okey[4] = $okey[4] + 1; //updated times
                            $_SESSION['latestkeys'][] = $k;
                            break;
                        }
                    }
                    unset($okey);
                }

                if ($kflag == false) { //not found thus add to list
                    $_SESSION['keywords'][]   = $k;
                    $_SESSION['latestkeys'][] = $k;
                }
            }
        }
    }

    //---------------------------------
    //Suggestion of Links to Follow----
    //=================================
    if ($visitedcount > 2) { //SUGGEST ONLY IF MORE THAN 2 HAVE BEEN VISITED
        // Candidate links: array key = link index, value = offset in $newcontents
        // (the value is later used as an insertion position).
        $adapttags1 = $adapt->adaptKeyLinks($htmlparser->ataglist, $htmlparser->atagcount, $htmlparser->middleatext, $_SESSION['keywords'], $newcontents);
        $adapttags2 = $adapt->adaptLinks($htmlparser->ataglist, $htmlparser->atagcount, $newcontents);

        //merge the 2 above arrays removing duplicates (keyword-based entries win)
        if (isset($adapttags1) && isset($adapttags2)) {
            foreach ($adapttags1 as $ad1k => $ad1v) {
                $adapttags2[$ad1k] = $ad1v;
            }
            $adapttags = $adapttags2;
        } elseif (isset($adapttags1)) {
            $adapttags = $adapttags1;
        } elseif (isset($adapttags2)) {
            $adapttags = $adapttags2;
        }

        $sureadapt  = array(); // link index => offset of links that will be marked
        $adaptcolor = array(); // link index => arrow style passed to getArrow()

        if (isset($adapttags)) {
            foreach ($adapttags as $kadt => $adt) { //out of adaptable find those to adapt for sure
                //skip if a link to the same location has already been suggested
                $adaptflag = true;
                foreach ($sureadapt as $suk => $suv) {
                    if (trim($htmlparser->adest[$suk], "/ ") == trim($htmlparser->adest[$kadt], "/ ")) {
                        $adaptflag = false; //do not suggest
                    }
                }

                $linktext = $htmlparser->middleatext[$kadt];

                if ($adaptflag == false) {
                    //already suggested
                } elseif (stripos($linktext, "@") !== false) {
                    //email should not be suggested
                } elseif ($linktext == "") {
                    //neither empty
                } elseif (AdaptiveHyp::commonlink($linktext)) {
                    //common link...do nothing
                } elseif (stripos($htmlparser->adest[$kadt], '#') !== false) {
                    if (AdaptiveHyp::inPageLink($htmlparser->ataglist, $htmlparser->middleatext, $htmlparser->adest[$kadt], $newcontents, $_SESSION['keywords'])) {
                        $sureadapt[$kadt]  = $adt; //keyword near link
                        $adaptcolor[$kadt] = 0;
                    }
                } elseif (trim($htmlparser->adest[$kadt], "/ ") == trim($url, "/ ")) {
                    //points to same page...do nothing
                } elseif (AdaptiveHyp::linkList($adt, $newcontents)) {
                    //link list
                } elseif (AdaptiveHyp::beforeAfterLink($adt, $newcontents, $_SESSION['keywords'])) {
                    $sureadapt[$kadt]  = $adt; //keyword near link
                    $adaptcolor[$kadt] = 1;
                } elseif (AdaptiveHyp::textLink($linktext, $_SESSION['keywords'])) {
                    $sureadapt[$kadt]  = $adt; //link text matches keyword
                    $adaptcolor[$kadt] = 1;
                } elseif (AdaptiveHyp::implink($linktext)) {
                    $sureadapt[$kadt]  = $adt;
                    $adaptcolor[$kadt] = 2;
                }
            }

            //Further Filter on Suggestion: when several suggested links share the
            //same text (and it is not an "important" link text), drop them all by
            //zeroing their offsets. Iterating over a key snapshot avoids
            //by-reference loop variables.
            $surekeys = array_keys($sureadapt);
            foreach ($surekeys as $ka) {
                if (AdaptiveHyp::implink($htmlparser->middleatext[$ka]) != false) {
                    continue;
                }
                foreach ($surekeys as $kb) {
                    if (($ka != $kb) && AdaptiveHyp::equal($htmlparser->middleatext[$ka], $htmlparser->middleatext[$kb])) {
                        $sureadapt[$ka] = 0; //remove some of them
                        $sureadapt[$kb] = 0;
                    }
                }
            }

            //Too few suggestions (under 5% of the links): fall back to the looser
            //text match, tagged with arrow style 3.
            if (count($sureadapt) < $htmlparser->atagcount * 0.05) {
                foreach ($adapttags as $kadt => $adt) {
                    if (trim($htmlparser->adest[$kadt], "/ ") == trim($url, "/ ")) {
                        //url points to same page
                    } elseif (AdaptiveHyp::commonlink($htmlparser->middleatext[$kadt])) {
                        //common link...do nothing
                    } elseif (AdaptiveHyp::textLink($htmlparser->middleatext[$kadt], $_SESSION['keywords'], 1)) {
                        $sureadapt[$kadt]  = $adt; //link text matches keyword
                        $adaptcolor[$kadt] = 3;
                    }
                }
            }
        }

        //Modify Page contents by inserting Arrow
        asort($sureadapt);                     // ascending offsets, so earlier inserts only shift later ones
        $sureadapt = array_unique($sureadapt); // one arrow per offset; also collapses the zeroed entries
        $surecount = count($sureadapt);
        $shift     = 0;                        // total length of the arrows inserted so far

        foreach ($sureadapt as $ksadt => $sadt) {
            if ($sadt == 0) {
                continue; // zero marks a suggestion removed by the filter above
            }
            // Fallback (style 3) suggestions are only shown while suggestions
            // stay within 20% of the page's links.
            if (($adaptcolor[$ksadt] == 3) && ($htmlparser->atagcount * 0.2 < $surecount)) {
                continue;
            }
            $arrow       = AHNAR_Tools::getArrow($adaptcolor[$ksadt]);
            $posl        = $sadt + $shift;
            $newcontents = substr($newcontents, 0, $posl) . $arrow . substr($newcontents, $posl);
            // Track the real length: arrows of different styles need not be equally long.
            $shift += strlen($arrow);
        }
    }
}

?>

-->

        <?php echo $newcontents?>


<?php
// AHNAR toolbar, absolutely positioned at the top of the page.
// Shown only for non-framed, successfully parsed pages: $htmlparser does not
// exist on the unsupported-content path, which is also excluded explicitly.
$toolbarerror = isset($httpcon->error[0]) ? $httpcon->error[0] : '';

if (isset($htmlparser) && ($htmlparser->conframes == false) && ($toolbarerror != 'nocontent')) { ///insert AHNAR Header
    // $url is request/remote controlled: escape it for HTML text and attributes.
    $safeurl      = htmlspecialchars($url, ENT_QUOTES);
    $visitedcount = isset($_SESSION['visited']) ? count($_SESSION['visited']) : 0;

    echo "<div style='position:absolute; top:0px; left:0px'>";
    echo "<nobr>";
    echo "<span style='background-color: Background; color: white; font-family: Tahoma; font-size: x-small'>";
    echo "&nbsp;<a style='background-color: Background; color: white' href='index.php'><strong>AHNAR</strong></a> -";

    echo "<b>";
    if (trim($htmlparser->pagetitle) == "") { //no pagetitle: show the URL
        echo '[ ' . $safeurl . ' ]';
    } elseif (strlen($htmlparser->pagetitle) > 40) { //LONG page title: shorten
        echo '[ ' . substr($htmlparser->pagetitle, 0, 40) . '... ]';
    } else {
        echo '[ ' . $htmlparser->pagetitle . ' ]';
    }
    echo "</b> -<i> Ian Bugeja 2005, University of Malta, CSA3004 APT </i>";

    if ($visitedcount > 2) { //see also available on more than 2 page visits
        echo "[<a style=\"background-color: Background; color: white\" href=\"seealso.php\" target=\"_blank\">See also...</a>]";
    } else {
        echo "[<a style=\"background-color: Background; color: white\" href=\"javascript:alert('Sorry See Also... function disabled. Visit more than 2 pages please.')\">See also...</a>]";
    }

    $ptit = urlencode($htmlparser->pagetitle);
    echo "[<a style='background-color: Background; color: white' href='bookmark.php?ahurl=$safeurl&ahtit=$ptit' target='_blank'>Bookmark</a>]";
    echo "[<a style='background-color: Background; color: white' href='correct.php' target='_blank'>Correct</a>]";
    echo "[<a style='background-color: Background; color: white' href='negative.php?ahneg=$safeurl'>Negative</a>]";
    echo "[<a style='background-color: Background; color: white' href='destroy.php' target='_top'>Reset</a>]&nbsp;";
    echo "</span>";
    echo "</nobr> </div>";
}
?>

   </body>
</html>

<!--
<?php
// Report the script's wall-clock run time inside the surrounding HTML comment.
// $starttime is taken with microtime(true) at the top of the script.
$endtime   = microtime(true);
$timetaken = $endtime - $starttime;
echo "Execution Time: $timetaken";
?>
-->