Entry #8
[syntax=php]<html>
<head>
<title>PHP Web Crawler Recon</title>
</head>
<body>
<style type="text/css">
.results_box {
background-color:#FFFEE0;
overflow: auto;
}
.headers {
font-size:22px;
}
.results_format{
border-style:ridge;
border-width:5px;
}
body {
background-color:#F9F4E4;
}
</style>
<form action="crawl.php" name="crawler" method="POST">
URL to crawl: <input type="text" name="url" value="http://www.google.com">
<input type="submit" name="crawlSubmit" value="Crawl!">
</form>
<?php
//Simple PHP reconnaissance tool, written for betterphp.co.uk competition
//Hope I do well Jacek .{-_-}.
//PLEASE Make sure you run this on your 'local' web server for all functionality to work
class Recon
{
    public $urlGiven;       //Target URL, set by the caller after validation
    public $ipAddressGiven; //IP address resolved by gethostbyname() in grabIP()

    /*********************************
    * Url Validation & Errors        *
    *********************************/
    //Validates the user-supplied URL; on a malformed URL it prints a message
    //and stops execution. $formSubmitted is kept for interface compatibility.
    public function validateForm($url, $formSubmitted)
    {
        //http(s) scheme, dotted host, optional :port, optional /path.
        //The dot is escaped (the original '.' matched any character) and the
        //path is optional so the form default "http://www.google.com" passes.
        $regex = "@^https?://[a-z0-9-]+(\.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$@i";
        $is_valid_url = preg_match($regex, $url);
        //BUG FIX: the original died when preg_match() returned 1, i.e. when
        //the URL was VALID. Die only when the URL does NOT match.
        if ($is_valid_url !== 1) {
            $this->errorDie("Please enter a valid URL: Format accepted 'http://www.*.com'");
        }
    }

    //Non-fatal warning: prints the message and continues.
    public function errorWarn($error_string)
    {
        echo $error_string . "<br>";
    }

    //Fatal error (e.g. bad URL): prints the message and stops execution.
    public function errorDie($die_string)
    {
        die($die_string);
    }

    /*********************************
    * grabLinks()                    *
    *********************************/
    //Fetches the page at $this->urlGiven and prints every quoted href target
    //found in it, HTML-escaped, inside a results box.
    public function grabLinks()
    {
        $urlGiven = $this->urlGiven;
        if (!empty($urlGiven)) {
            $page = @file_get_contents($urlGiven); //Suppressed: a failed fetch simply yields no links
            //Capture the quoted href value in one pass instead of the original
            //match-then-split-twice round trip. Group 2 is the clean URL/path.
            $regex = "@href[ ]*=[ ]*('|\")([^\"']*)('|\")@";
            preg_match_all($regex, (string) $page, $linksRetrievedArr);
            echo "<div style='height:250; width:500;' class='results_box'>";
            foreach ($linksRetrievedArr[2] as $cleaned_href) {
                echo htmlentities($cleaned_href) . "<br>"; //Escape before output
            }
            echo "</div>";
        }
    }

    /*********************************
    * grabIP()                       *
    *********************************/
    //Resolves the target host to an IP via gethostbyname() and prints it.
    //Also stores the result in $this->ipAddressGiven.
    public function grabIP()
    {
        $cleanURL = $this->urlGiven;
        $regex = "@^(?:https?://)?([^/]+)@i"; //Strip the scheme, keep host[:port]
        preg_match($regex, $cleanURL, $m);
        $host = isset($m[1]) ? $m[1] : $cleanURL; //Fall back to the raw input if nothing matched
        $serverip = gethostbyname($host); //NOTE: returns the input unchanged on lookup failure
        $this->ipAddressGiven = $serverip;
        echo "<div style='height:20; width:500;' class='results_box'>";
        echo "Server IP Address: " . htmlentities($serverip) . "<br>";
        echo "</div>";
    }

    /*********************************
    * grabServers()                  *
    *********************************/
    //Looks up all DNS records for the target domain and prints the MX and NS
    //server hostnames.
    public function grabServers()
    {
        //Strip "http(s)://" and a leading "www." to get the bare domain.
        //(The original comment here was split across two lines, breaking parsing;
        //preg_replace also avoids the undefined-index case preg_split had when
        //the pattern did not match.)
        $domain = preg_replace("@^https?://(www\.)?@", "", $this->urlGiven);
        $servers_found = @dns_get_record($domain); //All record types
        if ($servers_found === false) {
            $servers_found = array(); //BUG FIX: dns_get_record() returns false on failure
        }
        echo "<div class='results_box' style='height:100; width:500;'>";
        foreach ($servers_found as $record) {
            if ($record['type'] == "MX") {
                //BUG FIX: escaped like the NS branch (the original echoed MX raw)
                echo "MX Server: " . "<i>" . htmlentities($record['target']) . "</i>" . "<br>";
            }
        }
        foreach ($servers_found as $record) {
            if ($record['type'] == "NS") {
                echo "NS: " . "<i>" . htmlentities($record['target']) . "</i>" . "<br>"; //was invalid </br>
            }
        }
        echo "</div>";
    }

    /*********************************
    * grabHeaders()                  *
    *********************************/
    //Performs a HEAD-style request with cURL and prints the raw HTTP response
    //headers, escaped.
    public function grabHeaders()
    {
        $url = $this->urlGiven;
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);            //Target
        curl_setopt($ch, CURLOPT_HEADER, true);         //Include headers in output...
        curl_setopt($ch, CURLOPT_NOBODY, true);         //...and skip the body
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); //Return instead of printing
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);          //Give up after 10 seconds
        $headers = curl_exec($ch);
        curl_close($ch); //BUG FIX: the original leaked the cURL handle
        echo "<div class='results_box' style='height:200; width:500;'>";
        //BUG FIX: print_r() without its second argument prints directly and
        //returns true, so htmlentities() was escaping a boolean and the raw
        //headers bypassed escaping. Capture the string and echo it escaped.
        echo htmlentities(print_r($headers, true));
        echo "</div>";
    }

    /*********************************
    * grabSSH()                      *
    * Banner grab — do not trust     *
    * 100%.                          *
    *********************************/
    //THIS WILL NOT WORK ON A SHARED WEB SERVER; RUN LOCALLY WITH OPEN PORTS.
    public function grabSSH($url, $port)
    {
        $host = preg_replace("@^https?://@", "", $url); //fsockopen() wants a bare host
        $sock = @fsockopen($host, (int) $port, $errno, $errstr, 15); //15s connect timeout
        if (!$sock) {
            $this->errorWarn("Connection to target failed, host could be blocking our attempts");
        } else {
            echo "<div class='results_box' style='height:100; width:500;'>";
            //fgetss() was removed in PHP 8.0; fgets() + htmlentities() is the
            //safe equivalent for displaying the banner.
            echo htmlentities((string) fgets($sock, 150));
            echo "</div>";
            fclose($sock);
        }
    }

    /*********************************
    * grabFTP()                      *
    * Banner grab — do not trust     *
    * 100%.                          *
    *********************************/
    //THIS WILL NOT WORK ON A SHARED WEB SERVER; RUN LOCALLY WITH OPEN PORTS.
    public function grabFTP($url, $port)
    {
        $host = preg_replace("@^https?://@", "", $url); //fsockopen() wants a bare host
        $sock = @fsockopen($host, (int) $port, $errno, $errstr, 15); //15s connect timeout
        if (!$sock) {
            $this->errorWarn("Connection to target failed, host could be blocking our attempts, or authentication needed!");
        } else {
            echo "<div class='results_box' style='height:100; width:500;'>";
            //BUG FIX: the original echoed the banner unescaped (despite the
            //"Safely output" comment) and used the removed fgetss().
            echo htmlentities((string) fgets($sock, 150));
            echo "</div>";
            fclose($sock);
        }
    }

    /*********************************
    * whois()                        *
    * ONLY GODADDY compatible,       *
    * this is to change.             *
    *********************************/
    //Queries $server on port 43 for $query and prints/returns the response.
    //Returns false when the connection fails.
    public function whois($query, $server)
    {
        //Strip scheme and leading www. — whois expects a bare domain.
        //(Dot escaped; the original '.' matched any character.)
        $query = preg_replace("@^https?://(www\.)?@", "", $query);
        if (!defined('AE_WHOIS_TIMEOUT')) {
            define('AE_WHOIS_TIMEOUT', 15); //BUG FIX: guard — a second call would re-define
        }
        global $ae_whois_errno, $ae_whois_errstr;
        //Connecting
        $f = fsockopen($server, 43, $ae_whois_errno, $ae_whois_errstr, AE_WHOIS_TIMEOUT);
        if (!$f) {
            return false; //Connection failed
        }
        //whois protocol: the query terminated by CRLF
        fwrite($f, $query . "\r\n");
        //Receiving response
        $response = '';
        while (!feof($f)) {
            $response .= fgets($f, 1024);
        }
        fclose($f);
        echo "<div class='results_box' style='height:200; word-wrap:break-word; width:500'>";
        echo htmlentities($response); //BUG FIX: the original echoed the server response unescaped
        echo "</div>";
        return $response;
    }
    //End of Recon Class
}
/*******************
* *
* OUTPUT *
* *
*******************/
//Check form submitted & URL not blank; if so run every recon step in order.
$formSubmitted = (isset($_POST['crawlSubmit'])) ? 1 : 0;
//NOTE(review): htmlentities() here is a blunt input sanitiser — fine for
//plain URLs, but it would mangle a URL containing '&' before it is used for
//the network requests. Kept for backward compatibility.
$urlGiven = (isset($_POST['url'])) ? htmlentities($_POST['url']) : NULL;
//Truthiness check kept deliberately: it also skips an empty-string URL,
//which a strict "!== null" comparison would let through.
if ($formSubmitted === 1 && $urlGiven)
{
    $crawler = new Recon();
    /*******************
    * Validate URL     *
    *******************/
    $crawler->validateForm($urlGiven, $formSubmitted); //Dies on a malformed URL
    $crawler->urlGiven = $urlGiven; //Store only after validation passed
    echo "<div class='results_format'>";
    /*******************
    * Grab Links       *
    *******************/
    echo "<div class='headers'> URL's Grabbed From Site </div> <br>";
    $crawler->grabLinks();
    echo "<br>";
    /*******************
    * Grab IP Addy     *
    *******************/
    echo "<div class='headers'> IP Address </div> <br>";
    $crawler->grabIP();
    echo "<br>";
    /*******************
    * HTTP Headers     *
    *******************/
    echo "<div class='headers'> Header Info </div> <br>";
    $crawler->grabHeaders();
    echo "<br>";
    /*******************
    * MX & NS found    *
    *******************/
    echo "<div class='headers'> Servers found! </div> <br>";
    $crawler->grabServers();
    echo "<br>";
    /*******************
    * Grab SSH Ver.    *
    *******************/
    echo "<div class='headers'> SSH Banner Grabbed (Don't rely on this, Nmap is more accurate)</div>";
    $crawler->grabSSH($urlGiven, 22); //Port as int (was the string '22')
    echo "<br>";
    /*******************
    * Grab FTP Ver.    *
    *******************/
    echo "<div class='headers'> FTP Banner Grabbed (Don't rely on this, Nmap is more accurate)</div>";
    $crawler->grabFTP($urlGiven, 21); //Port as int (was the string '21')
    echo "<br>";
    /*******************
    * Whois Results    *
    *******************/
    echo "<div class='headers'> Whois Info </div>";
    $whois_response = $crawler->whois($urlGiven, 'whois.godaddy.com'); //GoDaddy whois records only
    //whois() returns false on connection failure; guard before strlen() and
    //treat very short replies as the whois server's error page.
    if ($whois_response === false || strlen($whois_response) < 100)
    {
        $crawler->errorWarn('There was an error contacting the whois database. Only GoDaddy domains available.');
    }
    echo "</div>";
}
//Have fun Jacek
?>
</body>
</html>[/syntax]