Jump to content
badrh0

[Perl] get_rank automatisé (Alexa)

Recommended Posts

Salam 3alaykoum,

Bon voilà, j'ai écrit un petit script qui m'obtient le rang d'un site à partir du site d'Alexa. En jetant un p'tit coup d'œil dans le code HTML de, par exemple :

get_rank(forumdz.com) on voit ça :

Forumdz.comforumdz.com

Forumdz.com	has a  traffic rank of:	 1455,198

Mais seulement voilà, en visitant ledit site on se rend compte que les requêtes automatisées sont payantes :eek: alors que pour mon navigateur elles sont gratuites :cool: alors j'ai essayé de faire un p'tit script Perl qui me donne le rank d'un site, mais le problème est que je ne m'y connais pas trop en Perl, alors mon code se plante environ 1 fois sur 8 :confused: en plus, pour des ranks trop nuls (genre 22,253,256) il se plante à coup sûr :confused: alors si un connaisseur peut y jeter un coup d'œil pour voir ;) en plus ça peut servir à un tas de choses à mon avis (en tout cas moi ça me sert). Pour l'instant je fais deux requêtes et je les compare, le cas échéant j'en refais une troisième... etc., mais évidemment ça multiplie le temps de travail par au moins deux :confused:

[Pardon c'est pas ordonné :D]

 

#!/usr/bin/perl

use strict;
use warnings;
use LWP::UserAgent;

# get_rank($url)
#
# Downloads the Alexa "traffic_details" page for $url and tries to pull the
# traffic-rank figure out of the returned HTML.
#
# Parameters:
#   $url - site name appended verbatim to the Alexa details URL.
#
# Returns:
#   - when exactly one numeric fragment is found: that fragment, as-is;
#   - otherwise: the concatenation of the number pieces that were paired with
#     an identifier and whose "id:number" text does not match any class name
#     extracted from Alexa's scramble.css.
#
# NOTE(review): this listing appears to have lost text during HTML
# extraction. The lines flagged below (the @content_4 split, the empty m//
# test, both matches against $content_3[0], and the final for-loop header)
# are not valid Perl as they stand and must be restored from the author's
# original before the sub can run.
sub get_rank 
{ # ------------------begin get-rank()---------------------------

my ($url) = @_;

# NOTE(review): agent() sets the User-Agent header value, so the literal
# "User-Agent=" prefix is sent as part of that value - confirm this is intended.
# NOTE(review): the response is used without checking for success; on a
# failed request $content will not contain the expected markers.
my $ua = LWP::UserAgent->new;
$ua->agent("User-Agent=Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.5) Gecko/20061201 Firefox/3.0.0.1");
my $req =  HTTP::Request->new(GET => "http://www.alexa.com/data/details/traffic_details/".$url);
my $req_res = $ua->request($req);
my $content = $req_res->content;

# Isolate the fragment between "has a traffic rank of:" and
# "Traffic History Graph for <url>"; $content_3[0] is that fragment.
# NOTE(review): if the first marker is absent, $content_2[1] is undefined.
# NOTE(review): $url is interpolated into the pattern unescaped, so "." in a
# host name matches any character (consider \Q$url\E).
my @content_2 = split(/has\s*a\s*traffic\s*rank\s*of:/, $content);
my @content_3 = split(/Traffic\s*History\s*Graph\s*for\s*$url/, $content_2[1]);
#print "\n*********************************************\n".$content_3[0]."\n*********************************************\n";
# NOTE(review): the next statement is truncated - its pattern is unterminated
# and runs into what looks like a commented-out alternative. The pattern of
# the @content_5 split below it may also have lost text - TODO confirm.
# @content_4 feeds the identifier list, @content_5 the number list.
my @content_4 = split(/".*?>.*?#my @content_5 = split(/\D/, $content_3[0]); 
my @content_5 = split(/(,?)(,?)/, $content_3[0]); 

#print "identities\n";
#foreach (@content_4) {print $_."\n\n---\n\n"}
#print "nmbers\n";
#foreach (@content_5) {print $_."\n\n---\n\n"}

# Keep only the 4-character pieces of @content_4 as candidate identifiers.
# NOTE(review): the m// test below is empty, presumably because its pattern
# was stripped - TODO restore.
my @identity;
my $i = 0;
foreach (@content_4) {
		if(length($_) == 4 && $_ !~ m//) {$identity[$i] = $_; $i++;}
	     }

# Keep the pieces of @content_5 that are non-empty, contain no "&" and are
# not a bare comma.
my @numbers;
my $j = 0;
foreach(@content_5) 
{
#	if (length($_)!=0 && $_ !~ m/&/ && $_ !~ m/,/) {$numbers[$j] = $_; $j++;}
if (length($_)!=0 && $_ !~ m/&/ && $_ ne ",") {$numbers[$j] = $_; $j++;}
}

#print "identities(clean)\n";
#foreach (@identity) {print $_." -- "}
#print "\nnmbers(clean)\n";
#foreach (@numbers) {print $_." ~~ "}
#print "\n*********************************************\n";
# %identified maps a running counter ($m, starting at 1) to "id:number".
my %identified;
my $m=1;


# Returns 1 if $n is already one of the values of %identified, else 0.
# NOTE(review): this is a named sub nested inside get_rank and it refers to
# the lexical %identified. Named subs bind to the first instance of an outer
# "my" variable (perl warns: Variable "%identified" will not stay shared), so
# on a second call to get_rank this helper would not see the new hash.
sub check_if_exist 
{
my ($n) = @_;
my $exist = 0;
	    foreach my $v (values(%identified))
	    {
	       if($v eq $n) {$exist = 1;};
	    }
return $exist;
}

# Several number pieces: pair numbers with identifiers, then filter them
# using the class names from scramble.css. A single piece is returned
# directly by the else branch at the bottom.
my $sz_numbers = @numbers;
if(@numbers != 1)
{ #------------------------------------------------------------------------------------------------------

# For each number, record at most one entry in %identified ($pass stops the
# inner loop from recording more): "id:number" from the first match, or
# "void:number" from the second.
# NOTE(review): both match expressions inside this loop are truncated (the
# first has no closing delimiter, the second runs straight into the block's
# opening brace) - TODO restore the original patterns.
foreach my $numb (@numbers)
{
my $pass = 0;
foreach my $id (@identity)
	{
		if ($pass != 1)
		{
			if($content_3[0] =~ m/(,?)$numb{1}(,?)) 
			{
			$identified{$m} = $id.":".$numb if(!check_if_exist($id.":".$numb));
#				$n++;
			$m++;
			$pass = 1;

			}elsif ($content_3[0] =~ m/-->(,?)$numb(,?)(,?)$numb(,?)				{
			$identified{$m} = "void".":".$numb if(!check_if_exist("void".":".$numb));
			$m++;
			$pass = 1;

#				}elsif ($content_3[0] =~ m/>$numb or $content_3[0] =~ m/$numb,\d+/ )
#				{
#				$identified{$m} = "void*".":".$numb if(!check_if_exist("void*".":".$numb));
#				$m++;
#				$pass = 1;
#				}
			}
		}
	}
}



# ------------------------------ CSS -----------------------------------

# Download the stylesheet and split it on the "{ display: none }" rule
# bodies so that @css_content_2 holds the class names; the first and last
# elements are then trimmed of the leading "." / trailing " {" remnants.
# NOTE(review): this response is not checked for success either.
my $req2 =  HTTP::Request->new(GET => "http://client.alexa.com/common/css/scramble.css");
my $req_res2 = $ua->request($req2);
my $css_content = $req_res2->content;
my @css_content_2 = split(/\s{\ndisplay: none\n}\n\./, $css_content);
my $sz = @css_content_2;
my @temp0 = split(/\./, $css_content_2[0]);
$css_content_2[0] = $temp0[1];
my @tempL = split(" {", $css_content_2[$sz-1]);
$css_content_2[$sz-1] = $tempL[0];
#foreach (@css_content_2) {print $_."\n";}

#    while( my ($key,$val) = each(%identified) )
#    {
#       foreach (@css_content_2) {
#       				if ($val =~ m/$_/ ) {$key = 0; print "okkkkkkkkkkkkkkkk\n";} 
#       				}
#    }

   # values() yields aliases to the hash values, so assigning to $k rewrites
   # the entry in %identified: any entry matching one of the class names is
   # replaced by the sentinel "0:0".
   foreach my $k (values(%identified))
   {
      foreach (@css_content_2) {
      				if($k =~ /$_/){$k = "0:0";}
      				}
   }

   # Debug loop; its body is commented out.
   foreach my $k (keys(%identified))
   {
#       print "Clef=$k Valeur=$identified{$k}\n";
   }

my $rank;
my @identifiedK = keys(%identified);
#my @identifiedV = values(%identified);
my $size_identified = @identifiedK;

# Append the number part (text after ":") of every entry that is not the
# "0:0" sentinel to $rank.
# NOTE(review): the for-loop header below is truncated (condition and
# increment are missing) - TODO restore.
# NOTE(review): $rank starts undefined, so the first concatenation warns
# under "use warnings".
for(my $num=1; $num	{
my @bit_rank = split(":", $identified{$num});
$rank = $rank.$bit_rank[1] if($identified{$num} ne "0:0" );
}


#print "\n=====>".$rank."\n";
return $rank;
} #------------------------------------------------------------------------------------------------------------------

# Exactly one number piece was found: return it unchanged.
else

{
#print "one parameter\n";
my $rank = $numbers[0];
#print $rank."\n";
return $rank;
}

} # ------------------end get-rank()---------------------------

#my $tries = 1;
#sub try
#{
#my $try1= get_rank($url);
#my $try2= get_rank($url);
#if ($try1 == $try2) {return $try1."(".$tries.")";}
#else
#{$tries++; try(); }
#}
#print try()."\n";

# Script entry point: look up one hard-coded site and print its rank
# followed by a newline.
my $url = "forumdz.com";
my $rank = get_rank($url);
print $rank."\n";

 

[Ma faiblesse, c'est bien les regex :mad: je crois que le problème vient de là]

Share this post


Link to post
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.




×
×
  • Create New...