Dynadot — .com Registration $8.99

Overture scraper - Perl

Spaceship Spaceship
Watch
PHP:
use strict;
use IO::Socket;
$| = 1;

# http://www.spiderninja.com/
# [email protected]

# COUNTRY CODES
# -------------
# Australia      => 'au'
# Austria        => 'at'
# Denmark        => 'dk'
# Finland        => 'fi'
# France         => 'fr'
# Germany        => 'de'
# Italia         => 'it'
# Netherlands    => 'nl'
# Norway         => 'no'
# Spain          => 'es'
# Sweden         => 'se'
# Switzerland    => 'ch'
# United Kingdom => 'uk'
# United States  => 'us'

# Run configuration: the market code (see COUNTRY CODES above), the search
# terms to look up, and whether the per-suggestion number is printed too.
my $mkt           = "de";
my @keywords      = ("spider", "ninja");
my $include_count = 0;

# Look up each keyword in order, pausing one second after every request.
foreach my $index (0 .. $#keywords) {
    get_overture_data($mkt, $keywords[$index], $include_count);
    sleep 1;
}

# Query the suggestion page on inventory.overture.com for one keyword and
# print every suggestion found in the response to STDOUT, one per line.
#
# Parameters:
#   $mkt           - market code, sent as the "mkt" form field
#   $keyword       - search term, sent as the "term" form field
#   $include_count - when true, the number found next to a suggestion is
#                    printed as "N - " (presumably its search count --
#                    verify against the live markup)
#
# Returns nothing. Dies if the TCP connection cannot be established.
sub get_overture_data {
    my ($mkt, $keyword, $include_count) = @_;

    my $socket = IO::Socket::INET->new(
        PeerAddr => "inventory.overture.com",
        PeerPort => 80,
        Proto    => "tcp",
        Type     => SOCK_STREAM,
        Timeout  => 5,
    ) or die "couldn't connect to overture: $@\n";

    print {$socket} get_request($keyword, $mkt);

    # The markup separates the table cell from its value with a non-breaking
    # space. Accept the HTML entity as well as a raw NBSP byte (Latin-1 \xA0
    # or UTF-8 \xC2\xA0), since the response is read as undecoded bytes.
    # NOTE(review): these count patterns were reconstructed from a
    # mis-encoded copy of the script -- confirm against the real page.
    my $nbsp = qr/(?:&nbsp;|\xC2?\xA0)/;

    LINE:
    while (my $line = <$socket>) {
        # grab keyword
        if ($line =~ m{color=#000000>(.*)</a></td>}) {
            print "$1\n";
        }
        # grab number
        if ($include_count && $line =~ m{size=1>${nbsp}(.*)</td>}) {
            print "$1 - ";
        }
        # special case where suggestion equals search term
        if ($line =~ m{color=E8E8E8>${nbsp}(.*)</a></td>}) {
            print "$1\n";
        }
        # grab number for special case
        if ($include_count && $line =~ m{color=E8E8E8>${nbsp}(\d+)}) {
            print "$1 - ";
        }
        # or...note if nothing is there
        if ($line =~ m{<em>(No suggestions for .*)</em>}) {
            print "$1\n";
        }
        # Stop at the end of the document instead of waiting for the server
        # to close the connection.
        last LINE if $line =~ m{</html>};
    }

    close $socket;
    return;
}

# Build the raw HTTP/1.0 POST request that asks the suggestion page for
# keyword suggestions.
#
# Parameters:
#   $keyword - search term, sent as the "term" form field
#   $mkt     - market code, sent as the "mkt" form field
#
# Returns the complete request (headers, blank line, form body) as a string
# ready to be written to the socket.
sub get_request {
    my ($keyword, $mkt) = @_;

    # Percent-encode a form value so that spaces, '&', '=', '+' and non-ASCII
    # characters cannot corrupt the application/x-www-form-urlencoded body.
    my $encode = sub {
        my ($value) = @_;
        $value = '' unless defined $value;
        # Work on UTF-8 bytes so every escaped character is a single octet.
        utf8::encode($value) if utf8::is_utf8($value);
        $value =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord $1)/ge;
        return $value;
    };

    my $post_request = 'mkt=' . $encode->($mkt) . '&term=' . $encode->($keyword);
    my $length       = length $post_request;

    # No "Accept-Encoding: gzip" header: the response is parsed line by line
    # as plain text, so a compressed body could never be matched.
    my @header_lines = (
        'POST /d/searchinventory/suggestion/ HTTP/1.0',
        'Host: inventory.overture.com',
        'Accept: text/html, text/plain',
        'Accept-Language: en',
        'User-Agent: Lynx/2.8.3rel.1 libwww-FM/2.14',
        'Referer: http://inventory.overture.com/d/searchinventory/suggestion/',
        'Content-type: application/x-www-form-urlencoded',
        "Content-length: $length",
    );

    # HTTP requires CRLF line endings; the empty element yields the blank
    # line that separates the headers from the body.
    return join("\r\n", @header_lines, '', $post_request);
}

nice tool that does what you want it to do ;)
 
0
•••
The views expressed on this page by users and staff are their own, not those of NamePros.
AfternicAfternic
Dynadot — .com Registration $8.99Dynadot — .com Registration $8.99
Unstoppable Domains
Domain Recover
DomainEasy — Live Options
  • The sidebar remains visible by scrolling at a speed relative to the page's height.
Back