pricecharts

track prices of consumer electronics
Log | Files | Refs | README

BestBuy.pm (3462B)


      1 package PS::BestBuy;
      2 use strict;
      3 
      4 use HTML::Grabber;
      5 use Log::Log4perl qw(:easy);
      6 use URI::Escape;
      7 
      8 use PS::Database;
      9 use PS::UserAgent;
     10 
     11 my $logger = get_logger('pricesloth.best_buy');
     12 
     13 sub new {
     14 	my ($class) = @_;
     15 
     16 	my $self = {
     17 		color => "#003B64",
     18 		url => "http://www.bestbuy.ca/Search/SearchResults.aspx?query=",
     19 		ua => PS::UserAgent->new(),
     20 		db => PS::Database->new()
     21 	};
     22 
     23 	bless ($self, $class);
     24 	$logger->debug("new(): success");
     25 
     26 	# XXX: make sure row in retailer table is created
     27 
     28 	return $self;
     29 }
     30 
     31 sub create_search {
     32 	my ($self, $manufacturer, $part_num) = @_;
     33 
     34 	return $self->{url} . uri_escape("$manufacturer $part_num");
     35 }
     36 
     37 sub scrape_part_num {
     38 	my ($self, $resp) = @_;
     39 	my $dom = HTML::Grabber->new( html => $resp->decoded_content );
     40 
     41 	# Part number is inside this ridiculous tag. Seems to be page unique
     42 	# too.
     43 	my $part_num = $dom->find("#ctl00_CP_ctl00_PD_lblModelNumber")->text();
     44 	return $part_num;
     45 }
     46 
     47 sub scrape_description {
     48 	my ($self, $resp) = @_;
     49 	my $dom = HTML::Grabber->new( html => $resp->decoded_content );
     50 
     51 	my $title = $dom->find("#ctl00_CP_ctl00_PD_lblProductTitle")->text();
     52 	# Part number is at the end, regex that out
     53 	my ($descr) = ($title =~ /(.*) \(.+\)/);
     54 	return $descr;
     55 }
     56 
     57 sub scrape_price {
     58 	my ($self, $resp) = @_;
     59 	my $dom = HTML::Grabber->new( html => $resp->decoded_content );
     60 
     61 	my $price = $dom->find(".price-wrapper .prodprice")->text();
     62 	$price =~ s/^\s+//;
     63 	$price =~ s/\s+$//;
     64 	# Remove dollar sign and any commas between digits
     65 	$price =~ s/^\$//;
     66 	$price =~ s/,//;
     67 
     68 	return $price;
     69 }
     70 
     71 sub find_product_page {
     72 	my ($self, $resp) = @_;
     73 	my $ua = $self->{ua};
     74 
     75 	my $product_url = "http://www.bestbuy.ca/en-CA/product/";
     76 	my $search_url = "http://www.bestbuy.ca/Search/SearchResults.aspx?";
     77 	# The search url has "//" characters that need to be escaped before
     78 	# being used in regular expressions
     79 	$search_url = quotemeta $search_url;
     80 	$product_url = quotemeta $product_url;
     81 
     82 	my $uri = $resp->base;
     83 	if ($uri =~ /$product_url/) {
     84 		# We landed on the product page directly, great.
     85 		return ($resp);
     86 	}
     87 	elsif ($uri =~ m/$search_url/) {
     88 		# We landed on the search page.
     89 		my $dom = HTML::Grabber->new( html => $resp->decoded_content );
     90 
     91 		my ($first_result, @others) = $dom->find(".listing-items .listing-item")->html_array();
     92 		return unless $first_result;
     93 
     94 		my $first_dom = HTML::Grabber->new( html => $first_result );
     95 		my $product_url = $first_dom->find(".prod-title a")->attr("href");
     96 
     97 		my $base_url = "http://www.bestbuy.ca";
     98 		my $resp = $ua->get_dom($base_url . $product_url);
     99 		return unless $resp->is_success;
    100 
    101 		return ($resp, @others);
    102 	}
    103 	else {
    104 		$logger->error("find_product_page(): unexpected search URI '$uri'");
    105 		return;
    106 	}
    107 }
    108 
    109 sub scrape {
    110 	my ($self, $manufacturer, $part_num) = @_;
    111 	my $ua = $self->{ua};
    112 	my $db = $self->{db};
    113 	my $start = time;
    114 
    115 	my $search = $self->create_search($manufacturer, $part_num);
    116 	my $resp = $ua->get_dom($search);
    117 	return unless ($resp->is_success);
    118 
    119 	# Searching can sometimes take you to different places
    120 	($resp) = $self->find_product_page($resp);
    121 	return unless ($resp);
    122 
    123 	# my $part_num = $self->scrape_part_num($resp);
    124 	my ($price) = $self->scrape_price($resp);
    125 	my $desc = $self->scrape_description($resp);
    126 
    127 	$db->insert_price($manufacturer, $part_num, "Best Buy", $price, time - $start);
    128 	$db->insert_descr($manufacturer, $part_num, "Besy Buy", $desc) if ($desc);
    129 
    130 	$logger->debug("scrape_price(): added price \$$price\n");
    131 	return $price;
    132 }