BestBuy.pm (3462B)
package PS::BestBuy;
use strict;
use warnings;

use HTML::Grabber;
use Log::Log4perl qw(:easy);
use URI::Escape;

use PS::Database;
use PS::UserAgent;

my $logger = get_logger('pricesloth.best_buy');

# Retailer name passed to every database insert made by this module.
# Kept in one place so price and description rows always use the same name.
my $RETAILER = "Best Buy";

# Scheme and host prepended to the relative product links found on the
# search results page.
my $BASE_URL = "http://www.bestbuy.ca";

# new()
#
# Construct a Best Buy scraper. The object holds the retailer colour, the
# search URL prefix, a PS::UserAgent and a PS::Database instance.
sub new {
    my ($class) = @_;

    my $self = {
        color => "#003B64",
        url   => "http://www.bestbuy.ca/Search/SearchResults.aspx?query=",
        ua    => PS::UserAgent->new(),
        db    => PS::Database->new()
    };

    bless($self, $class);
    $logger->debug("new(): success");

    # XXX: make sure row in retailer table is created

    return $self;
}

# create_search($manufacturer, $part_num)
#
# Return the search URL for "<manufacturer> <part number>", with the query
# string URI-escaped.
sub create_search {
    my ($self, $manufacturer, $part_num) = @_;

    return $self->{url} . uri_escape("$manufacturer $part_num");
}

# scrape_part_num($resp)
#
# Return the text of the model-number element of a product page response.
sub scrape_part_num {
    my ($self, $resp) = @_;
    my $dom = HTML::Grabber->new( html => $resp->decoded_content );

    # Part number is inside this ridiculous tag. Seems to be page unique
    # too.
    my $part_num = $dom->find("#ctl00_CP_ctl00_PD_lblModelNumber")->text();
    return $part_num;
}

# scrape_description($resp)
#
# Return the product title with its trailing " (...)" part removed. Returns
# undef when the title does not contain a parenthesised suffix.
sub scrape_description {
    my ($self, $resp) = @_;
    my $dom = HTML::Grabber->new( html => $resp->decoded_content );

    my $title = $dom->find("#ctl00_CP_ctl00_PD_lblProductTitle")->text();
    # Part number is at the end, regex that out
    my ($descr) = ($title =~ /(.*) \(.+\)/);
    return $descr;
}

# scrape_price($resp)
#
# Return the price text of a product page response, trimmed of surrounding
# whitespace, the leading dollar sign and all thousands separators.
sub scrape_price {
    my ($self, $resp) = @_;
    my $dom = HTML::Grabber->new( html => $resp->decoded_content );

    my $price = $dom->find(".price-wrapper .prodprice")->text();
    $price =~ s/^\s+//;
    $price =~ s/\s+$//;
    # Remove dollar sign and any commas between digits. The /g matters:
    # without it only the first comma of e.g. "1,234,567.00" is removed.
    $price =~ s/^\$//;
    $price =~ s/,//g;

    return $price;
}

# find_product_page($resp)
#
# Given the response to a search request, return the response for a product
# page:
#   - if the search landed directly on a product page, returns ($resp);
#   - if it landed on the search results page, fetches the first listed
#     product and returns ($product_resp, @other_listing_html);
#   - otherwise (no results, no link, failed fetch, unexpected URI) returns
#     nothing.
sub find_product_page {
    my ($self, $resp) = @_;
    my $ua = $self->{ua};

    # These URLs contain regex metacharacters ("." and "?") that must be
    # escaped before being used in regular expressions.
    my $product_re = quotemeta("http://www.bestbuy.ca/en-CA/product/");
    my $search_re  = quotemeta("http://www.bestbuy.ca/Search/SearchResults.aspx?");

    my $uri = $resp->base;
    if ($uri =~ /$product_re/) {
        # We landed on the product page directly, great.
        return ($resp);
    }
    elsif ($uri =~ m/$search_re/) {
        # We landed on the search page.
        my $dom = HTML::Grabber->new( html => $resp->decoded_content );

        my ($first_result, @others) =
            $dom->find(".listing-items .listing-item")->html_array();
        return unless $first_result;

        my $first_dom = HTML::Grabber->new( html => $first_result );
        my $href = $first_dom->find(".prod-title a")->attr("href");

        # Without a link we would end up requesting the bare site root and
        # scraping it as though it were a product page.
        unless (defined $href && length $href) {
            $logger->error("find_product_page(): first search result has no product link");
            return;
        }

        my $product_resp = $ua->get_dom($BASE_URL . $href);
        return unless ($product_resp && $product_resp->is_success);

        return ($product_resp, @others);
    }
    else {
        $logger->error("find_product_page(): unexpected search URI '$uri'");
        return;
    }
}

# scrape($manufacturer, $part_num)
#
# Search for the given part, locate its product page, and store the scraped
# price (together with the elapsed time in seconds) and, when one was found,
# the description in the database. Returns the price, or nothing when the
# search or the product page could not be fetched.
sub scrape {
    my ($self, $manufacturer, $part_num) = @_;
    my $ua = $self->{ua};
    my $db = $self->{db};
    my $start = time;

    my $search = $self->create_search($manufacturer, $part_num);
    my $resp = $ua->get_dom($search);
    return unless ($resp && $resp->is_success);

    # Searching can sometimes take you to different places
    ($resp) = $self->find_product_page($resp);
    return unless ($resp);

    # my $part_num = $self->scrape_part_num($resp);
    my ($price) = $self->scrape_price($resp);
    my $desc = $self->scrape_description($resp);

    $db->insert_price($manufacturer, $part_num, $RETAILER, $price, time - $start);
    $db->insert_descr($manufacturer, $part_num, $RETAILER, $desc) if ($desc);

    $logger->debug("scrape_price(): added price \$$price\n");
    return $price;
}

1;