pricecharts

track prices of consumer electronics
Log | Files | Refs | README

LondonDrugs.pm (3918B)


      1 package PS::LondonDrugs;
      2 use strict;
      3 
      4 use HTML::Grabber;
      5 use Log::Log4perl qw(:easy);
      6 use URI::Escape;
      7 
      8 use PS::Database;
      9 use PS::UserAgent;
     10 
     11 my $logger = get_logger('pricesloth.london_drugs');
     12 
     # Constructor for the London Drugs scraper.
     #
     # Takes only the class name and returns a blessed hash reference with:
     #   color - the hex colour string "#005DAB" (presumably the colour used for
     #           this retailer when charting -- confirm against the consumers)
     #   url   - search URL prefix; create_search() appends the escaped query
     #   ua    - a freshly constructed PS::UserAgent
     #   db    - a freshly constructed PS::Database
     sub new {
         my $class = shift;

         my %fields = (
             color => "#005DAB",
             url   => "http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q=",
             ua    => PS::UserAgent->new(),
             db    => PS::Database->new(),
         );

         my $self = bless { %fields }, $class;
         $logger->debug("new(): success");

         # XXX: make sure row in retailer table is created

         return $self;
     }
     30 
     # Build the search URL for one product.
     #
     # $manufacturer and $part_num are joined with a single space, URI-escaped,
     # and appended to the search prefix stored in $self->{url}. Supplying both
     # values together appears to give the best results on the London Drugs
     # search page.
     #
     # Returns the complete URL as a string.
     sub create_search {
         my $self = shift;
         my ($manufacturer, $part_num) = @_;

         my $query = join ' ', $manufacturer, $part_num;
         my $escaped_query = uri_escape($query);

         return $self->{url} . $escaped_query;
     }
     38 
     # Extract the part number from a product page.
     #
     # $resp must provide decoded_content(). The first ".productname" text on
     # the page is expected to look like "<description> - <part number>"
     # followed by a carriage return; whatever sits between the last " - " on
     # that line and the carriage return is returned.
     #
     # Returns undef when there is no title or it does not have that shape.
     sub scrape_part_num {
         my $self = shift;
         my $resp = shift;

         my $page = HTML::Grabber->new( html => $resp->decoded_content );
         my @titles = $page->find(".productname")->text_array();

         # Only the first title is of interest.
         my $part_num = $titles[0] =~ m/.* - (.*)\r/ ? $1 : undef;
         return $part_num;
     }
     47 
     # Extract the product description from a product page.
     #
     # $resp must provide decoded_content(). The first ".productname" text on
     # the page is expected to start with whitespace and look like
     # "<description> - <part number>" followed by a carriage return; the text
     # before the last " - " on that line is returned.
     #
     # Returns undef when there is no title or it does not have that shape.
     sub scrape_description {
         my $self = shift;
         my $resp = shift;

         my $page = HTML::Grabber->new( html => $resp->decoded_content );
         my @titles = $page->find(".productname")->text_array();

         # Only the first title is of interest.
         my $descr = $titles[0] =~ m/^\s+(.*) - .*\r/ ? $1 : undef;
         return $descr;
     }
     56 
     # Extract the price of the main product from a product page.
     #
     # $resp must provide decoded_content(). The text of
     # ".productpricing .salesprice" is searched for every token in
     # "dollars.cents" form (exactly two cents digits, commas allowed between
     # the dollar digits).
     #
     # Returns a list ($price, @others) with all commas removed, where $price
     # is the first price found and @others are any further prices in the same
     # text. Returns an empty list (so `my ($price) = ...` yields undef) when
     # no price is present.
     sub scrape_price {
         my ($self, $resp) = @_;
         my $dom = HTML::Grabber->new( html => $resp->decoded_content );

         # There are many .salesprice tags on the page but only one is inside of
         # .productpricing which is the main product on the page.
         my $price_container = $dom->find(".productpricing .salesprice")->text();
         return unless defined $price_container;

         # The decimal point is escaped so only a literal "." separates dollars
         # from cents, a single dollar digit is enough (e.g. "9.99"), and /g in
         # list context collects every price so extra ones can be reported.
         my @prices = ($price_container =~ m/(\d[\d,]*\.\d\d)/g);
         return unless @prices;

         # Strip every thousands separator, not just the first one.
         tr/,//d for @prices;

         my ($price, @others) = @prices;
         $logger->warn("scrape_price(): found more than 1 price") if (@others);

         return ($price, @others);
     }
     77 
     # Work out which page a search request ended up on and return the response
     # for the product page.
     #
     # $resp is the response to the search request; its base() URI decides
     # what happens next:
     #   * a londondrugs.com URL containing ".html": the search landed directly
     #     on a product page, so ($resp) is returned unchanged;
     #   * the search URL from $self->{url}: the first ".productlisting
     #     .product" entry is taken, its ".name a" link is fetched with
     #     $self->{ua}->get_dom(), and ($new_resp, @others) is returned, where
     #     @others holds the HTML of the remaining search results;
     #   * anything else: an error is logged and nothing is returned.
     #
     # Both http and https forms of the URLs are accepted. Returns an empty
     # list when there are no results, the first result has no link, or the
     # product page could not be fetched.
     sub find_product_page {
         my ($self, $resp) = @_;
         my $ua = $self->{ua};

         # Compare against the search URL without its scheme so that a redirect
         # from http to https is still recognised as the search page.
         (my $search_tail = $self->{url}) =~ s{^https?://}{};

         my $uri = $resp->base;
         if ($uri =~ m{https?://www\.londondrugs\.com/.*\.html}) {
             # We landed on the product page directly, great.
             return ($resp);
         }
         elsif ($uri =~ m{https?://\Q$search_tail\E}) {
             # We landed on the search page.
             my $dom = HTML::Grabber->new( html => $resp->decoded_content );

             my ($first_result, @others) = $dom->find(".productlisting .product")->html_array();
             return unless ($first_result);

             my $num_total = scalar (@others) + 1;
             $logger->debug("find_product_page(): found $num_total thumbnails");

             # For every thumbnail there is a div with class="name" with a
             # link to the product page inside
             my $thumb_dom = HTML::Grabber->new( html => $first_result );
             my $product_url = $thumb_dom->find(".name a")->attr('href');
             unless ($product_url) {
                 # Without a link there is nothing to fetch.
                 $logger->error("find_product_page(): first result has no product link");
                 return;
             }

             $resp = $ua->get_dom($product_url);
             return unless ($resp && $resp->is_success);

             return ($resp, @others);
         }
         else {
             $logger->error("find_product_page(): unexpected search URI '$uri'");
             return;
         }
     }
    117 
    # Look up one product on London Drugs and record what was found.
    #
    # Searches for "$manufacturer $part_num", follows the search to the product
    # page, scrapes the price and description, and stores them through
    # $self->{db} under the retailer name "London Drugs". The number of seconds
    # the lookup took is passed to insert_price() as its last argument.
    #
    # Returns the scraped price, or nothing when the search request failed, no
    # product page was found, or the product page carries no recognisable
    # price (nothing is stored in that case).
    sub scrape {
        my ($self, $manufacturer, $part_num) = @_;
        my $ua = $self->{ua};
        my $db = $self->{db};
        my $start = time;

        my $search = $self->create_search($manufacturer, $part_num);
        my $resp = $ua->get_dom($search);
        return unless ($resp && $resp->is_success);

        # Searching can sometimes take you to different places
        ($resp) = $self->find_product_page($resp);
        return unless ($resp);

        # The part number on the page is not cross-checked against $part_num;
        # scrape_part_num() is available should that become necessary.
        my ($price) = $self->scrape_price($resp);
        unless (defined $price) {
            # Do not store an empty price row for a page we could not parse.
            $logger->warn("scrape(): no price found for '$manufacturer $part_num'");
            return;
        }
        my $desc = $self->scrape_description($resp);

        $db->insert_price($manufacturer, $part_num, "London Drugs", $price, time - $start);
        $db->insert_descr($manufacturer, $part_num, "London Drugs", $desc) if ($desc);

        $logger->debug("scrape(): added price \$$price\n");
        return $price;
    }