pricecharts

track prices of consumer electronics
Log | Files | Refs | README

MemoryExpress.pm (3825B)


      1 package PS::MemoryExpress;
      2 use strict;
      3 
      4 use HTML::Grabber;
      5 use Log::Log4perl qw(:easy);
      6 use URI::Escape;
      7 
      8 use PS::Database;
      9 use PS::UserAgent;
     10 
     11 my $logger = Log::Log4perl::get_logger('pricesloth.memory_express');
     12 
     13 
     # Constructor.
     #
     # Creates the scraper object holding a colour value, the search URL
     # prefix, a PS::UserAgent instance and a PS::Database instance, then logs
     # a debug message.
     #
     # Returns the new object, blessed into $class.
     sub new {
         my $class = shift;

         my %attributes = (
             color => "#56B849",
             url   => "http://www.memoryexpress.com/Search/Products?Search=",
             ua    => PS::UserAgent->new(),
             db    => PS::Database->new(),
         );

         my $self = bless { %attributes }, $class;
         $logger->debug("new(): success");

         # XXX: make sure row in retailer table is created

         return $self;
     }
     31 
     # Build the search URL for a part.
     #
     # Parameters:
     #   $manufacturer - accepted so the signature stays unchanged; not used
     #                   (see the note below)
     #   $part_num     - part number to search for
     #
     # Returns the search URL prefix with the URI-escaped part number appended,
     # or nothing (undef / empty list) when no part number was supplied, so the
     # caller's "return unless ($search)" check can actually trigger.
     sub create_search {
         my ($self, $manufacturer, $part_num) = @_;

         # An empty search term cannot identify a product; refuse to build a URL.
         return unless defined $part_num && length $part_num;

         # As learned in the Seagate ST8000AS0002 case searching for manufacturer
         # concatenated to part num will hide valid search results.
         # Instead search only for part number. We'll have to deal with thumbnail
         # view return vs a full page product.
         return $self->{url} . uri_escape($part_num);
     }
     41 
     # Extract the part number from a product page.
     #
     # Parameters:
     #   $resp - response object; its decoded_content is parsed as HTML
     #
     # Returns the text following "Part #:" inside the #ProductAdd element with
     # trailing whitespace removed, or undef when the label is not found.
     sub scrape_part_num {
         my ($self, $resp) = @_;
         my $dom = HTML::Grabber->new( html => $resp->decoded_content );

         # Product part number is inside of this div id
         my $product_add = $dom->find("#ProductAdd")->text();

         # Capture up to the end of the line. The line may end in CR, LF or
         # simply be the end of the text, so do not insist on a carriage return.
         my ($part_num) = ($product_add =~ m/Part #:\s*([^\r\n]*)/);
         $part_num =~ s/\s+\z// if defined $part_num;

         return $part_num;
     }
     51 
     # Extract the price from a product page.
     #
     # Parameters:
     #   $resp - response object; its decoded_content is parsed as HTML
     #
     # Returns a list: the first price found in the .GrandTotal element followed
     # by any further prices found there, each as a "dollars.cents" string with
     # thousands separators removed. Returns nothing (empty list) when no price
     # could be found. Intended to be called in list context.
     sub scrape_price {
         my ($self, $resp) = @_;
         my $dom = HTML::Grabber->new( html => $resp->decoded_content );

         my $grand_total_tag = $dom->find(".GrandTotal")->text();
         return unless defined $grand_total_tag;

         # Match a dollars dot cents format with leeway for comma separated
         # digits. Surrounding text (such as "Only" right beside the price) and
         # whitespace are skipped because the pattern is not anchored.
         #  - the dot is escaped so it only matches a decimal point
         #  - [\d,]* (not +) so single digit dollar amounts like 9.99 match
         #  - /g collects every price so the warning below can actually fire
         my ($price, @others) = ($grand_total_tag =~ m/(\d[\d,]*\.\d\d)/g);
         return unless defined $price;

         $logger->warn("memexp: found more than 1 price") if (@others);

         # Remove every comma we may have matched earlier, not just the first
         tr/,//d for ($price, @others);

         return ($price, @others);
     }
     72 
     # Extract the product description from a product page.
     #
     # Parameters:
     #   $resp - response object; its decoded_content is parsed as HTML
     #
     # Returns the text of the <h1> inside the .PDH_HeaderBlock element.
     sub scrape_description {
         my $self = shift;
         my $resp = shift;

         my $page    = HTML::Grabber->new( html => $resp->decoded_content );
         my $heading = $page->find(".PDH_HeaderBlock h1");

         return scalar $heading->text();
     }
     81 
     # Resolve a search response to a product page response.
     #
     # Parameters:
     #   $resp - response object returned for the search request
     #
     # Returns:
     #   ($resp)           when the response's base URI is already a /Products/
     #                     page
     #   ($resp, @others)  when it is a /Search/ page: $resp is the fetched page
     #                     of the first '.PIV_Regular' result, @others are the
     #                     remaining entries returned by html_array()
     #   nothing           when there is no result, no product id, the product
     #                     page fetch fails, or the URI is neither kind (the
     #                     last case is logged as an error)
     sub find_product_page {
         my ($self, $resp) = @_;

         my $uri = $resp->base;

         # We landed on the product page directly, great.
         return ($resp) if $uri =~ m{/Products/};

         unless ($uri =~ m{/Search/}) {
             $logger->error("find_product_page(): unexpected search URI '$uri'");
             return;
         }

         # We landed on the search page.
         my $dom = HTML::Grabber->new( html => $resp->decoded_content );

         # We're only going to search the first page of results
         my ($first_result, @others) = $dom->find('.PIV_Regular')->html_array();
         return unless ($first_result);

         my $thumb_dom = HTML::Grabber->new( html => $first_result );
         my $product_id = $thumb_dom->find(".ProductId")->text();
         return unless defined $product_id;

         # ->text() does not trim all whitespace; strip it so it neither passes
         # the emptiness check below nor ends up inside the product URL.
         $product_id =~ s/\A\s+//;
         $product_id =~ s/\s+\z//;
         return unless ($product_id);

         my $product_url = "http://www.memoryexpress.com/Products/" . $product_id;

         my $product_resp = $self->{ua}->get_dom($product_url);
         return unless ($product_resp && $product_resp->is_success);

         return ($product_resp, @others);
     }
    115 
    # Look up one part and record what was found.
    #
    # Parameters:
    #   $manufacturer - manufacturer name, passed through to the database calls
    #   $part_num     - part number to search for
    #
    # Searches for the part, resolves the result to a product page, then stores
    # the price (together with the elapsed seconds) and, when present, the
    # description under the retailer name "Memory Express".
    #
    # Returns the price on success. Returns nothing when the search URL cannot
    # be built, a request fails, no product page is found, or no price could be
    # extracted; in the last case no price row is written.
    sub scrape {
        my ($self, $manufacturer, $part_num) = @_;
        my $ua = $self->{ua};
        my $db = $self->{db};
        my $start = time;

        my $search = $self->create_search($manufacturer, $part_num);
        return unless ($search);

        my $resp = $ua->get_dom($search);
        return unless ($resp && $resp->is_success);

        # Searching can sometimes take you to different places
        ($resp) = $self->find_product_page($resp);
        return unless ($resp);

        # The part number shown on the page is currently not cross-checked:
        # my $part_num = $self->scrape_part_num($resp);
        my ($price) = $self->scrape_price($resp);
        my $desc = $self->scrape_description($resp);

        # Never store an undefined price; the description is still worth keeping.
        $db->insert_price($manufacturer, $part_num, "Memory Express", $price, time - $start)
            if (defined $price);
        $db->insert_descr($manufacturer, $part_num, "Memory Express", $desc) if ($desc);

        return unless (defined $price);

        $logger->debug("scrape(): added price \$$price");
        return $price;
    }
    141 }