MemoryExpress.pm (3825B)
package PS::MemoryExpress;
use strict;
use warnings;

use HTML::Grabber;
use Log::Log4perl qw(:easy);
use URI::Escape;

use PS::Database;
use PS::UserAgent;

my $logger = Log::Log4perl::get_logger('pricesloth.memory_express');

# Retailer name passed to the database layer with every stored row.
my $RETAILER = "Memory Express";


# Construct a Memory Express scraper.
#
# Takes no arguments besides the class name. The returned object holds:
#   color - "#56B849"; not read anywhere in this file (presumably a display
#           colour for this retailer -- confirm against callers)
#   url   - search URL prefix that create_search() appends the query to
#   ua    - PS::UserAgent instance used for all page fetches
#   db    - PS::Database instance used to store scraped results
sub new {
    my ($class) = @_;

    my $self = {
        color => "#56B849",
        url   => "http://www.memoryexpress.com/Search/Products?Search=",
        ua    => PS::UserAgent->new(),
        db    => PS::Database->new(),
    };

    bless($self, $class);
    $logger->debug("new(): success");

    # XXX: make sure row in retailer table is created

    return $self;
}

# Build the search URL for a part.
#
# Params:  $manufacturer - accepted for interface symmetry but deliberately
#                          unused (see below)
#          $part_num     - part number to search for
# Returns: the search URL string, or nothing when $part_num is missing or
#          empty.
sub create_search {
    my ($self, $manufacturer, $part_num) = @_;

    # Without a part number there is nothing to search for; returning
    # nothing lets scrape() bail out instead of issuing an empty query.
    return unless (defined $part_num && length $part_num);

    # As learned in the Seagate ST8000AS0002 case searching for manufacturer
    # concatenated to part num will hide valid search results.
    # Instead search only for part number. We'll have to deal with thumbnail
    # view return vs a full page product.
    return $self->{url} . uri_escape($part_num);
}

# Extract the part number from a product page.
#
# Params:  $resp - response object providing decoded_content()
# Returns: the text following "Part #:" up to the end of that line with
#          trailing whitespace removed, or undef when no such text exists.
sub scrape_part_num {
    my ($self, $resp) = @_;
    my $dom = HTML::Grabber->new( html => $resp->decoded_content );

    # Product part number is inside of this div id
    my $product_add = $dom->find("#ProductAdd")->text();
    $product_add = '' unless (defined $product_add);

    # Capture up to the end of the line rather than insisting on a literal
    # carriage return, so pages with LF-only line endings match as well.
    my ($part_num) = ($product_add =~ m/Part #:\s*([^\r\n]*)/);
    $part_num =~ s/\s+$// if (defined $part_num);

    return $part_num;
}

# Extract the price from a product page.
#
# Params:  $resp - response object providing decoded_content()
# Returns: a list of every dollars.cents amount found inside the .GrandTotal
#          element, in document order, with thousands separators removed.
#          The first element is the price callers should use. Returns
#          nothing when no amount is found.
sub scrape_price {
    my ($self, $resp) = @_;
    my $dom = HTML::Grabber->new( html => $resp->decoded_content );

    my $grand_total_tag = $dom->find(".GrandTotal")->text();
    return unless (defined $grand_total_tag);

    # Try and match a dollars dot cents format with leeway for comma
    # separated digits. The pattern is unanchored, so surrounding whitespace
    # and the "Only" text right beside the price are skipped automatically.
    # The dot is escaped so only a real decimal point matches, a single
    # leading digit is enough ("5.99"), and /g collects every amount so the
    # duplicate-price warning below can actually trigger.
    my ($price, @others) = ($grand_total_tag =~ m/(\d[\d,]*\.\d\d)/g);
    return unless (defined $price);

    $logger->warn("memexp: found more than 1 price") if (@others);

    # Remove every comma we may have matched earlier, not just the first.
    s/,//g for ($price, @others);

    return ($price, @others);
}

# Extract the product description from a product page.
#
# Params:  $resp - response object providing decoded_content()
# Returns: the text of the <h1> inside .PDH_HeaderBlock, exactly as
#          HTML::Grabber reports it (no trimming is applied here).
sub scrape_description {
    my ($self, $resp) = @_;
    my $dom = HTML::Grabber->new( html => $resp->decoded_content );

    # Product page description is inside <h1> tags
    my $description = $dom->find(".PDH_HeaderBlock h1")->text();
    return $description;
}

# Resolve a search response to a product page response.
#
# Params:  $resp - successful response to a search request
# Returns: ($product_resp, @others) where $product_resp is the product page
#          response and @others holds the HTML of any further search results
#          that were not followed (empty when the search redirected straight
#          to a product page). Returns nothing when there are no results,
#          the first result has no product id, the product page fetch fails,
#          or the landing URI is neither a product nor a search page.
sub find_product_page {
    my ($self, $resp) = @_;
    my $ua = $self->{ua};

    my $uri = $resp->base;
    if ($uri =~ m{/Products/}) {
        # We landed on the product page directly, great.
        return ($resp);
    }
    elsif ($uri =~ m{/Search/}) {
        # We landed on the search page.
        my $dom = HTML::Grabber->new( html => $resp->decoded_content );

        # We're only going to search the first page of results
        my ($first_result, @others) = $dom->find('.PIV_Regular')->html_array();
        return unless ($first_result);

        my $thumb_dom = HTML::Grabber->new( html => $first_result );
        my $product_id = $thumb_dom->find(".ProductId")->text();
        return unless (defined $product_id);

        # text() can carry surrounding whitespace, which must not end up in
        # the product URL.
        $product_id =~ s/^\s+//;
        $product_id =~ s/\s+$//;
        return unless ($product_id);

        my $product_url = "http://www.memoryexpress.com/Products/" . $product_id;

        $resp = $ua->get_dom($product_url);
        return unless ($resp && $resp->is_success);

        return ($resp, @others);
    }
    else {
        $logger->error("find_product_page(): unexpected search URI '$uri'");
        return;
    }
}

# Look up one part on Memory Express and record what was found.
#
# Params:  $manufacturer - manufacturer name, stored alongside the results
#          $part_num     - part number to search for
# Returns: the scraped price on success; nothing when the search could not
#          be built, a fetch failed, no product page was found, or no price
#          could be extracted (in which case nothing is written to the
#          database).
sub scrape {
    my ($self, $manufacturer, $part_num) = @_;
    my $ua = $self->{ua};
    my $db = $self->{db};
    my $start = time;

    my $search = $self->create_search($manufacturer, $part_num);
    return unless ($search);

    my $resp = $ua->get_dom($search);
    return unless ($resp && $resp->is_success);

    # Searching can sometimes take you to different places
    ($resp) = $self->find_product_page($resp);
    return unless ($resp);

    # my $part_num = $self->scrape_part_num($resp);
    my ($price) = $self->scrape_price($resp);
    my $desc = $self->scrape_description($resp);

    # Never store or log an undefined price.
    unless (defined $price) {
        $logger->warn("scrape(): no price found for part '$part_num'");
        return;
    }

    # The last argument is the elapsed wall-clock time in whole seconds.
    $db->insert_price($manufacturer, $part_num, $RETAILER, $price, time - $start);
    $db->insert_descr($manufacturer, $part_num, $RETAILER, $desc) if ($desc);

    $logger->debug("scrape(): added price \$$price\n");
    return $price;
}

1;