pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 23913733a25d556f4d99e7856203de595c13afcb
parent f380ec22bb401f300395b5e3fe300783d0b46a34
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Mon,  3 Nov 2014 21:27:13 -0700

product_scraper: look for part number earlier

Diffstat:
M product_scraper.pl | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/product_scraper.pl b/product_scraper.pl
@@ -29,6 +29,8 @@ $dbh->do("create table if not exists products(" .
 
 #
 my $vendor = "Memory Express";
+# use this to look up individual products
+my $product_url = "http://www.memoryexpress.com/Products/";
 my %product_map = ("televisions" => "Televisions",
 	"laptops" => "LaptopsNotebooks",
 	"hard_drives" => "HardDrives");
@@ -92,6 +94,14 @@ for (keys %product_map) {
 	my $product_id = get_tag_text($thumbnail_dom, ".ProductId");
 	next unless (defined $product_id);
 
+	# get the part number from the product page as early as possible
+	my $product_dom = get_dom("$product_url$product_id", $ua);
+	my $part_num = get_tag_text($product_dom, "#ProductAdd");
+	next unless (defined $part_num);
+
+	($part_num) = ($part_num =~ m/Part #:\s*(.*)\r/);
+	next unless (defined $part_num && $part_num ne "");
+
 	my $description = get_tag_text($thumbnail_dom, ".ProductTitle");
 	next unless (defined $description);
 
@@ -104,14 +114,6 @@ for (keys %product_map) {
 	}
 	next if (not_defined($brand, "brand", $thumbnail_html));
 
-	my $product_url = "http://www.memoryexpress.com/Products/";
-	my $product_dom = get_dom("$product_url$product_id", $ua);
-
-	# part number only found on product page
-	my $part_num = $product_dom->find("#ProductAdd")->text();
-	($part_num) = ($part_num =~ m/Part #: (.*)\r/);
-	next if (not_defined($part_num, "part number", $product_dom));
-
 	$product_sth->execute($part_num);
 	if ($product_sth->fetchrow_arrayref()) {
 		$update_sth->execute(time, $part_num);