pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 82dc4aac57c151a36e3a26b9a76e47207ee11731
parent 460c57ff20cb93e63ab03a40050b022069cb63b5
Author: Kyle R W Milz <kyle@getaddrinfo.net>
Date:   Tue, 12 Aug 2014 23:43:53 -0600

price_scraper: move get_dom into shared perl module

Diffstat:
A Shared.pm          | 22 ++++++++++++++++++++++
M product_scraper.pl | 19 ++++---------------
2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/Shared.pm b/Shared.pm @@ -0,0 +1,22 @@ +#!/usr/bin/env perl + +package Shared; +use Exporter; + +@ISA = ("Exporter"); +@EXPORT = ("get_dom"); + +sub get_dom +{ + my $url = shift; + my $ua = shift; + + my $resp = $ua->get($url); + if (! $resp->is_success) { + print "getting $url failed: " . $resp->status_line . "\n"; + return undef; + } + return HTML::Grabber->new(html => $resp->decoded_content); +} + +1; diff --git a/product_scraper.pl b/product_scraper.pl @@ -10,6 +10,7 @@ use Getopt::Std; use JSON; use HTML::Grabber; use LWP::Simple; +use Shared; use POSIX; @@ -53,7 +54,7 @@ for (keys %product_map) { my $class_url = "http://www.memoryexpress.com/Category/" . "$product_map{$_}?PageSize=120&Page="; - my $dom = get_dom($class_url . "1"); + my $dom = get_dom($class_url . "1", $ua); return if (! defined $dom); $dom = $dom->find(".AJAX_List_Pager"); @@ -67,7 +68,7 @@ for (keys %product_map) { my @results; for (1..$pages) { - $dom = get_dom($class_url . "$_"); + $dom = get_dom($class_url . "$_", $ua); return if (! defined $dom); # $dom->filter(".AJAX_List_Body"); @@ -93,7 +94,7 @@ for (keys %product_map) { next if (not_defined($product_id, "product ID", $node)); my $product_url = "http://www.memoryexpress.com/Products/"; - my $product_dom = get_dom("$product_url$product_id"); + my $product_dom = get_dom("$product_url$product_id", $ua); # part number only found on product page my $part_num = $product_dom->find("#ProductAdd")->text; @@ -164,15 +165,3 @@ sub not_defined } return 0; } - -sub get_dom -{ - my $url = shift; - - my $resp = $ua->get($url); - if (! $resp->is_success) { - print STDERR "getting $url failed: " . $resp->status_line . "\n"; - return undef; - } - return HTML::Grabber->new(html => $resp->decoded_content); -}