commit 82dc4aac57c151a36e3a26b9a76e47207ee11731
parent 460c57ff20cb93e63ab03a40050b022069cb63b5
Author: Kyle R W Milz <kyle@getaddrinfo.net>
Date:   Tue, 12 Aug 2014 23:43:53 -0600
price_scraper: move get_dom into shared perl module
Diffstat:
2 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/Shared.pm b/Shared.pm
@@ -0,0 +1,22 @@
+#!/usr/bin/env perl
+package Shared;			# shared helper subs (currently just get_dom)
+use strict;
+use warnings;
+use Exporter qw(import);	# get Exporter's import() without touching @ISA
+use HTML::Grabber;		# get_dom() constructs HTML::Grabber objects itself
+our @EXPORT = ("get_dom");	# exported by default so "use Shared;" suffices
+
+# Fetch $url using $ua; returns an HTML::Grabber DOM, or undef on HTTP failure.
+sub get_dom
+{
+	my ($url, $ua) = @_;
+
+	my $resp = $ua->get($url);
+	if (! $resp->is_success) {
+		print STDERR "getting $url failed: " . $resp->status_line . "\n";
+		return;
+	}
+	return HTML::Grabber->new(html => $resp->decoded_content);
+}
+
+1;
diff --git a/product_scraper.pl b/product_scraper.pl
@@ -10,6 +10,7 @@ use Getopt::Std;
 use JSON;
 use HTML::Grabber;
 use LWP::Simple;
+use Shared;
 use POSIX;
 
 
@@ -53,7 +54,7 @@ for (keys %product_map) {
 
 	my $class_url = "http://www.memoryexpress.com/Category/" .
 		"$product_map{$_}?PageSize=120&Page=";
-	my $dom = get_dom($class_url . "1");
+	my $dom = get_dom($class_url . "1", $ua);
 	return if (! defined $dom);
 
 	$dom = $dom->find(".AJAX_List_Pager");
@@ -67,7 +68,7 @@ for (keys %product_map) {
 
 	my @results;
 	for (1..$pages) {
-		$dom = get_dom($class_url . "$_");
+		$dom = get_dom($class_url . "$_", $ua);
 		return if (! defined $dom);
 
 		# $dom->filter(".AJAX_List_Body");
@@ -93,7 +94,7 @@ for (keys %product_map) {
 		next if (not_defined($product_id, "product ID", $node));
 
 		my $product_url = "http://www.memoryexpress.com/Products/";
-		my $product_dom = get_dom("$product_url$product_id");
+		my $product_dom = get_dom("$product_url$product_id", $ua);
 
 		# part number only found on product page
 		my $part_num = $product_dom->find("#ProductAdd")->text;
@@ -164,15 +165,3 @@ sub not_defined
 	}
 	return 0;
 }
-
-sub get_dom
-{
-	my $url = shift;
-
-	my $resp = $ua->get($url);
-	if (! $resp->is_success) {
-		print STDERR "getting $url failed: " . $resp->status_line . "\n";
-		return undef;
-	}
-	return HTML::Grabber->new(html => $resp->decoded_content);
-}