commit a91b73864be2e16bf13751aafe8afeed45d79667
parent afa8a9b26f462c39cc3c6d6c443e50526e9c0772
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 12 Oct 2014 21:53:55 -0600
scraper: add last_scraped field
Use it to scrape products sequentially (least recently scraped first) instead of deriving the product index from the current wall-clock hour.
Diffstat:
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/price_scraper.pl b/price_scraper.pl
@@ -32,10 +32,15 @@ if ($args{p}) {
 	$part_no = $args{p};
 }
 else {
-	my $results = $dbh->selectcol_arrayref("select part_num from products");
-	# sequentially pick one product every hour
-	my $index = (time / 3600) % scalar(@$results);
-	$part_no = $results->[$index];
+	my $results = $dbh->selectcol_arrayref("select part_num from products " .
+	"order by last_scraped asc");	# least recently scraped product first
+	if (!$results || @$results == 0) {	# deref: a bare arrayref is never == 0
+		print "Product table empty, run product_scraper.pl\n";
+		exit;
+	}
+	$part_no = $results->[0];
+	$dbh->do("update products set last_scraped = ? where part_num = ?",
+		undef, time, $part_no);	# stamp it so it sorts last on the next run
 }
 
 $dbh->do("create table if not exists prices(" .
diff --git a/product_scraper.pl b/product_scraper.pl
@@ -33,7 +33,8 @@ $dbh->do("create table if not exists products(" .
 	"title text, " .
 	"type text, " .
 	"first_seen int, " . 
-	"last_seen int)") or die $DBI::errstr;
+	"last_seen int, " .
+	"last_scraped int)") or die $DBI::errstr;
 
 my $ua = LWP::UserAgent->new(agent => $cfg->{general}{user_agent});
 $ua->default_header("Accept" => "*/*");
@@ -117,10 +118,10 @@ for (keys %product_map) {
 			# also update title, brand here?
 		}
 		else {
-			$dbh->do("insert into products(" .
-				"part_num, brand, title, type, first_seen, last_seen)" .
-				" values (?, ?, ?, ?, ?, ?)",
-				undef, $part_num, $brand, $title, $_, time, time);
+			$dbh->do("insert into products(part_num, brand, title," .
+				"type, first_seen, last_seen, last_scraped) " .
+				"values (?, ?, ?, ?, ?, ?, ?)", undef,
+				$part_num, $brand, $title, $_, time, time, 0);
 			#$dbh->do("create table [$part_num]" .
 			#	"(unix_time int not null primary key)");
 			push @new, ([$_, $brand, $title, $part_num]);