commit a91b73864be2e16bf13751aafe8afeed45d79667
parent afa8a9b26f462c39cc3c6d6c443e50526e9c0772
Author: Kyle Milz <kyle@getaddrinfo.net>
Date: Sun, 12 Oct 2014 21:53:55 -0600
scraper: add last_scraped field
Use it to sequentially scrape products instead of relying on times.
Diffstat:
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/price_scraper.pl b/price_scraper.pl
@@ -32,10 +32,15 @@ if ($args{p}) {
$part_no = $args{p};
}
else {
- my $results = $dbh->selectcol_arrayref("select part_num from products");
- # sequentially pick one product every hour
- my $index = (time / 3600) % scalar(@$results);
- $part_no = $results->[$index];
+ my $results = $dbh->selectcol_arrayref("select part_num from products " .
+ "order by last_scraped asc limit 1"); # only the least recently scraped row is used
+ if (!$results || scalar(@$results) == 0) { # $results is an array ref: count its elements, not its address
+ print "Product table empty, run product_scraper.pl\n";
+ exit;
+ }
+ $part_no = $results->[0];
+ $dbh->do("update products set last_scraped = ? where part_num = ?",
+ undef, time, $part_no); # stamp the row so the next run selects a different product
}
$dbh->do("create table if not exists prices(" .
diff --git a/product_scraper.pl b/product_scraper.pl
@@ -33,7 +33,8 @@ $dbh->do("create table if not exists products(" .
"title text, " .
"type text, " .
"first_seen int, " .
- "last_seen int)") or die $DBI::errstr;
+ "last_seen int, " .
+ "last_scraped int)") or die $DBI::errstr;
my $ua = LWP::UserAgent->new(agent => $cfg->{general}{user_agent});
$ua->default_header("Accept" => "*/*");
@@ -117,10 +118,10 @@ for (keys %product_map) {
# also update title, brand here?
}
else {
- $dbh->do("insert into products(" .
- "part_num, brand, title, type, first_seen, last_seen)" .
- " values (?, ?, ?, ?, ?, ?)",
- undef, $part_num, $brand, $title, $_, time, time);
+ $dbh->do("insert into products(part_num, brand, title," .
+ "type, first_seen, last_seen, last_scraped) " .
+ "values (?, ?, ?, ?, ?, ?, ?)", undef,
+ $part_num, $brand, $title, $_, time, time, 0); # last_scraped = 0: new rows sort first under price_scraper.pl's "order by last_scraped asc"
#$dbh->do("create table [$part_num]" .
# "(unix_time int not null primary key)");
push @new, ([$_, $brand, $title, $part_num]);