pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit a91b73864be2e16bf13751aafe8afeed45d79667
parent afa8a9b26f462c39cc3c6d6c443e50526e9c0772
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 12 Oct 2014 21:53:55 -0600

scraper: add last_scraped field

Use it to sequentially scrape products instead of relying on times.

Diffstat:
M price_scraper.pl | 13 +++++++++----
M product_scraper.pl | 11 ++++++-----
2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/price_scraper.pl b/price_scraper.pl @@ -32,10 +32,15 @@ if ($args{p}) { $part_no = $args{p}; } else { - my $results = $dbh->selectcol_arrayref("select part_num from products"); - # sequentially pick one product every hour - my $index = (time / 3600) % scalar(@$results); - $part_no = $results->[$index]; + my $results = $dbh->selectcol_arrayref("select part_num from products " . + "order by last_scraped asc"); + if (scalar $results == 0) { + print "Product table empty, run product_scraper.pl\n"; + exit; + } + $part_no = $results->[0]; + $dbh->do("update products set last_scraped = ? where part_num = ?", + undef, time, $part_no); } $dbh->do("create table if not exists prices(" . diff --git a/product_scraper.pl b/product_scraper.pl @@ -33,7 +33,8 @@ $dbh->do("create table if not exists products(" . "title text, " . "type text, " . "first_seen int, " . - "last_seen int)") or die $DBI::errstr; + "last_seen int, " . + "last_scraped int)") or die $DBI::errstr; my $ua = LWP::UserAgent->new(agent => $cfg->{general}{user_agent}); $ua->default_header("Accept" => "*/*"); @@ -117,10 +118,10 @@ for (keys %product_map) { # also update title, brand here? } else { - $dbh->do("insert into products(" . - "part_num, brand, title, type, first_seen, last_seen)" . - " values (?, ?, ?, ?, ?, ?)", - undef, $part_num, $brand, $title, $_, time, time); + $dbh->do("insert into products(part_num, brand, title," . + "type, first_seen, last_seen, last_scraped) " . + "values (?, ?, ?, ?, ?, ?, ?)", undef, + $part_num, $brand, $title, $_, time, time, 0); #$dbh->do("create table [$part_num]" . # "(unix_time int not null primary key)"); push @new, ([$_, $brand, $title, $part_num]);