pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 20683a56ca80638aae900fc67d5bf72bd7f3486d
parent 7ee98c25b29cb6107f6b1385e54e4f9b17c6ad7f
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Thu,  2 Apr 2015 19:46:27 -0600

price_scraper: general cleanup

- use qq
- remove -m -p args
- select both manufacturer and part num
- add product type to log

Diffstat:
M price_scraper | 54 ++++++++++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/price_scraper b/price_scraper
@@ -16,7 +16,7 @@ use URI::Escape;


 my %args;
-getopts("m:np:v", \%args);
+getopts("nv", \%args);

 $| = 1 if ($args{v});

@@ -30,19 +30,18 @@ my $log = get_log($log_path, $args{v});
 # allow products to go out of stock. if we haven't seen them for > 30 days
 # chances are retailers aren't carrying them anymore
 my $cutoff = time - (30 * 24 * 60 * 60);
-my $sql = "select part_num, manufacturer from products " .
+my $sql = "select part_num, manufacturer, type from products " .
 	"where last_seen > $cutoff order by last_scraped asc";
-my ($part_num, $manufacturer) = $dbh->selectrow_array($sql);
+my ($part_num, $manufacturer, $type) = $dbh->selectrow_array($sql);

-# prevent races with other scrapers, claim ownership as soon as possible
-$dbh->do("update products set last_scraped = ? where part_num = ?",
-	undef, time, $part_num);
-
-if ($args{p} && $args{m}) {
-	$part_num = $args{p};
-	$manufacturer = $args{m};
+unless (defined $part_num && defined $manufacturer) {
+	print "error: no parts have been seen in the last $cutoff s";
+	exit 1;
 }
-exit unless (defined $part_num);
+
+# prevent races with other scrapers, claim ownership as soon as possible
+$dbh->do("update products set last_scraped = ? where part_num = ? and manufacturer = ?",
+	undef, time, $part_num, $manufacturer);

 $dbh->do(qq{
 	create table if not exists retailers(
@@ -51,17 +50,18 @@ $dbh->do(qq{
 		url text not null)
 }) or die $DBI::errstr;

-$dbh->do("create table if not exists prices(" .
-	"date int not null, " .
-	"manufacturer text not null, " .
-	"part_num text not null, " .
-	"retailer text not null, " .
-	"price int not null, " .
-	"duration int, " .
-	"primary key(date, part_num, retailer, price), " .
-	"foreign key(manufacturer, part_num) references products(manufacturer, part_num), " .
-	"foreign key(retailer) references retailers(name))"
-) or die $DBI::errstr;
+$dbh->do(qq{
+	create table if not exists prices(
+		date int not null,
+		manufacturer text not null,
+		part_num text not null,
+		retailer text not null,
+		price int not null,
+		duration int,
+		primary key(date, part_num, retailer, price),
+		foreign key(manufacturer, part_num) references products(manufacturer, part_num),
+		foreign key(retailer) references retailers(name))
+}) or die $DBI::errstr;

 print "info: scraping $manufacturer $part_num\n" if ($args{v});

@@ -69,7 +69,7 @@ $sql = "insert into prices(date, manufacturer, part_num, retailer, price, durati
 	"values (?, ?, ?, ?, ?, ?)";
 my $prices_sth = $dbh->prepare($sql);

-$sql = "update products set last_seen = ? where part_num = ?";
+$sql = "update products set last_seen = ? where part_num = ? and manufacturer = ?";
 my $products_sth = $dbh->prepare($sql);

 $sql = "insert or replace into retailers(name, color, url) values (?, ?, ?)";
@@ -135,18 +135,20 @@ for my $retailer (sort keys %{$cfg->{retailers}}) {

 	$retailer_sth->execute($retailer, $color, $url);
 	$prices_sth->execute($start, $manufacturer, $part_num, $retailer, $price, time - $retailer_start);
-	$products_sth->execute($start, $part_num);
+	$products_sth->execute($start, $part_num, $manufacturer);
 	$descriptions_sth->execute($manufacturer, $part_num, $retailer, $desc, time);

 	print "info: $retailer: db: inserted \$$price\n" if ($args{v});
 }

-printf $log "%s %-10s %-20s [%s] (%i s)\n", $timestamp, $manufacturer,
-	$part_num, join("", @status), time - $start;
+printf $log "%s %-10s %-10s %-20s [%s] (%i s)\n", $timestamp, $type,
+	$manufacturer, $part_num, join("", @status), time - $start;

 $log->close();
+$retailer_sth = undef;
 $prices_sth = undef;
 $products_sth = undef;
+$descriptions_sth = undef;
 $dbh->disconnect();

 exit 0;