pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit b90fdd53ba1da4dcdc3e0495a9e24898c9fc4fc5
parent 074d32a959c861fb3b1e0445c04e4a6ebc12f365
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 19 Oct 2014 16:14:05 -0600

price_scraper: add debugging output

Diffstat:
Mprice_scraper.pl | 64+++++++++++++++++++++++++++++++++++++++++++++-------------------
Mshared.pm | 23+++++++++++++++++++----
2 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/price_scraper.pl b/price_scraper.pl @@ -5,22 +5,16 @@ use warnings; use Data::Dumper; use DBI; -use Getopt::Std; use HTML::Grabber; use POSIX; use shared; -my %args; -getopts('f:np:v', \%args); - -my $cfg = get_config($args{f}); +my $cfg = get_config(); my $dbh = get_dbh($cfg); my $ua = get_ua($cfg); -$| = 1 if ($args{v}); - open my $log, ">>", "$cfg->{general}{log_file}" or die $!; my $part_num; @@ -51,17 +45,20 @@ $dbh->do("create table if not exists prices(" . print $log strftime "%b %e %Y %H:%M ", localtime; printf $log "%-15s [", $part_num; -print "$part_num\n" if ($args{v}); +vprint("$part_num\n"); my $date = time; for (sort keys $cfg->{vendors}) { my $start = time; my $vendor = $cfg->{vendors}{$_}; - printf "%-15s ", "$_:" if ($args{v}); + vprint("$_:\n"); my $dom = get_dom("$vendor->{search_uri}$part_num", $ua); - next if (!defined $dom); + if (!defined $dom) { + msg("e", "error: dom"); + next; + } #if (substr($vendor->{context}, 0, 1) eq '@') { # $vendor->{context} =~ s/@/#/; @@ -76,23 +73,30 @@ for (sort keys $cfg->{vendors}) { # next; #} - my $price = $dom->find($vendor->{reg_price})->text; + my $price = get_price($vendor->{"reg_price"}, $dom); if ($vendor->{sale_price}) { - my $sale = $dom->find($vendor->{sale_price})->text; - $price = $sale if ($sale ne ''); + my $sale_price = get_price($vendor->{"sale_price"}, $dom); + $price = $sale_price if ($sale_price ne ''); } - if (! $price) { - print $log " "; - print "\n" if ($args{v}); + msg(" ", "error: price not found"); next; } - ($price) = ($price =~ m/(\d[\d,]+)/); + my @prices = ($price =~ m/(\d[\d,]+)/); + if (@prices != 1) { + msg("r", "error: too many regex matches: " . scalar @prices); + next; + } + + $price = $prices[0]; $price =~ s/,//; + if ($price <= 0 || $price > 10000) { + msg("o", "error: price \$$price out of range"); + next; + } - print $log substr($_, 0, 1); - printf "\$%i\n", $price if ($args{v}); + msg(substr($_, 0, 1), "price = \$$price"); next if ($args{n}); @@ -101,6 +105,8 @@ for (sort keys $cfg->{vendors}) { undef, $date, $part_num, $_, $price, time - $start); $dbh->do("update products set last_seen = ? where part_num = ?", undef, $date, $part_num); + + vprint("\tdb updated\n"); } my $duration = time - $date; @@ -108,3 +114,23 @@ print $log "] ($duration s)\n"; close $log; $dbh->disconnect(); + +sub get_price +{ + my $dom_element = shift; + my $dom = shift; + + my @prices = $dom->find($dom_element)->text_array(); + vprintf("\t%s = %i\n", $dom_element, scalar @prices); + + return $prices[0]; +} + +sub msg +{ + my $log_char = shift; + my $verbose_msg = shift; + + print $log $log_char; + vprint("\t$verbose_msg\n"); +} diff --git a/shared.pm b/shared.pm @@ -3,10 +3,17 @@ package shared; use Config::Grammar; use Exporter; +use Getopt::Std; use LWP::Simple; @ISA = ("Exporter"); -@EXPORT = ("get_dom", "get_config", "get_dbh", "get_ua"); +@EXPORT = qw(get_dom get_config get_dbh get_ua vprint vprintf %args); + + +our %args; +getopts('f:np:v', \%args); + +$| = 1 if ($args{v}); sub get_dom { @@ -23,9 +30,7 @@ sub get_dom sub get_config { - my $cfg_file = shift; - - if (!defined $cfg_file) { + if (!$args{f}) { if (-e "pricechart.cfg") { $cfg_file = "pricechart.cfg"; } else { @@ -77,4 +82,14 @@ sub get_ua return $ua; } +sub vprint +{ + print $_[0] if ($args{v}); +} + +sub vprintf +{ + printf(@_) if ($args{v}); +} + 1;