commit b90fdd53ba1da4dcdc3e0495a9e24898c9fc4fc5
parent 074d32a959c861fb3b1e0445c04e4a6ebc12f365
Author: Kyle Milz <kyle@getaddrinfo.net>
Date: Sun, 19 Oct 2014 16:14:05 -0600
price_scraper: add debugging output
Diffstat:
M | price_scraper.pl | | | 64 | +++++++++++++++++++++++++++++++++++++++++++++------------------- |
M | shared.pm | | | 23 | +++++++++++++++++++---- |
2 files changed, 64 insertions(+), 23 deletions(-)
diff --git a/price_scraper.pl b/price_scraper.pl
@@ -5,22 +5,16 @@ use warnings;
use Data::Dumper;
use DBI;
-use Getopt::Std;
use HTML::Grabber;
use POSIX;
use shared;
-my %args;
-getopts('f:np:v', \%args);
-
-my $cfg = get_config($args{f});
+my $cfg = get_config();
my $dbh = get_dbh($cfg);
my $ua = get_ua($cfg);
-$| = 1 if ($args{v});
-
open my $log, ">>", "$cfg->{general}{log_file}" or die $!;
my $part_num;
@@ -51,17 +45,20 @@ $dbh->do("create table if not exists prices(" .
print $log strftime "%b %e %Y %H:%M ", localtime;
printf $log "%-15s [", $part_num;
-print "$part_num\n" if ($args{v});
+vprint("$part_num\n");
my $date = time;
for (sort keys $cfg->{vendors}) {
my $start = time;
my $vendor = $cfg->{vendors}{$_};
- printf "%-15s ", "$_:" if ($args{v});
+ vprint("$_:\n");
my $dom = get_dom("$vendor->{search_uri}$part_num", $ua);
- next if (!defined $dom);
+ if (!defined $dom) {
+ msg("e", "error: dom");
+ next;
+ }
#if (substr($vendor->{context}, 0, 1) eq '@') {
# $vendor->{context} =~ s/@/#/;
@@ -76,23 +73,30 @@ for (sort keys $cfg->{vendors}) {
# next;
#}
- my $price = $dom->find($vendor->{reg_price})->text;
+ my $price = get_price($vendor->{"reg_price"}, $dom);
if ($vendor->{sale_price}) {
- my $sale = $dom->find($vendor->{sale_price})->text;
- $price = $sale if ($sale ne '');
+ my $sale_price = get_price($vendor->{"sale_price"}, $dom);
+ $price = $sale_price if ($sale_price ne '');
}
-
if (! $price) {
- print $log " ";
- print "\n" if ($args{v});
+ msg(" ", "error: price not found");
next;
}
- ($price) = ($price =~ m/(\d[\d,]+)/);
+ my @prices = ($price =~ m/(\d[\d,]+)/);
+ if (@prices != 1) {
+ msg("r", "error: too many regex matches: " . scalar @prices);
+ next;
+ }
+
+ $price = $prices[0];
$price =~ s/,//;
+ if ($price <= 0 || $price > 10000) {
+ msg("o", "error: price \$$price out of range");
+ next;
+ }
- print $log substr($_, 0, 1);
- printf "\$%i\n", $price if ($args{v});
+ msg(substr($_, 0, 1), "price = \$$price");
next if ($args{n});
@@ -101,6 +105,8 @@ for (sort keys $cfg->{vendors}) {
undef, $date, $part_num, $_, $price, time - $start);
$dbh->do("update products set last_seen = ? where part_num = ?",
undef, $date, $part_num);
+
+ vprint("\tdb updated\n");
}
my $duration = time - $date;
@@ -108,3 +114,23 @@ print $log "] ($duration s)\n";
close $log;
$dbh->disconnect();
+
+sub get_price
+{
+ my $dom_element = shift;
+ my $dom = shift;
+
+ my @prices = $dom->find($dom_element)->text_array();
+ vprintf("\t%s = %i\n", $dom_element, scalar @prices);
+
+ return $prices[0];
+}
+
+sub msg
+{
+ my $log_char = shift;
+ my $verbose_msg = shift;
+
+ print $log $log_char;
+ vprint("\t$verbose_msg\n");
+}
diff --git a/shared.pm b/shared.pm
@@ -3,10 +3,17 @@
package shared;
use Config::Grammar;
use Exporter;
+use Getopt::Std;
use LWP::Simple;
@ISA = ("Exporter");
-@EXPORT = ("get_dom", "get_config", "get_dbh", "get_ua");
+@EXPORT = qw(get_dom get_config get_dbh get_ua vprint vprintf %args);
+
+
+our %args;
+getopts('f:np:v', \%args);
+
+$| = 1 if ($args{v});
sub get_dom
{
@@ -23,9 +30,7 @@ sub get_dom
sub get_config
{
- my $cfg_file = shift;
-
- if (!defined $cfg_file) {
+ if (!$args{f}) {
if (-e "pricechart.cfg") {
$cfg_file = "pricechart.cfg";
} else {
@@ -77,4 +82,14 @@ sub get_ua
return $ua;
}
+sub vprint
+{
+ print $_[0] if ($args{v});
+}
+
+sub vprintf
+{
+ printf(@_) if ($args{v});
+}
+
1;