pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 803e8d2efcef144dac2241ee1967eee985b7b5c2
parent 2c131cebaeefb12fbfb3acbf67cca9bfde2cef42
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Mon,  3 Nov 2014 22:10:15 -0700

product_scraper: unify email and verbose output

Diffstat:
Mproduct_scraper.pl | 45+++++++++++++++++++++------------------------
1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/product_scraper.pl b/product_scraper.pl @@ -34,12 +34,10 @@ my %product_map = ("televisions" => "Televisions", "laptops" => "LaptopsNotebooks", "hard_drives" => "HardDrives"); -vprint("$vendor:\n"); - my $email; -$email .= "*** $vendor ***\n\n"; -$email .= "type scraped total new time\n"; -$email .= "------------ ------- ----- --- ----\n"; +tee("$vendor\n"); +tee("=") for (1..length $vendor); +tee("\n\n"); my $product_sth = $dbh->prepare("select * from products where part_num = ?"); @@ -51,11 +49,7 @@ my $insert_sth = $dbh->prepare($qry); $qry = "update products set last_seen = ? where part_num = ?"; my $update_sth = $dbh->prepare($qry); -my @new = (); for (keys %product_map) { - - $email .= sprintf("%-15s ", $_); - my $class_url = "http://www.memoryexpress.com/Category/" . "$product_map{$_}?PageSize=120&Page="; my $dom = get_dom($class_url . "1", $ua); @@ -79,7 +73,7 @@ for (keys %product_map) { push @thumbnails, $dom->find(".PIV_Regular")->html_array(); } - vprint("$_: found " . @thumbnails . " products\n"); + tee("*** $_ (" . @thumbnails . ") ***\n"); my $new = 0; my $old = 0; @@ -119,27 +113,24 @@ for (keys %product_map) { $product_sth->execute($part_num); if ($product_sth->fetchrow_arrayref()) { $update_sth->execute(time, $part_num); - vprint(" "); + vprint(" ($part_num) $brand $description\n"); $old++; } else { $insert_sth->execute($part_num, $brand, $description, $_, time, time, 0); - push @new, ([$_, $brand, $description, $part_num]); - vprint("+ "); + tee("+ ($part_num) $brand $description\n"); $new++; } - - vprint("($part_num) $brand $description\n"); } - $email .= sprintf("%7s %5s %3s %4s\n", - $new + $old, scalar @thumbnails, $new, time - $start); + tee("\n"); + tee("scraped total new time\n"); + tee("------- ----- --- ----\n"); + tee(sprintf("%7s %5s %3s %4s s\n", + $new + $old, scalar @thumbnails, $new, time - $start)); } -$email .= "\nNew products:\n" if (@new); -$email .= "- ($_->[0]) $_->[1] $_->[2] $_->[3]\n" for (@new); - $product_sth->finish(); $dbh->disconnect(); @@ -147,12 +138,10 @@ my $e_mail = Email::Simple->create( header => [ From => "Santa Claus <sc\@np.com>", To => $cfg->{general}{email}, - Subject => "PriceChart product scrape, (" . @new . " new)", + Subject => "PriceChart product scrape", ], body => $email); -vprint($e_mail->as_string()); - my $sender = Email::Send->new({mailer => 'SMTP'}); $sender->mailer_args([Host => $cfg->{general}{smtp}]); $sender->send($e_mail->as_string()) || print "Couldn't send email\n"; @@ -166,8 +155,16 @@ sub get_tag_text if (!defined $field || $field eq "" ) { vprint("could not find $tag, html was:\n"); vprint($dom->html()); - vprint("\n"); + vprint("\n\n"); return undef; } return $field; } + +sub tee +{ + my $line = shift; + + vprint($line); + $email .= $line; +}