commit 6976be2e21ef3d29016b38fa96bedd08cc976d28
parent d33785138ad65d3b05182fc314868acad23e4e45
Author: Kyle R W Milz <kyle@getaddrinfo.net>
Date: Wed, 13 Aug 2014 22:32:47 -0600
product_scraper: send reports via email
Diffstat:
1 file changed, 49 insertions(+), 7 deletions(-)
diff --git a/product_scraper.pl b/product_scraper.pl
@@ -5,6 +5,8 @@ use warnings;
use Data::Dumper;
use DBI;
+use Email::Simple;
+use Email::Send;
use Getopt::Std;
use HTML::Grabber;
use LWP::Simple;
@@ -40,15 +42,27 @@ $dbh->do("create table if not exists products(" .
my $ua = LWP::UserAgent->new(agent => $cfg->{general}{user_agent});
$ua->default_header("Accept" => "*/*");
+my $email;
+
#
# Memory Express
#
+
+$dbh->do("create table if not exists [Memory Express](" .
+ "date int not null primary key)");
+
my %product_map = ("televisions" => "Televisions",
"laptops" => "LaptopsNotebooks",
"hard_drives" => "HardDrives");
+
+$email .= "*** Memory Express ***\n\n";
+$email .= "product type scraped total new\n";
+$email .= "------------ ------- ----- ---\n";
+
+my @new = ();
for (keys %product_map) {
- print "*** $_ ***\n";
+ $email .= sprintf("%-15s ", "$_:");
my $class_url = "http://www.memoryexpress.com/Category/" .
"$product_map{$_}?PageSize=120&Page=";
@@ -74,7 +88,6 @@ for (keys %product_map) {
}
my $scraped = 0;
- my @new_products = ();
for my $node (@results) {
my $product = HTML::Grabber->new(html => $node);
@@ -105,6 +118,7 @@ for (keys %product_map) {
if ($sth->fetchrow_array()) {
$dbh->do("update products set last_seen = ? where part_num = ?",
undef, time, $part_num);
+ # also update title, brand here?
}
else {
$dbh->do("insert into products(" .
@@ -113,21 +127,49 @@ for (keys %product_map) {
undef, $part_num, $brand, $title, $_, time, time);
#$dbh->do("create table [$part_num]" .
# "(unix_time int not null primary key)");
- push @new_products, ([$brand, $title, $part_num]);
+ push @new, ([$_, $brand, $title, $part_num]);
}
$scraped++;
last;
}
- print "scraped/total: $scraped/" . @results . "\n";
- print "new: " . scalar @new_products . "\n";
- print " - $_->[0] $_->[1] $_->[2]\n" for (@new_products);
- print "\n";
+ $email .= sprintf("%7s %5s %3s\n", $scraped, scalar @results,
+ scalar @new);
+ # $email .= "scraped/total: $scraped/" . @results . "\n";
+ # $email .= "new: " . scalar @new_products . "\n";
+
+ next;
+
+ my $sth = $dbh->prepare("select * from [Memory Express]");
+ my @columns = @{$sth->{NAME}};
+ for my $column (@columns) {
+ next if ($column ne $_);
+ }
+ $dbh->do("alter table [Memory Express] add column $_");
}
+$email .= "\nNew products:\n" if (@new);
+$email .= "- ($_->[0]) $_->[1] $_->[2] $_->[3]\n" for (@new);
+$email .= "\n";
+
$dbh->disconnect();
+my $date = strftime "%d/%m/%Y", localtime;
+my $e_mail = Email::Simple->create(
+ header => [
+ From => "Santa Claus <sc\@np.com>",
+ To => "kyle\@getaddrinfo.net",
+ Subject => "PriceChart product scrape $date",
+ ],
+ body => $email);
+
+print $e_mail->as_string();
+
+my $sender = Email::Send->new({mailer => 'SMTP'});
+$sender->mailer_args([Host => 'smtp.getaddrinfo.net']);
+$sender->send($e_mail->as_string());
+
#for (keys %title_dict) {
# print "$_ " if ($title_dict{$_} / $total_titles >= 0.5);
#}