pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 6439af17217b5ca593f484bb00900446de33eb26
parent e4e4ec5175e3ed373ec44ed32a6c4c02a5cadbb7
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Tue,  5 May 2015 20:57:12 -0600

PriceChart.pm: rename

Diffstat:
MMakefile | 2+-
DPriceChart.pm | 173-------------------------------------------------------------------------------
APriceSloth.pm | 173+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mprice_scraper | 2+-
Mproduct_scraper | 2+-
Mps_fcgi | 2+-
Mps_html | 2+-
7 files changed, 178 insertions(+), 178 deletions(-)

diff --git a/Makefile b/Makefile @@ -11,7 +11,7 @@ HTDOCS = $(VAR)/www/htdocs DEV_BIN = /home/kyle/src/pricesloth BINS = ps_html ps_fcgi price_scraper product_scraper # WARNING stupid idiom used below if adding > 1 item to LIBS!! -LIBS = PriceChart.pm +LIBS = PriceSloth.pm HTML = tt logo pricesloth.css pricesloth.jpg install: diff --git a/PriceChart.pm b/PriceChart.pm @@ -1,173 +0,0 @@ -package PriceChart; - -use DBI; -use File::Path qw(make_path); -use Exporter; - -@ISA = ("Exporter"); -@EXPORT = qw(get_config get_dom get_log get_dbh trunc_line new_ua make_path spin); - - -sub get_config -{ - my $parser = Config::Grammar->new({ - _sections => ["general", "http", "retailers"], - general => { - _vars => [ - "agent", - "email", - "smtp", - # XXX: add simple regex validation here - "addrs", - "db_dir", - "log_dir", - ], - }, - http => { - _vars => [ - "socket", - "uid", - "gid", - "chroot", - "htdocs", - ], - }, - retailers => { - _sections => ["/[A-Za-z ]+/"], - "/[A-Za-z ]+/" => { - _vars => [ - "url", - "reg_tag", - "sale_tag", - "color", - "title" - ] - } - } - }); - - my $cfg_file = "/etc/pricesloth.cfg"; - my $cfg = $parser->parse($cfg_file) or die "error: $parser->{err}\n"; - - return $cfg; -} - -sub get_dbh -{ - my $db_dir = shift; - my $verbose = shift || undef; - - make_path($db_dir, { verbose => $verbose }); - print "info: get_dbh: opening $db_dir/db\n" if ($verbose); - my $dbh = DBI->connect( - "dbi:SQLite:dbname=$db_dir/db", - "", - "", - { RaiseError => 1 } - ) or die $DBI::errstr; - - return $dbh; -} - -sub get_dom -{ - my $url = shift; - my $ua = shift; - my $verbose = shift; - my $log = shift; - - my $resp = $ua->get($url); - if ($resp->is_success) { - my $short_url = trunc_line($url, length($resp->status_line) + 16); - print "info: get_dom: " . $resp->status_line . " $short_url\n" if ($verbose); - return HTML::Grabber->new(html => $resp->decoded_content); - } - - print $log "error: get_dom: " . $resp->status_line . " $url\n"; - return undef; -} - -sub new_ua -{ - my $cfg = shift; - my $verbose = shift || 0; - my $ua; - - # it's optional to list ip addresses to scrape on - if ($cfg->{addrs}) { - my @addresses = split(" ", $cfg->{addrs}); - my $addr = $addresses[rand @addresses]; - print "info: new_ua: using ip $addr\n" if ($verbose); - $ua = LWP::UserAgent->new(local_address => $addr); - } - else { - $ua = LWP::UserAgent->new(); - } - - $ua->default_header("Accept" => "*/*"); - $ua->default_header("Accept-Encoding" => scalar HTTP::Message::decodable()); - $ua->default_header("Accept-Charset" => "utf-8"); - $ua->default_header("Accept-Language" => "en-US"); - $ua->default_header("User-Agent" => $cfg->{agent}); - - my $headers = $ua->default_headers; - for (sort keys %$headers) { - my $header = trunc_line($headers->{$_}, length($_) + 18); - print "info: new_ua: $_ => $header\n" if ($verbose); - } - - return $ua; -} - -sub get_log -{ - my $log_path = shift || return undef; - my $verbose = shift || 0; - - # if $log_path has a / in it, make sure the path to it is made - make_path(substr($log_path, 0, rindex($log_path, '/')), { verbose => $verbose }); - - print "info: get_log: opening $log_path, append mode\n" if ($verbose); - open my $log, ">>", $log_path or die "can't open $log_path: $!"; - - if ($verbose) { - print "info: get_log: outputting to tee\n"; - open my $std_out, '>&', STDOUT or die "$!"; - - return IO::Tee->new($log, $std_out); - } - - return IO::Tee->new($log); -} - -# -# make a possibly long line fit on a single line, with ellipses -# -sub trunc_line -{ - my $line = shift || return undef; - my $prefix = shift || 0; - - # if stdout is not a tty, it's likely a log file, output everything - return $line unless (POSIX::isatty(STDOUT)); - - my ($term_width) = Term::ReadKey::GetTerminalSize(); - my $len = $term_width - $prefix - 3; - if (length($line) < $len) { - return $line; - } - - my $chopped = substr($line, 0, $len); - return $chopped . "..."; -} - -my $state = 0; -sub spin -{ - my @spin_states = ("-", "\\", "|", "/"); - - print "\b"; - print $spin_states[++$state % 4]; -} - -1; diff --git a/PriceSloth.pm b/PriceSloth.pm @@ -0,0 +1,173 @@ +package PriceSloth; + +use DBI; +use File::Path qw(make_path); +use Exporter; + +@ISA = ("Exporter"); +@EXPORT = qw(get_config get_dom get_log get_dbh trunc_line new_ua make_path spin); + + +sub get_config +{ + my $parser = Config::Grammar->new({ + _sections => ["general", "http", "retailers"], + general => { + _vars => [ + "agent", + "email", + "smtp", + # XXX: add simple regex validation here + "addrs", + "db_dir", + "log_dir", + ], + }, + http => { + _vars => [ + "socket", + "uid", + "gid", + "chroot", + "htdocs", + ], + }, + retailers => { + _sections => ["/[A-Za-z ]+/"], + "/[A-Za-z ]+/" => { + _vars => [ + "url", + "reg_tag", + "sale_tag", + "color", + "title" + ] + } + } + }); + + my $cfg_file = "/etc/pricesloth.cfg"; + my $cfg = $parser->parse($cfg_file) or die "error: $parser->{err}\n"; + + return $cfg; +} + +sub get_dbh +{ + my $db_dir = shift; + my $verbose = shift || undef; + + make_path($db_dir, { verbose => $verbose }); + print "info: get_dbh: opening $db_dir/db\n" if ($verbose); + my $dbh = DBI->connect( + "dbi:SQLite:dbname=$db_dir/db", + "", + "", + { RaiseError => 1 } + ) or die $DBI::errstr; + + return $dbh; +} + +sub get_dom +{ + my $url = shift; + my $ua = shift; + my $verbose = shift; + my $log = shift; + + my $resp = $ua->get($url); + if ($resp->is_success) { + my $short_url = trunc_line($url, length($resp->status_line) + 16); + print "info: get_dom: " . $resp->status_line . " $short_url\n" if ($verbose); + return HTML::Grabber->new(html => $resp->decoded_content); + } + + print $log "error: get_dom: " . $resp->status_line . " $url\n"; + return undef; +} + +sub new_ua +{ + my $cfg = shift; + my $verbose = shift || 0; + my $ua; + + # it's optional to list ip addresses to scrape on + if ($cfg->{addrs}) { + my @addresses = split(" ", $cfg->{addrs}); + my $addr = $addresses[rand @addresses]; + print "info: new_ua: using ip $addr\n" if ($verbose); + $ua = LWP::UserAgent->new(local_address => $addr); + } + else { + $ua = LWP::UserAgent->new(); + } + + $ua->default_header("Accept" => "*/*"); + $ua->default_header("Accept-Encoding" => scalar HTTP::Message::decodable()); + $ua->default_header("Accept-Charset" => "utf-8"); + $ua->default_header("Accept-Language" => "en-US"); + $ua->default_header("User-Agent" => $cfg->{agent}); + + my $headers = $ua->default_headers; + for (sort keys %$headers) { + my $header = trunc_line($headers->{$_}, length($_) + 18); + print "info: new_ua: $_ => $header\n" if ($verbose); + } + + return $ua; +} + +sub get_log +{ + my $log_path = shift || return undef; + my $verbose = shift || 0; + + # if $log_path has a / in it, make sure the path to it is made + make_path(substr($log_path, 0, rindex($log_path, '/')), { verbose => $verbose }); + + print "info: get_log: opening $log_path, append mode\n" if ($verbose); + open my $log, ">>", $log_path or die "can't open $log_path: $!"; + + if ($verbose) { + print "info: get_log: outputting to tee\n"; + open my $std_out, '>&', STDOUT or die "$!"; + + return IO::Tee->new($log, $std_out); + } + + return IO::Tee->new($log); +} + +# +# make a possibly long line fit on a single line, with ellipses +# +sub trunc_line +{ + my $line = shift || return undef; + my $prefix = shift || 0; + + # if stdout is not a tty, it's likely a log file, output everything + return $line unless (POSIX::isatty(STDOUT)); + + my ($term_width) = Term::ReadKey::GetTerminalSize(); + my $len = $term_width - $prefix - 3; + if (length($line) < $len) { + return $line; + } + + my $chopped = substr($line, 0, $len); + return $chopped . "..."; +} + +my $state = 0; +sub spin +{ + my @spin_states = ("-", "\\", "|", "/"); + + print "\b"; + print $spin_states[++$state % 4]; +} + +1; diff --git a/price_scraper b/price_scraper @@ -9,7 +9,7 @@ use HTML::Grabber; use IO::Tee; use List::Util qw(min); use LWP::Simple; -use PriceChart; +use PriceSloth; use POSIX; use Term::ReadKey; use URI::Escape; diff --git a/product_scraper b/product_scraper @@ -10,7 +10,7 @@ use Getopt::Std; use HTML::Grabber; use IO::Tee; use LWP::Simple; -use PriceChart; +use PriceSloth; use Term::ReadKey; use POSIX; diff --git a/ps_fcgi b/ps_fcgi @@ -16,7 +16,7 @@ use Template::Iterator; use Template::Parser; use Template::Plugins; use Template::Stash::XS; -use PriceChart; +use PriceSloth; use Unix::Syslog qw(:macros :subs); use URI::Escape; diff --git a/ps_html b/ps_html @@ -8,7 +8,7 @@ use Getopt::Std; use Lingua::EN::Inflect qw(PL); use Math::MatrixReal; use POSIX; -use PriceChart; +use PriceSloth; use SVG; use Template;