pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 25396783059f7ea719c6a511b156e8b93198e5c4
parent 9b86d9acae9879bb70f34931cdc2f4a3cd20883f
Author: Kyle R W Milz <kyle@getaddrinfo.net>
Date:   Mon, 11 Aug 2014 23:22:09 -0600

price_scraper: remove products from config file now that we pull from db

Diffstat:
M price_scraper.cfg | 129 -------------------------------------------------------------------------------
M price_scraper.pl | 112 +++----------------------------------------------------------------------------
2 files changed, 4 insertions(+), 237 deletions(-)

diff --git a/price_scraper.cfg b/price_scraper.cfg @@ -4,135 +4,6 @@ http = /var/www/htdocs/pricegraph data = data # must be a directory log = pricegraph.txt -*** products *** - -+ Samsung - -++ UN32F5500 -++ UN32EH4003 -++ UN32EH5300 -++ UN39EH5003 -++ HG40NA570L -++ UN40EH5300 -++ UN40FH6030 -++ UN40F5500 -++ UN40F6300 -++ UN46EH5300 -++ UN46F5500 -++ UN46F6300 -++ UN46F6800 -++ UN46FH6030 -++ UN50EH5300 -++ UN50F5500 -++ UN50F6300 -++ UN50F6800 -++ UN55F6300 -++ UN55F6800 -++ UN55F7050 -++ UN55F7100 -++ UN55F8000 -++ UN55FH6030 -++ UN55FH6200 -++ UN55F7500 -++ UN55F9000 -# ++ KN55S9 -++ UN60ES6500 -++ UN60F6300 -++ UN60F6400 -++ UN60F7050 -++ UN60F7100 -++ UN60F8000 -++ UN60FH6200 -++ UN65EH6000 -++ UN65F6300 -++ UN65FH6001 -++ UN65F6400 -++ UN65F7100 -++ UN65F8000 -++ UN65F9000 -++ UN75F6300 -++ UN75F7100 -++ UN75F8000 -++ UN85S9 - -+ Toshiba - -++ 32L1300UC -++ 39L1350UC -++ 39L4300UC -++ 50L1350UC -++ 50L4300UC -++ 50L5300 -++ 50L7300 -++ 58L1350 -++ 58L7350UC -++ 58L9300 -++ 65L7350UC -++ 65L9300 -++ 84L9300 - -+ Sharp - -++ LC60C8470U -++ LC60LE450U -++ LC60LE550U -++ LC60LE650U -++ LC60LE757U -++ LC70C8470U -++ LC70LE550U -++ LC70LE650U -++ LC70LE757U -++ LC80LE642U -++ LC80LE650U -++ LC80LE757U -++ LC80LE857U -++ LC90LE657U - -+ Sony - -++ KDL32R400A -++ KDL40R450A -++ KDL46R450A -# ++ KDL47W802A -# ++ KDL50R550A -++ KDL55W802A -++ KDL55W900A -# ++ KDL60R550A -++ KDL70R550A -++ XBR55X900A -++ XBR65X900A - -+ Panasonic - -++ TCL32B6 -++ TCL42E60 -++ TCL47ET60 -++ TCL55ET60 -++ TCL55WT50 - -+ LG - -++ 32LN530B -++ 32LN5700 -++ 42LA6205 -++ 42LN5300 -++ 42LN5400 -++ 42LN5700 -++ 47LA6205 -++ 47LN5400 -++ 47LN5750 -++ 50LA6205 -++ 50LN5310 -++ 50LN5750 -++ 55LA8600 -++ 55LN5310 -++ 55LN5400 -++ 55LN5750 -++ 60LA7400 -++ 60LA8600 -++ 65LA9700 - - *** vendors *** + Memory Express diff --git a/price_scraper.pl b/price_scraper.pl @@ -8,27 +8,16 @@ use Data::Dumper; use DBI; use File::Basename; use Getopt::Std; -use JSON; use HTML::Grabber; use LWP::Simple; use 
POSIX; my %args; -getopts('adf:i:np:rv', \%args); +getopts('df:i:np:v', \%args); my $parser = Config::Grammar->new({ - _sections => ['products', 'vendors', 'paths'], - products => { - # manufacturer regular expression - _sections => ['/[A-Za-z]+/'], - '/[A-Za-z]+/' => { - # part number regular expression - _sections => ['/[A-Za-z0-9]+/'], - '/[A-Za-z0-9]+/' => { - }, - }, - }, + _sections => ['vendors', 'paths'], vendors => { # vendor regular expression _sections => ['/[A-Za-z ]+/'], @@ -70,33 +59,14 @@ else { select $logfile; } -if ($args{a}) { - scrape_vendors($_) for (make_parts_list()); - regenerate_json(); -} -elsif ($args{d}) { +if ($args{d}) { print Dumper($cfg); } elsif ($args{p}) { scrape_vendors($args{p}); } -elsif ($args{r}) { - regenerate_json(); -} else { - srand; - my @parts = make_parts_list(); - scrape_vendors($parts[rand @parts]); - regenerate_json(); -} - -sub make_parts_list -{ - my @parts; - for (sort keys $cfg->{products}) { - push @parts, sort keys $cfg->{products}{$_}; - } - return @parts; + scrape_vendors(); } sub scrape_vendors @@ -184,77 +154,3 @@ sub scrape_vendors print FILE "\n"; close FILE; } - -sub regenerate_json -{ - my $pretty = 0; - $pretty = 1 if $args{v}; - - mkdir "$cfg->{paths}{http}/json"; - - my @manufacturers = sort keys $cfg->{products}; - open my $fh, '>', "$cfg->{paths}{http}/json/manufacturers.json" or die $!; - print $fh to_json(\@manufacturers, {pretty => $pretty}); - close $fh; - - open $fh, '>', "$cfg->{paths}{http}/json/vendors.json" or die $!; - print $fh to_json($cfg->{vendors}, {pretty => $pretty}); - close $fh; - - print "Regenerating... 
" if $args{v}; - - my %parts; - opendir(DIR, $cfg->{paths}{data}); - while (my $file = readdir(DIR)) { - next if ($file =~ m/^\./); - - my %part; - my $part_num = basename($file, '.txt'); - print $part_num if ($args{v}); - - my %tmp; - open FILE, "<", "$cfg->{paths}{data}/$file" or die $!; - while (<FILE>) { - chomp; - my @fields = split("\t", $_); - - my $date = $fields[0]; - splice(@fields, 0, 1); - foreach (@fields) { - my ($l, $r) = split("=", $_); - if (! defined $tmp{$l}) { - $tmp{$l}{data} = []; - $tmp{$l}{name} = $l; - if ($cfg->{vendors}{$l}) { - $tmp{$l}{color} = "#$cfg->{vendors}{$l}{color}"; - } - } - push @{$tmp{$l}{data}}, [int($date), int($r)]; - } - } - close FILE; - - @{$part{vendors}} = keys %tmp; - @{$part{series}} = values %tmp; - $part{part_num} = $part_num; - - for my $manuf (keys $cfg->{products}) { - for (keys $cfg->{products}{$manuf}) { - $part{manuf} = $manuf if ($_ eq $part_num); - } - } - - if ($args{v}) { - print chr(0x08) for split("", $part_num); - } - - $parts{$part_num} = \%part; - } - closedir(DIR); - - open $fh, ">$cfg->{paths}{http}/json/products.json" or die $!; - print $fh to_json(\%parts, {pretty => $pretty}); - close $fh; - - print "done. \n" if $args{v}; -}