commit 25396783059f7ea719c6a511b156e8b93198e5c4
parent 9b86d9acae9879bb70f34931cdc2f4a3cd20883f
Author: Kyle R W Milz <kyle@getaddrinfo.net>
Date: Mon, 11 Aug 2014 23:22:09 -0600
price_scraper: remove products from config file now that we pull from db
Diffstat:
M price_scraper.cfg | 129 -------------------------------------------------------------------------------
M price_scraper.pl  | 112 +++----------------------------------------------------------------------------
2 files changed, 4 insertions(+), 237 deletions(-)
diff --git a/price_scraper.cfg b/price_scraper.cfg
@@ -4,135 +4,6 @@ http = /var/www/htdocs/pricegraph
data = data # must be a directory
log = pricegraph.txt
-*** products ***
-
-+ Samsung
-
-++ UN32F5500
-++ UN32EH4003
-++ UN32EH5300
-++ UN39EH5003
-++ HG40NA570L
-++ UN40EH5300
-++ UN40FH6030
-++ UN40F5500
-++ UN40F6300
-++ UN46EH5300
-++ UN46F5500
-++ UN46F6300
-++ UN46F6800
-++ UN46FH6030
-++ UN50EH5300
-++ UN50F5500
-++ UN50F6300
-++ UN50F6800
-++ UN55F6300
-++ UN55F6800
-++ UN55F7050
-++ UN55F7100
-++ UN55F8000
-++ UN55FH6030
-++ UN55FH6200
-++ UN55F7500
-++ UN55F9000
-# ++ KN55S9
-++ UN60ES6500
-++ UN60F6300
-++ UN60F6400
-++ UN60F7050
-++ UN60F7100
-++ UN60F8000
-++ UN60FH6200
-++ UN65EH6000
-++ UN65F6300
-++ UN65FH6001
-++ UN65F6400
-++ UN65F7100
-++ UN65F8000
-++ UN65F9000
-++ UN75F6300
-++ UN75F7100
-++ UN75F8000
-++ UN85S9
-
-+ Toshiba
-
-++ 32L1300UC
-++ 39L1350UC
-++ 39L4300UC
-++ 50L1350UC
-++ 50L4300UC
-++ 50L5300
-++ 50L7300
-++ 58L1350
-++ 58L7350UC
-++ 58L9300
-++ 65L7350UC
-++ 65L9300
-++ 84L9300
-
-+ Sharp
-
-++ LC60C8470U
-++ LC60LE450U
-++ LC60LE550U
-++ LC60LE650U
-++ LC60LE757U
-++ LC70C8470U
-++ LC70LE550U
-++ LC70LE650U
-++ LC70LE757U
-++ LC80LE642U
-++ LC80LE650U
-++ LC80LE757U
-++ LC80LE857U
-++ LC90LE657U
-
-+ Sony
-
-++ KDL32R400A
-++ KDL40R450A
-++ KDL46R450A
-# ++ KDL47W802A
-# ++ KDL50R550A
-++ KDL55W802A
-++ KDL55W900A
-# ++ KDL60R550A
-++ KDL70R550A
-++ XBR55X900A
-++ XBR65X900A
-
-+ Panasonic
-
-++ TCL32B6
-++ TCL42E60
-++ TCL47ET60
-++ TCL55ET60
-++ TCL55WT50
-
-+ LG
-
-++ 32LN530B
-++ 32LN5700
-++ 42LA6205
-++ 42LN5300
-++ 42LN5400
-++ 42LN5700
-++ 47LA6205
-++ 47LN5400
-++ 47LN5750
-++ 50LA6205
-++ 50LN5310
-++ 50LN5750
-++ 55LA8600
-++ 55LN5310
-++ 55LN5400
-++ 55LN5750
-++ 60LA7400
-++ 60LA8600
-++ 65LA9700
-
-
*** vendors ***
+ Memory Express
diff --git a/price_scraper.pl b/price_scraper.pl
@@ -8,27 +8,16 @@ use Data::Dumper;
use DBI;
use File::Basename;
use Getopt::Std;
-use JSON;
use HTML::Grabber;
use LWP::Simple;
use POSIX;
my %args;
-getopts('adf:i:np:rv', \%args);
+getopts('df:i:np:v', \%args);
my $parser = Config::Grammar->new({
- _sections => ['products', 'vendors', 'paths'],
- products => {
- # manufacturer regular expression
- _sections => ['/[A-Za-z]+/'],
- '/[A-Za-z]+/' => {
- # part number regular expression
- _sections => ['/[A-Za-z0-9]+/'],
- '/[A-Za-z0-9]+/' => {
- },
- },
- },
+ _sections => ['vendors', 'paths'],
vendors => {
# vendor regular expression
_sections => ['/[A-Za-z ]+/'],
@@ -70,33 +59,14 @@ else {
select $logfile;
}
-if ($args{a}) {
- scrape_vendors($_) for (make_parts_list());
- regenerate_json();
-}
-elsif ($args{d}) {
+if ($args{d}) {
print Dumper($cfg);
}
elsif ($args{p}) {
scrape_vendors($args{p});
}
-elsif ($args{r}) {
- regenerate_json();
-}
else {
- srand;
- my @parts = make_parts_list();
- scrape_vendors($parts[rand @parts]);
- regenerate_json();
-}
-
-sub make_parts_list
-{
- my @parts;
- for (sort keys $cfg->{products}) {
- push @parts, sort keys $cfg->{products}{$_};
- }
- return @parts;
+ scrape_vendors();
}
sub scrape_vendors
@@ -184,77 +154,3 @@ sub scrape_vendors
print FILE "\n";
close FILE;
}
-
-sub regenerate_json
-{
- my $pretty = 0;
- $pretty = 1 if $args{v};
-
- mkdir "$cfg->{paths}{http}/json";
-
- my @manufacturers = sort keys $cfg->{products};
- open my $fh, '>', "$cfg->{paths}{http}/json/manufacturers.json" or die $!;
- print $fh to_json(\@manufacturers, {pretty => $pretty});
- close $fh;
-
- open $fh, '>', "$cfg->{paths}{http}/json/vendors.json" or die $!;
- print $fh to_json($cfg->{vendors}, {pretty => $pretty});
- close $fh;
-
- print "Regenerating... " if $args{v};
-
- my %parts;
- opendir(DIR, $cfg->{paths}{data});
- while (my $file = readdir(DIR)) {
- next if ($file =~ m/^\./);
-
- my %part;
- my $part_num = basename($file, '.txt');
- print $part_num if ($args{v});
-
- my %tmp;
- open FILE, "<", "$cfg->{paths}{data}/$file" or die $!;
- while (<FILE>) {
- chomp;
- my @fields = split("\t", $_);
-
- my $date = $fields[0];
- splice(@fields, 0, 1);
- foreach (@fields) {
- my ($l, $r) = split("=", $_);
- if (! defined $tmp{$l}) {
- $tmp{$l}{data} = [];
- $tmp{$l}{name} = $l;
- if ($cfg->{vendors}{$l}) {
- $tmp{$l}{color} = "#$cfg->{vendors}{$l}{color}";
- }
- }
- push @{$tmp{$l}{data}}, [int($date), int($r)];
- }
- }
- close FILE;
-
- @{$part{vendors}} = keys %tmp;
- @{$part{series}} = values %tmp;
- $part{part_num} = $part_num;
-
- for my $manuf (keys $cfg->{products}) {
- for (keys $cfg->{products}{$manuf}) {
- $part{manuf} = $manuf if ($_ eq $part_num);
- }
- }
-
- if ($args{v}) {
- print chr(0x08) for split("", $part_num);
- }
-
- $parts{$part_num} = \%part;
- }
- closedir(DIR);
-
- open $fh, ">$cfg->{paths}{http}/json/products.json" or die $!;
- print $fh to_json(\%parts, {pretty => $pretty});
- close $fh;
-
- print "done. \n" if $args{v};
-}