pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 488d8f65da6c2cdadc0478d6f17bab1973eb6952
parent 58556c624637d15cba50a38e7ec4f64ed12ad871
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 29 Mar 2015 21:40:37 -0600

change the db schema

Add new descriptions and vendors tables. The vendors table removes the
dependence on the config file always staying the same. The descriptions table
holds the different product descriptions we've seen.

Diffstat:
Mgen_static | 53+++++++++++++++++++++++++++++++----------------------
Mprice_scraper | 33+++++++++++++++++++++++++--------
Mproduct_scraper | 38++++++++++++++++++++++++++++----------
Mtt/chart_list.tt | 23+++++++++++++++--------
Mtt/product.tt | 17+++--------------
5 files changed, 102 insertions(+), 62 deletions(-)

diff --git a/gen_static b/gen_static @@ -44,9 +44,12 @@ print "info: gen manufacturers/ ($m pages total)\n" if ($args{v}); for my $manufacturer (@$manufacturers) { my $manufacturer_lc = lc($manufacturer); - $sql = "select part_num, manufacturer, description from products " . - "where lower(manufacturer) = ?"; + $sql = "select manufacturer, part_num from products where " . + "lower(manufacturer) = ?"; my $products = $dbh->selectall_arrayref($sql, undef, $manufacturer_lc); + while (my $i = each (@$products)) { + $products->[$i][2] = get_description($products->[$i][0], $products->[$i][1]); + } my $vars = { name => $manufacturer, num => scalar @$products, @@ -71,7 +74,7 @@ $template->process("link_list.tt", $vars, "manufacturers.html") # xmkdir "$work_dir/retailers"; -$sql = "select distinct vendor from prices"; +$sql = "select distinct retailer from prices"; my $retailers = $dbh->selectcol_arrayref($sql); my $v = scalar @$retailers; @@ -79,20 +82,15 @@ print "info: gen retailers/ ($v pages total)\n" if ($args{v}); for my $retailer (@$retailers) { my $retailer_lc = lc($retailer); - $sql = "select distinct part_num from prices where vendor = ?"; - my $parts = $dbh->selectcol_arrayref($sql, undef, $retailer); - - # XXX: i think foreign keys would make this data directly fetchable - $sql = "select manufacturer, description from products where part_num = ?"; - my @part_nums; - for my $part (@$parts) { - my $other_info = $dbh->selectrow_arrayref($sql, undef, $part); - push @part_nums, [$part, @$other_info]; + $sql = "select manufacturer, part_num from prices where retailer = ?"; + my $products = $dbh->selectall_arrayref($sql, undef, $retailer); + while (my $i = each (@$products)) { + $products->[$i][2] = get_description($products->[$i][0], $products->[$i][1]); } my $vars = { - name => $retailer, num => scalar @part_nums, - products => \@part_nums, + name => $retailer, num => scalar @$products, + products => $products, }; $template->process("chart_list.tt", $vars, "retailers/$retailer_lc.html") @@ -118,17 +116,13 @@ my $products = $dbh->selectall_hashref($sql, "part_num"); my $p = scalar keys %$products; print "info: gen products/ ($p pages total)\n" if ($args{v}); -for my $part_num (keys %$products) { +while (my ($part_num, $row) = each %$products) { my $part_num_lc = lc($part_num); - - $sql = "select distinct title, vendor from prices where part_num = ?"; - my $descriptions = $dbh->selectall_arrayref($sql, undef, $part_num); - $products->{$part_num}{descriptions} = $descriptions; + $row->{description} = get_description($row->{manufacturer}, $row->{part_num}); # xmkdir("$work_dir/products/$result_lc.html", $args{v}); - $template->process("product.tt", $products->{$part_num}, - "products/$part_num_lc.html") - or die "template: " . $template->error() . "\n"; + $template->process("product.tt", $row, "products/$part_num_lc.html") + || die "template: " . $template->error() . "\n"; } # get a list of products added within the last week @@ -150,3 +144,18 @@ $template->process("index.tt", $vars, "index.html") or die "template: " . $template->error() . "\n"; $dbh->disconnect(); + +sub get_description +{ + my $manufacturer = shift; + my $part_num = shift; + + my $sql = "select description from descriptions where " . + "manufacturer = ? and part_num = ? order by date"; + my $descriptions = $dbh->selectcol_arrayref($sql, undef, $manufacturer, + $part_num); + + # for now just return the first one ever scraped + # XXX: leaving as is for cool future improvements (string interpolation!) + return $descriptions->[0]; +} diff --git a/price_scraper b/price_scraper @@ -44,26 +44,41 @@ if ($args{p} && $args{m}) { } exit unless (defined $part_num); +$dbh->do(qq{ + create table if not exists retailers( + name text not null primary key, + color text not null, + url text not null) +}) or die $DBI::errstr; + $dbh->do("create table if not exists prices(" . "date int not null, " . + "manufacturer text not null, " . "part_num text not null, " . - "vendor text not null, " . + "retailer text not null, " . "price int not null, " . - "color text not null, " . "duration int, " . - "title text, " . - "primary key(date, part_num, vendor, price))" + "primary key(date, part_num, retailer, price), " . + "foreign key(manufacturer, part_num) references products(manufacturer, part_num), " . + "foreign key(retailer) references retailers(name))" ) or die $DBI::errstr; print "info: scraping $manufacturer $part_num\n" if ($args{v}); -$sql = "insert into prices(date, part_num, vendor, color, price, duration, title) " . - "values (?, ?, ?, ?, ?, ?, ?)"; +$sql = "insert into prices(date, manufacturer, part_num, retailer, price, duration) " . + "values (?, ?, ?, ?, ?, ?)"; my $prices_sth = $dbh->prepare($sql); $sql = "update products set last_seen = ? where part_num = ?"; my $products_sth = $dbh->prepare($sql); +$sql = "insert or replace into retailers(name, color, url) values (?, ?, ?)"; +my $retailer_sth = $dbh->prepare($sql); + +$sql = "insert or replace into descriptions(manufacturer, part_num, retailer, ". + "description, date) values (?, ?, ?, ?, ?)"; +my $descriptions_sth = $dbh->prepare($sql); + my $timestamp = strftime("%F %T> ", localtime); my ($start, @status, $i) = (time, "", -1); for my $retailer (sort keys %{$cfg->{retailers}}) { @@ -117,9 +132,11 @@ for my $retailer (sort keys %{$cfg->{retailers}}) { $status[$i] = substr($retailer, 0, 1); next if ($args{n}); - $prices_sth->execute($start, $part_num, $retailer, $color, - $price, time - $retailer_start, $desc); + $retailer_sth->execute($retailer, $color, $url); + $prices_sth->execute($start, $manufacturer, $part_num, $retailer, $price, + time - $retailer_start); $products_sth->execute($start, $part_num); + $descriptions_sth->execute($manufacturer, $part_num, $retailer, $desc, time); print "info: $retailer: db: inserted \$$price\n" if ($args{v}); } diff --git a/product_scraper b/product_scraper @@ -28,20 +28,36 @@ my $log = get_log($tmp_file, $args{v}); srand; $dbh->do("create table if not exists products(" . - "part_num text not null primary key, " . - "manufacturer text, " . - "description text, " . + "manufacturer text not null, " . + "part_num text not null, " . + "retailer text not null, " . "type text, " . "first_seen int, " . "last_seen int, " . - "last_scraped int)") or die $DBI::errstr; + "last_scraped int, " . + "primary key(manufacturer, part_num))" +) or die $DBI::errstr; + +$dbh->do(qq{create table if not exists descriptions( + manufacturer text not null, + part_num text not null, + retailer text not null, + description text not null, + date int not null, + primary key(manufacturer, part_num, retailer, description), + foreign key(manufacturer, part_num) references products(manufacturer, part_num)) +}) or die $DBI::errstr; # $dbh->do("create table if not exists scrapes"); -my $sql = "insert into products(part_num, manufacturer, description, type, " . +my $sql = "insert into products(part_num, manufacturer, retailer, type, " . "first_seen, last_seen, last_scraped) values (?, ?, ?, ?, ?, ?, ?)"; my $insert_sth = $dbh->prepare($sql); +$sql = "insert or replace into descriptions(manufacturer, part_num, retailer, ". + "description, date) values (?, ?, ?, ?, ?)"; +my $descriptions_sth = $dbh->prepare($sql); + # also update description, manufacturer? $sql = "update products set last_seen = ? where part_num = ?"; my $update_sth = $dbh->prepare($sql); @@ -54,10 +70,10 @@ print $log "type ok percent errors new duration\n"; print $log "--------------- ------- ------- ------ --- --------\n"; my %product_map = ( - "televisions" => "Televisions", - "laptops" => "LaptopsNotebooks", - "hard drives" => "HardDrives", - "memory" => "Memory" + "Television" => "Televisions", + "Laptop" => "LaptopsNotebooks", + "Hard Drives" => "HardDrives", + "Memory" => "Memory" ); while (my ($type, $name) = each %product_map) { mem_exp_scrape_class($type, $name); @@ -103,6 +119,8 @@ sub mem_exp_scrape_class next; } + $descriptions_sth->execute($brand, $part_num, "Memory Express", $desc, time); + # extraction looks good, insert or update the database $sql = "select * from products where part_num = ?"; if ($dbh->selectrow_arrayref($sql, undef, $part_num)) { @@ -112,7 +130,7 @@ sub mem_exp_scrape_class $old++; } else { - $insert_sth->execute($part_num, $brand, $desc, $type, + $insert_sth->execute($part_num, $brand, "Memory Express", $type, time, time, 0) or die $dbh->errstr(); print "$thumb_hdr: inserted into db\n" if ($args{v}); $new++; diff --git a/tt/chart_list.tt b/tt/chart_list.tt @@ -3,22 +3,29 @@ my $name_lc = lc($stash->get("name")); $stash->set("name_lc", $name_lc); [% END %] - <h1><img alt="[% name %]" class="logo" src="/logo/[% name_lc %].svg"></img> + <h1><img alt="[% name %]" class="logo_small" src="/logo/[% name_lc %].svg"></img> ([% num %] total)</h1> [% FOREACH product IN products %] [% PERL %] - my $part_num = $stash->get("product.0"); - $stash->set("part_lc", lc($part_num)); - my $manufacturer = $stash->get("product.1"); + my $manufacturer = $stash->get("product.0"); $stash->set("manufacturer_lc", lc($manufacturer)); + my $part_num = $stash->get("product.1"); + $stash->set("part_lc", lc($part_num)); [% END %] <hr> <div class="product"> - <img alt="[% product.1 %]" class="logo_small" - src="/logo/[% manufacturer_lc %].svg"></img> + <!-- make the manufacturer logo into a link --> + <a href="/manufacturers/[% manufacturer_lc %].html"> + <img alt="[% product.0 %]" class="logo_small" + src="/logo/[% manufacturer_lc %].svg"></img></a> + + <!-- display the description --> [% product.2 %] - <a href="/products/[% part_lc %].html">([% product.0 %])</a><br> - <object data="/svg/[% product.0 %].svg" type="image/svg+xml"> + <!-- inline link to the products page, in parenthesis --> + <a href="/products/[% part_lc %].html">([% product.1 %])</a><br> + + <!-- show the chart --> + <object data="/svg/[% part_lc %].svg" type="image/svg+xml"> </object> </div> [% END %] diff --git a/tt/product.tt b/tt/product.tt @@ -15,21 +15,10 @@ <div class="product"> <table> <tr><td>Manufacturer:</td><td>[% manufacturer %]</td></tr> - - <tr><td>Description(s):</td> - <td><ul> - [% FOREACH description IN descriptions %] - <li>[% description.1 %]: [% description.0 %] - [% END %] - </ul></td></tr> - + <tr><td>Description:</td><td>[% description %]</td></tr> <tr><td>Type:</td><td>[% type %]</td></tr> - - <tr><td>First Seen:</td> - <td>[% first_seen_proc %]</td></tr> - - <tr><td>Last Seen:</td> - <td>[% last_seen_proc %]</td></tr> + <tr><td>First Seen:</td><td>[% first_seen_proc %]</td></tr> + <tr><td>Last Seen:</td><td>[% last_seen_proc %]</td></tr> </table> <object data="/svg/[% part_num %].svg" type="image/svg+xml"> </object>