pricecharts

track prices of consumer electronics
Log | Files | Refs | README

ps_html.pl (15797B)


      1 #!/usr/bin/env perl
      2 use strict;
      3 use warnings;
      4 
      5 use BSD::arc4random qw(:all);
      6 use Config::Grammar;
      7 use Data::Dumper;
      8 use Getopt::Std;
      9 use Lingua::EN::Inflect qw(PL);
     10 use Math::MatrixReal;
     11 use Number::Format qw(:subs :vars);
     12 use POSIX;
     13 use PriceSloth;
     14 use SVG;
     15 use Template;
     16 
     17 
     18 my %args;
     19 getopts("av", \%args);
     20 
     21 $| = 1 if ($args{v});
     22 
     23 my $cfg = get_config();
     24 my $dbh = get_dbh($cfg->{general}{db_dir}, $args{v});
     25 
     26 my $work_dir = $cfg->{http}{htdocs};
     27 my $svg_dir  = $work_dir . "/svg";
     28 print "info: work, svg dirs $work_dir/\{,svg\}\n" if ($args{v});
     29 
     30 my $config = {
     31 	POST_CHOMP => 1, EVAL_PERL => 1,
     32 	INCLUDE_PATH => "$work_dir/tt", OUTPUT_PATH => $work_dir
     33 };
     34 my $www = Template->new($config) || die Template->error(), "\n";
     35 
     36 my $and_stale = $args{a} ? "" : "and products.svg_stale = 1";
     37 my $where_stale = $args{a} ? "" : "where products.svg_stale = 1";
     38 
     39 my $desc_sth = $dbh->prepare(qq{select description from descriptions where
     40 	manufacturer = ? and part_num = ? order by date});
     41 
     42 # catmull-rom to cubic bezier conversion matrix
     43 my $catrom_to_bezier = Math::MatrixReal->new_from_rows(
     44 	[[0,     1,   0,    0],
     45 	 [-1/6,  1, 1/6,    0],
     46 	 [0,   1/6,   1, -1/6],
     47 	 [0,     0,   1,    0]]
     48 );
     49 my $m_t = ~$catrom_to_bezier;
     50 
     51 # make a logo file map using massaged names as keys
     52 opendir(DH, "$cfg->{http}{htdocs}/logo");
     53 my @files = readdir(DH);
     54 closedir(DH);
     55 
     56 my %logo_hash;
     57 for my $filename (@files) {
     58 	my $last_dot = rindex($filename, ".");
     59 	my $logo_name = substr($filename, 0, $last_dot);
     60 	$logo_hash{$logo_name} = "/logo/$filename";
     61 }
     62 
     63 #
     64 # manufacturers
     65 #
     66 my $stale_list = qq{select distinct manufacturer from products $where_stale};
     67 
     68 my $types = qq{select distinct type from products where manufacturer = ? $and_stale};
     69 
     70 my $products_fine = qq{select distinct manufacturer, part_num
     71 	from products where type = ? and manufacturer = ?};
     72 
     73 my $summary = qq{select type, count(*) from products where manufacturer = ? group by type};
     74 
     75 generate_folder($stale_list, $types, $products_fine, "Manufacturers", $summary);
     76 
     77 # most natural grouping is manufacturer then type
     78 # (answers the question: what types of products does this manufacturer make?)
     79 my $coarse_sql = qq{select manufacturer, count(distinct part_num) as count,
     80 	type from products group by manufacturer, type};
     81 my @key_fields = ("manufacturer", "type");
     82 
     83 # second most natural grouping is manufacturer then retailer
     84 # (answers the question: which places sell this manufacturer?)
     85 
     86 my $manufacturer_list = $dbh->selectall_hashref($coarse_sql, \@key_fields);
     87 # print Dumper($manufacturer_list);
     88 
     89 
     90 #
     91 # retailers
     92 #
     93 $stale_list = qq{select distinct prices.retailer from prices, products where
     94 	products.manufacturer = prices.manufacturer and
     95 	products.part_num = prices.part_num $and_stale};
     96 
     97 $types = qq{select distinct prices.manufacturer from prices, products where
     98 	prices.retailer = ? $and_stale and
     99 	products.manufacturer = prices.manufacturer and
    100 	products.part_num = prices.part_num};
    101 
    102 $products_fine = qq{select distinct manufacturer, part_num
    103 	from prices where manufacturer = ? and retailer = ?};
    104 
    105 $summary = qq{select manufacturer, count(*) from prices where retailer = ?
    106 	group by manufacturer};
    107 
    108 generate_folder($stale_list, $types, $products_fine, "Retailers", $summary);
    109 
    110 # most natural grouping here is a toss up between type and manufacturer
    111 # (answers the question: what manufacturers does this retailer sell?)
    112 $coarse_sql = qq{select retailer, count(distinct part_num) as count,
    113 	manufacturer from prices group by retailer, manufacturer};
    114 @key_fields = ("retailer", "manufacturer");
    115 
    116 # second grouping is retailer then type
    117 # (answers the question: what types of products does this retailer sell?)
    118 
    119 my $retailer_list = $dbh->selectall_hashref($coarse_sql, \@key_fields);
    120 # print Dumper($retailer_list);
    121 
    122 #
    123 # product types
    124 #
    125 $stale_list = qq{select distinct type from products $where_stale};
    126 
    127 $types = qq{select distinct manufacturer from products where type = ?};
    128 
    129 $products_fine = qq{select distinct manufacturer, part_num from products where
    130 	manufacturer = ?  and type = ?};
    131 
    132 $summary = qq{select manufacturer, count(*) from products where type = ?
    133 	group by manufacturer};
    134 
    135 generate_folder($stale_list, $types, $products_fine, "Types", $summary);
    136 
    137 $coarse_sql = qq{select type, count(distinct part_num) as count,
    138 	manufacturer from products group by type, manufacturer};
    139 @key_fields = ("type", "manufacturer");
    140 
    141 my $types_list = $dbh->selectall_hashref($coarse_sql, \@key_fields);
    142 # print Dumper($types_list);
    143 
    144 #
    145 # index
    146 #
    147 my $vars = {
    148 	manufacturer_list => $manufacturer_list,
    149 	retailer_list => $retailer_list,
    150 	types_list => $types_list,
    151 	logo_hash => \%logo_hash,
    152 };
    153 $www->process("index.tt", $vars, "index.html") or die $www->error(), "\n";
    154 
    155 #
    156 # about
    157 #
    158 my ($p, $m) = $dbh->selectrow_array(qq{select count(*),
    159 	count(distinct manufacturer) from products});
    160 my ($nprice) = $dbh->selectrow_array("select count(*) from prices");
    161 
    162 # anything we haven't seen for over 30 days is stale
    163 my ($prod_stale) = $dbh->selectrow_array(qq{select count(*) from products
    164 	where last_seen < ?}, undef, time - (30 * 24 * 60 * 60));
    165 
    166 my ($r) = $dbh->selectrow_array("select count(*) from retailers");
    167 
    168 # draw a graph of total number of products vs time
    169 my ($first_seen) = $dbh->selectrow_array("select first_seen from products order by first_seen limit 1");
    170 my $num_weeks = (time - $first_seen) / (60 * 60 * 24 * 7);
    171 my %totals_series;
    172 for my $i (0..$num_weeks) {
    173 	my $x = $first_seen + $i * (60 * 60 * 24 * 7);
    174 	my ($y) = $dbh->selectrow_array("select count(*) from products where first_seen < ?", undef, $x);
    175 	$totals_series{"Total"}{$x} = { "price" => $y };
    176 
    177 	($y) = $dbh->selectrow_array("select count(*) from products where last_seen < ?", undef, $x - (60 * 60 * 24 * 30));
    178 	$totals_series{"Out of date"}{$x} = { "price" => $y };
    179 }
    180 
    181 # print Dumper(%totals_series);
    182 my %series_metadata;
    183 $series_metadata{"Total"} = { url => "", color => "000" };
    184 $series_metadata{"Out of date"} = { url => "", color => "F00" };
    185 my $svg = make_svg(\%totals_series, "no_part_num", \%series_metadata, "");
    186 
    187 make_path($svg_dir, { verbose => $args{v} });
    188 my $svg_path = "$svg_dir/history_summary.svg";
    189 
    190 open my $svg_fh, ">", "$svg_path" or die "couldn't open $svg_path: $!";
    191 print $svg_fh $svg->xmlify;
    192 close $svg_fh;
    193 
    194 # this is supposed to work??? alternative sucks
    195 # $THOUSANDS_SEP = '/';
    196 my $de = new Number::Format(-thousands_sep => ' ',
    197 	                    -decimal_point => '.');
    198 
    199 $vars = { nprice => $de->format_number($nprice),
    200 	nret => $r,
    201 	nmanuf => $m,
    202 	nprod => $de->format_number($p - $prod_stale),
    203 	nprod_stale => $prod_stale
    204 };
    205 $www->process("about.tt", $vars, "about.html") or die $www->error(), "\n";
    206 print "info: about\n" if ($args{v});
    207 
    208 
    209 #
    210 # products
    211 #
    212 my $sql = "select * from products $where_stale";
    213 my $products = $dbh->selectall_hashref($sql, "part_num");
    214 while (my ($part_num, $row) = each %$products) {
    215 	my $part_link = linkify($part_num);
    216 	my $manuf_link = linkify($row->{manufacturer});
    217 
    218 	$row->{description} =
    219 		get_description($row->{manufacturer}, $row->{part_num});
    220 
    221 	my $url = "products/$manuf_link/$part_link.html";
    222 	$www->process("product.tt", $row, $url) or die $www->error(), "\n";
    223 }
    224 print "info: products (" . scalar(keys %$products) . ")\n" if ($args{v});
    225 
    226 
    227 #
    228 # product svg;s
    229 #
    230 print "info: svg  " if ($args{v});
    231 
    232 my @series_keys = ("retailer", "date");
    233 my $series_sth = $dbh->prepare(qq{select retailer, date, price from prices
    234 	where manufacturer = ? and part_num = ?});
    235 
    236 my $retailer_info = $dbh->selectall_hashref(qq{select name, url, color from
    237 	retailers}, "name");
    238 
    239 my $parts_sth = $dbh->prepare(qq{select distinct manufacturer,
    240 	part_num from products $where_stale});
    241 $parts_sth->execute();
    242 
    243 my $rendered = 0;
    244 while (my ($manufacturer, $part_num) = $parts_sth->fetchrow_array()) {
    245 	spin() if ($args{v});
    246 
    247 	my $series = $dbh->selectall_hashref($series_sth, \@series_keys, undef,
    248 		$manufacturer, $part_num);
    249 	my $svg = make_svg($series, $part_num, $retailer_info, "\$");
    250 
    251 	my $manufacturer_dir = linkify($manufacturer);
    252 	my $part_link = linkify($part_num);
    253 
    254 	make_path("$svg_dir/$manufacturer_dir", { verbose => $args{v} });
    255 	my $svg_path = "$svg_dir/$manufacturer_dir/$part_link.svg";
    256 
    257 	open my $svg_fh, ">", "$svg_path" or die "couldn't open $svg_path: $!";
    258 	print $svg_fh $svg->xmlify;
    259 	close $svg_fh;
    260 
    261 	$rendered++;
    262 }
    263 print "\b($rendered)\n" if ($args{v});
    264 
    265 $dbh->begin_work;
    266 $dbh->do("update products set svg_stale = 0");
    267 $dbh->commit;
    268 $dbh->disconnect();
    269 
    270 #
    271 # generate an entire tree of html structure
    272 #
    273 sub generate_folder
    274 {
    275 	my ($sql_stale_outer, $sql_types, $sql_products, $name, $sql_summary) = @_;
    276 
    277 	my $name_lc = lc ($name);
    278 
    279 	my $stale_list = $dbh->selectcol_arrayref($sql_stale_outer);
    280 	for my $it (@$stale_list) {
    281 
    282 		my $it_link = linkify($it);
    283 		my $types = $dbh->selectcol_arrayref($sql_types, undef, $it);
    284 		for my $type (sort @$types) {
    285 
    286 			my $products = $dbh->selectall_arrayref($sql_products, undef, $type, $it);
    287 			$_->[2] = get_description($_->[0], $_->[1]) for (@$products);
    288 
    289 			my $vars = {
    290 				name => $it, type => PL($type, scalar @$products),
    291 				products => $products, logo_hash => \%logo_hash
    292 			};
    293 			my $type_link = linkify($type);
    294 			my $out_path = "$name_lc/$it_link/$type_link.html";
    295 			$www->process("fine_list.tt", $vars, $out_path)
    296 				or die $www->error(), "\n";
    297 		}
    298 
    299 		my $summary = $dbh->selectall_arrayref($sql_summary, undef, $it);
    300 		my $vars = { type => $name_lc, name => $it, info => $summary };
    301 		$www->process("summary.tt", $vars, "$name_lc/$it_link.html")
    302 			or die $www->error(), "\n";
    303 
    304 		print "info: $name_lc/$it_link\n" if ($args{v});
    305 	}
    306 }
    307 
    308 sub linkify
    309 {
    310 	my $type = shift;
    311 
    312 	my $type_link = lc($type);
    313 	$type_link =~ s/[ #\/]/_/g;
    314 	return $type_link;
    315 }
    316 
    317 sub get_description
    318 {
    319 	my $manufacturer = shift;
    320 	my $part_num = shift;
    321 
    322 	my $descriptions = $dbh->selectcol_arrayref($desc_sth, undef, $manufacturer,
    323 		$part_num);
    324 	unless (@$descriptions) {
    325 		print "error: no descriptions for $manufacturer $part_num\n";
    326 	}
    327 
    328 	# pick the shortest non-zero description
    329 	my $best = $descriptions->[0];
    330 	for (@$descriptions) {
    331 		next if ($_ eq "");
    332 		$best = $_ if (length($_) < length($best));
    333 	}
    334 
    335 	return $best;
    336 }
    337 
    338 #
    339 # make a new svg with provided coordinate and label data
    340 #
    341 sub make_svg
    342 {
    343 	my ($series, $part_num, $metadata, $right_axis_prefix) = @_;
    344 
    345 	my ($left, $center, $right, $top, $middle, $bottom) =
    346 		(20, 430, 50, 15, 150, 20);
    347 
    348 	my $width = $right + $center + $left;
    349 	my $height = $top + $middle + $bottom;
    350 
    351 	my ($x_min, $x_max, $y_min, $y_max) = (100000000000, 0, 1000000, 0.00001);
    352 	while (my ($retailer, $values) = each %$series) {
    353 		for (keys %{$values}) {
    354 			my ($x, $y) = ($_, $values->{$_}{price});
    355 			$x_min = $x if ($x < $x_min);
    356 			$x_max = $x if ($x > $x_max);
    357 			$y_min = $y if ($y < $y_min);
    358 			$y_max = $y if ($y > $y_max);
    359 		}
    360 	}
    361 
    362 	my $num_digits = ceil(log($y_max) / log(10));
    363 	my $magnitude = 10 ** ($num_digits - 1);
    364 
    365 	$y_max = ceil($y_max / $magnitude) * $magnitude;
    366 	$y_min = floor($y_min / $magnitude) * $magnitude;
    367 
    368 	my ($domain, $range) = ($x_max - $x_min, $y_max - $y_min);
    369 	$domain = 24 * 60 * 60 if ($domain <= 0);
    370 	$range = 20 if ($range < 20);
    371 
    372 	# clamp the total size of this thing with viewBox
    373 	my $svg = SVG->new(viewBox => "0 0 $width $height");
    374 	my $defs = $svg->defs();
    375 	my ($x_scale, $y_scale) = ($center / $domain, $middle / $range);
    376 
    377 	# $defs->tag("link", href => "/charts.css", type => "text/css",
    378 	# 	rel => "stylesheet", xmlns => "http://www.w3.org/1999/xhtml");
    379 
    380 	# y axis labels (prices)
    381 	my $num_labels = 5;
    382 	for (1..$num_labels) {
    383 		my $step = ($_ - 1) / ($num_labels - 1);
    384 		my $price = ceil($y_max - $range * $step);
    385 		my $y = $top + $middle * $step;
    386 
    387 		$svg->text(
    388 			id => "price_$_", x => $left + $center + 5, y => $y - 2,
    389 			fill => "black", "font-family" => "sans-serif",
    390 			"font-size" => "0.8em"
    391 		)->cdata("$right_axis_prefix$price");
    392 
    393 		$svg->line(
    394 			id => "horizontal_line_$_", x1 => $left, y1 => $y,
    395 			x2 => $width , y2 => $y,
    396 			style => "stroke: #BBB; stroke-width: 0.5px;"
    397 		);
    398 	}
    399 
    400 	$num_labels = 4;
    401 
    402 	# x axis labels (dates)
    403 	if ($domain == 24 * 60 * 60) {
    404 		$num_labels = 2;
    405 	}
    406 	for (1..$num_labels) {
    407 		my $step = ($_ - 1) / ($num_labels - 1);
    408 
    409 		# make the dates not hang off the ends of the chart
    410 		my $text_anchor = "middle";
    411 		$text_anchor = "start" if ($_ == 1);
    412 		# $text_anchor = "end" if ($_ == $num_labels);
    413 
    414 		# print the dates along the x axis
    415 		my $x = $left + $center * $step;
    416 		my $time = $x_min + $domain * $step;
    417 		$svg->text(
    418 			id => "date_$_", x => $x, y => $height,
    419 			"text-anchor" => $text_anchor, "fill" => "black",
    420 			"font-family" => "sans-serif", "font-size" => "0.8em"
    421 		)->cdata(strftime("%b %e %Y", localtime($time)));
    422 
    423 		# print the little tick marks down from the x axis
    424 		my $x_axis = $top + $middle;
    425 		$svg->line(
    426 			id => "date_marker_$_", x1 => $x, y1 => $x_axis,
    427 			x2 => $x, y2 => $x_axis + 5,
    428 			style => "stroke: #BBB; stroke-width: 0.5px;"
    429 		);
    430 	}
    431 
    432 	while (my ($series_name, $values) = each %$series) {
    433 		my $retailer_id = lc($series_name);
    434 		$retailer_id =~ s/ /_/;
    435 
    436 		my (@xs, @ys);
    437 		for (sort keys %{$values}) {
    438 			my ($x, $y) = ($_, $values->{$_}{price});
    439 			push @xs, sprintf "%.3f", ($x - $x_min) * $x_scale + $left;
    440 			push @ys, sprintf "%.3f", $height - $bottom - ($y - $y_min) * $y_scale;
    441 		}
    442 
    443 		if (@xs < 3) {
    444 			my $points = $svg->get_path(x => \@xs, y => \@ys, -type => "path");
    445 			$defs->path(%$points, id => "path_$retailer_id");
    446 		}
    447 		else {
    448 			# catmull rom time
    449 			my $d = catmullrom_to_bezier(\@xs, \@ys);
    450 			$defs->tag("path", "d" => $d, id => "path_$retailer_id");
    451 		}
    452 
    453 		my $info = $metadata->{$series_name};
    454 		my ($url, $color) = ($info->{url}, $info->{color});
    455 
    456 		# xlink:href's don't like raw ampersands
    457 		$url =~ s/&/&amp;/g;
    458 
    459 		# the line, points, and label can be grouped under one anchor
    460 		my $anchor = $svg->anchor(-href => $url . $part_num,
    461 			target => "new_window");
    462 
    463 		# draw path first
    464 		$anchor->use(
    465 			-href => "#path_$retailer_id",
    466 			style => qq{stroke: #$color; fill-opacity: 0;
    467 				stroke-width: 2; stroke-opacity: 0.8;}
    468 		);
    469 
    470 		# now draw individual data points
    471 		my $rand_token = sprintf("%x", arc4random());
    472 		$defs->circle(id => $rand_token, cx => 0, cy => 0, r => 2,
    473 			style => "stroke: #$color; fill: white; stoke-width: 2;"
    474 		);
    475 		while (my $i = each @xs) {
    476 			$anchor->use(-href => "#$rand_token",
    477 				x => $xs[$i], y => $ys[$i]
    478 			);
    479 		}
    480 
    481 		# show series name along the start of the path
    482 		$anchor->text(
    483 			fill => "#$color", style => "font-family: sans-serif;"
    484 		)->tag("textPath", -href => "#path_$retailer_id"
    485 		)->tag("tspan", "dy" => "-5")->cdata($series_name);
    486 	}
    487 
    488 	# when graph is loaded make a sliding motion show the graph lines
    489 	# my $mask = $svg->rectangle(
    490 	# 	x => 0, y => 0, width => 1000, height => 250, rx => 0, ry => 0,
    491 	# 	id => "mask", fill => "#FFF"
    492 	# );
    493 	# $mask->animate(
    494 	# 	attributeName => "x", values => "0;1000", dur => "0.2s",
    495 	# 	fill => "freeze", -method => ""
    496 	# );
    497 
    498 	return $svg
    499 }
    500 
    501 #
    502 # taken from https://gist.github.com/njvack/6925609
    503 #
    504 sub catmullrom_to_bezier
    505 {
    506 	my $xs_ref = shift;
    507 	my $ys_ref = shift;
    508 
    509 	my $d = "M $xs_ref->[0], $ys_ref->[0] ";
    510 	my $iLen = @$xs_ref;
    511 	for (my $i = 0; $iLen - 1 > $i; $i++) {
    512 
    513 		my @offsets = (-1, 0, 1, 2);
    514 		if ($i == 0) {
    515 			@offsets = (0, 0, 1, 2);
    516 		} elsif ($i == ($iLen - 2)) {
    517 			@offsets = (-1, 0, 1, 1);
    518 		}
    519 
    520 		my (@xs, @ys);
    521 		for my $idx (@offsets) {
    522 			push @xs, $xs_ref->[$i + $idx];
    523 			push @ys, $ys_ref->[$i + $idx];
    524 		}
    525 
    526 		my $x_row = Math::MatrixReal->new_from_rows([[@xs]]);
    527 		my $y_row = Math::MatrixReal->new_from_rows([[@ys]]);
    528 
    529 		$x_row = $x_row * $m_t;
    530 		$y_row = $y_row * $m_t;
    531 
    532 		my ($x, $y) = ($x_row->[0][0], $y_row->[0][0]);
    533 
    534 		# knock some digits of precision off
    535 		$d .= sprintf("C %0.2f, %0.2f %0.2f, %0.2f %0.2f, %0.2f ",
    536 			$x->[1], $y->[1], $x->[2], $y->[2], $x->[3], $y->[3]);
    537 	}
    538 
    539 	return $d;
    540 }