ps_html.pl (15797B)
1 #!/usr/bin/env perl 2 use strict; 3 use warnings; 4 5 use BSD::arc4random qw(:all); 6 use Config::Grammar; 7 use Data::Dumper; 8 use Getopt::Std; 9 use Lingua::EN::Inflect qw(PL); 10 use Math::MatrixReal; 11 use Number::Format qw(:subs :vars); 12 use POSIX; 13 use PriceSloth; 14 use SVG; 15 use Template; 16 17 18 my %args; 19 getopts("av", \%args); 20 21 $| = 1 if ($args{v}); 22 23 my $cfg = get_config(); 24 my $dbh = get_dbh($cfg->{general}{db_dir}, $args{v}); 25 26 my $work_dir = $cfg->{http}{htdocs}; 27 my $svg_dir = $work_dir . "/svg"; 28 print "info: work, svg dirs $work_dir/\{,svg\}\n" if ($args{v}); 29 30 my $config = { 31 POST_CHOMP => 1, EVAL_PERL => 1, 32 INCLUDE_PATH => "$work_dir/tt", OUTPUT_PATH => $work_dir 33 }; 34 my $www = Template->new($config) || die Template->error(), "\n"; 35 36 my $and_stale = $args{a} ? "" : "and products.svg_stale = 1"; 37 my $where_stale = $args{a} ? "" : "where products.svg_stale = 1"; 38 39 my $desc_sth = $dbh->prepare(qq{select description from descriptions where 40 manufacturer = ? and part_num = ? order by date}); 41 42 # catmull-rom to cubic bezier conversion matrix 43 my $catrom_to_bezier = Math::MatrixReal->new_from_rows( 44 [[0, 1, 0, 0], 45 [-1/6, 1, 1/6, 0], 46 [0, 1/6, 1, -1/6], 47 [0, 0, 1, 0]] 48 ); 49 my $m_t = ~$catrom_to_bezier; 50 51 # make a logo file map using massaged names as keys 52 opendir(DH, "$cfg->{http}{htdocs}/logo"); 53 my @files = readdir(DH); 54 closedir(DH); 55 56 my %logo_hash; 57 for my $filename (@files) { 58 my $last_dot = rindex($filename, "."); 59 my $logo_name = substr($filename, 0, $last_dot); 60 $logo_hash{$logo_name} = "/logo/$filename"; 61 } 62 63 # 64 # manufacturers 65 # 66 my $stale_list = qq{select distinct manufacturer from products $where_stale}; 67 68 my $types = qq{select distinct type from products where manufacturer = ? $and_stale}; 69 70 my $products_fine = qq{select distinct manufacturer, part_num 71 from products where type = ? and manufacturer = ?}; 72 73 my $summary = qq{select type, count(*) from products where manufacturer = ? group by type}; 74 75 generate_folder($stale_list, $types, $products_fine, "Manufacturers", $summary); 76 77 # most natural grouping is manufacturer then type 78 # (answers the question: what types of products does this manufacturer make?) 79 my $coarse_sql = qq{select manufacturer, count(distinct part_num) as count, 80 type from products group by manufacturer, type}; 81 my @key_fields = ("manufacturer", "type"); 82 83 # second most natural grouping is manufacturer then retailer 84 # (answers the question: which places sell this manufacturer?) 85 86 my $manufacturer_list = $dbh->selectall_hashref($coarse_sql, \@key_fields); 87 # print Dumper($manufacturer_list); 88 89 90 # 91 # retailers 92 # 93 $stale_list = qq{select distinct prices.retailer from prices, products where 94 products.manufacturer = prices.manufacturer and 95 products.part_num = prices.part_num $and_stale}; 96 97 $types = qq{select distinct prices.manufacturer from prices, products where 98 prices.retailer = ? $and_stale and 99 products.manufacturer = prices.manufacturer and 100 products.part_num = prices.part_num}; 101 102 $products_fine = qq{select distinct manufacturer, part_num 103 from prices where manufacturer = ? and retailer = ?}; 104 105 $summary = qq{select manufacturer, count(*) from prices where retailer = ? 106 group by manufacturer}; 107 108 generate_folder($stale_list, $types, $products_fine, "Retailers", $summary); 109 110 # most natural grouping here is a toss up between type and manufacturer 111 # (answers the question: what manufacturers does this retailer sell?) 112 $coarse_sql = qq{select retailer, count(distinct part_num) as count, 113 manufacturer from prices group by retailer, manufacturer}; 114 @key_fields = ("retailer", "manufacturer"); 115 116 # second grouping is retailer then type 117 # (answers the question: what types of products does this retailer sell?) 118 119 my $retailer_list = $dbh->selectall_hashref($coarse_sql, \@key_fields); 120 # print Dumper($retailer_list); 121 122 # 123 # product types 124 # 125 $stale_list = qq{select distinct type from products $where_stale}; 126 127 $types = qq{select distinct manufacturer from products where type = ?}; 128 129 $products_fine = qq{select distinct manufacturer, part_num from products where 130 manufacturer = ? and type = ?}; 131 132 $summary = qq{select manufacturer, count(*) from products where type = ? 133 group by manufacturer}; 134 135 generate_folder($stale_list, $types, $products_fine, "Types", $summary); 136 137 $coarse_sql = qq{select type, count(distinct part_num) as count, 138 manufacturer from products group by type, manufacturer}; 139 @key_fields = ("type", "manufacturer"); 140 141 my $types_list = $dbh->selectall_hashref($coarse_sql, \@key_fields); 142 # print Dumper($types_list); 143 144 # 145 # index 146 # 147 my $vars = { 148 manufacturer_list => $manufacturer_list, 149 retailer_list => $retailer_list, 150 types_list => $types_list, 151 logo_hash => \%logo_hash, 152 }; 153 $www->process("index.tt", $vars, "index.html") or die $www->error(), "\n"; 154 155 # 156 # about 157 # 158 my ($p, $m) = $dbh->selectrow_array(qq{select count(*), 159 count(distinct manufacturer) from products}); 160 my ($nprice) = $dbh->selectrow_array("select count(*) from prices"); 161 162 # anything we haven't seen for over 30 days is stale 163 my ($prod_stale) = $dbh->selectrow_array(qq{select count(*) from products 164 where last_seen < ?}, undef, time - (30 * 24 * 60 * 60)); 165 166 my ($r) = $dbh->selectrow_array("select count(*) from retailers"); 167 168 # draw a graph of total number of products vs time 169 my ($first_seen) = $dbh->selectrow_array("select first_seen from products order by first_seen limit 1"); 170 my $num_weeks = (time - $first_seen) / (60 * 60 * 24 * 7); 171 my %totals_series; 172 for my $i (0..$num_weeks) { 173 my $x = $first_seen + $i * (60 * 60 * 24 * 7); 174 my ($y) = $dbh->selectrow_array("select count(*) from products where first_seen < ?", undef, $x); 175 $totals_series{"Total"}{$x} = { "price" => $y }; 176 177 ($y) = $dbh->selectrow_array("select count(*) from products where last_seen < ?", undef, $x - (60 * 60 * 24 * 30)); 178 $totals_series{"Out of date"}{$x} = { "price" => $y }; 179 } 180 181 # print Dumper(%totals_series); 182 my %series_metadata; 183 $series_metadata{"Total"} = { url => "", color => "000" }; 184 $series_metadata{"Out of date"} = { url => "", color => "F00" }; 185 my $svg = make_svg(\%totals_series, "no_part_num", \%series_metadata, ""); 186 187 make_path($svg_dir, { verbose => $args{v} }); 188 my $svg_path = "$svg_dir/history_summary.svg"; 189 190 open my $svg_fh, ">", "$svg_path" or die "couldn't open $svg_path: $!"; 191 print $svg_fh $svg->xmlify; 192 close $svg_fh; 193 194 # this is supposed to work??? alternative sucks 195 # $THOUSANDS_SEP = '/'; 196 my $de = new Number::Format(-thousands_sep => ' ', 197 -decimal_point => '.'); 198 199 $vars = { nprice => $de->format_number($nprice), 200 nret => $r, 201 nmanuf => $m, 202 nprod => $de->format_number($p - $prod_stale), 203 nprod_stale => $prod_stale 204 }; 205 $www->process("about.tt", $vars, "about.html") or die $www->error(), "\n"; 206 print "info: about\n" if ($args{v}); 207 208 209 # 210 # products 211 # 212 my $sql = "select * from products $where_stale"; 213 my $products = $dbh->selectall_hashref($sql, "part_num"); 214 while (my ($part_num, $row) = each %$products) { 215 my $part_link = linkify($part_num); 216 my $manuf_link = linkify($row->{manufacturer}); 217 218 $row->{description} = 219 get_description($row->{manufacturer}, $row->{part_num}); 220 221 my $url = "products/$manuf_link/$part_link.html"; 222 $www->process("product.tt", $row, $url) or die $www->error(), "\n"; 223 } 224 print "info: products (" . scalar(keys %$products) . ")\n" if ($args{v}); 225 226 227 # 228 # product svg;s 229 # 230 print "info: svg " if ($args{v}); 231 232 my @series_keys = ("retailer", "date"); 233 my $series_sth = $dbh->prepare(qq{select retailer, date, price from prices 234 where manufacturer = ? and part_num = ?}); 235 236 my $retailer_info = $dbh->selectall_hashref(qq{select name, url, color from 237 retailers}, "name"); 238 239 my $parts_sth = $dbh->prepare(qq{select distinct manufacturer, 240 part_num from products $where_stale}); 241 $parts_sth->execute(); 242 243 my $rendered = 0; 244 while (my ($manufacturer, $part_num) = $parts_sth->fetchrow_array()) { 245 spin() if ($args{v}); 246 247 my $series = $dbh->selectall_hashref($series_sth, \@series_keys, undef, 248 $manufacturer, $part_num); 249 my $svg = make_svg($series, $part_num, $retailer_info, "\$"); 250 251 my $manufacturer_dir = linkify($manufacturer); 252 my $part_link = linkify($part_num); 253 254 make_path("$svg_dir/$manufacturer_dir", { verbose => $args{v} }); 255 my $svg_path = "$svg_dir/$manufacturer_dir/$part_link.svg"; 256 257 open my $svg_fh, ">", "$svg_path" or die "couldn't open $svg_path: $!"; 258 print $svg_fh $svg->xmlify; 259 close $svg_fh; 260 261 $rendered++; 262 } 263 print "\b($rendered)\n" if ($args{v}); 264 265 $dbh->begin_work; 266 $dbh->do("update products set svg_stale = 0"); 267 $dbh->commit; 268 $dbh->disconnect(); 269 270 # 271 # generate an entire tree of html structure 272 # 273 sub generate_folder 274 { 275 my ($sql_stale_outer, $sql_types, $sql_products, $name, $sql_summary) = @_; 276 277 my $name_lc = lc ($name); 278 279 my $stale_list = $dbh->selectcol_arrayref($sql_stale_outer); 280 for my $it (@$stale_list) { 281 282 my $it_link = linkify($it); 283 my $types = $dbh->selectcol_arrayref($sql_types, undef, $it); 284 for my $type (sort @$types) { 285 286 my $products = $dbh->selectall_arrayref($sql_products, undef, $type, $it); 287 $_->[2] = get_description($_->[0], $_->[1]) for (@$products); 288 289 my $vars = { 290 name => $it, type => PL($type, scalar @$products), 291 products => $products, logo_hash => \%logo_hash 292 }; 293 my $type_link = linkify($type); 294 my $out_path = "$name_lc/$it_link/$type_link.html"; 295 $www->process("fine_list.tt", $vars, $out_path) 296 or die $www->error(), "\n"; 297 } 298 299 my $summary = $dbh->selectall_arrayref($sql_summary, undef, $it); 300 my $vars = { type => $name_lc, name => $it, info => $summary }; 301 $www->process("summary.tt", $vars, "$name_lc/$it_link.html") 302 or die $www->error(), "\n"; 303 304 print "info: $name_lc/$it_link\n" if ($args{v}); 305 } 306 } 307 308 sub linkify 309 { 310 my $type = shift; 311 312 my $type_link = lc($type); 313 $type_link =~ s/[ #\/]/_/g; 314 return $type_link; 315 } 316 317 sub get_description 318 { 319 my $manufacturer = shift; 320 my $part_num = shift; 321 322 my $descriptions = $dbh->selectcol_arrayref($desc_sth, undef, $manufacturer, 323 $part_num); 324 unless (@$descriptions) { 325 print "error: no descriptions for $manufacturer $part_num\n"; 326 } 327 328 # pick the shortest non-zero description 329 my $best = $descriptions->[0]; 330 for (@$descriptions) { 331 next if ($_ eq ""); 332 $best = $_ if (length($_) < length($best)); 333 } 334 335 return $best; 336 } 337 338 # 339 # make a new svg with provided coordinate and label data 340 # 341 sub make_svg 342 { 343 my ($series, $part_num, $metadata, $right_axis_prefix) = @_; 344 345 my ($left, $center, $right, $top, $middle, $bottom) = 346 (20, 430, 50, 15, 150, 20); 347 348 my $width = $right + $center + $left; 349 my $height = $top + $middle + $bottom; 350 351 my ($x_min, $x_max, $y_min, $y_max) = (100000000000, 0, 1000000, 0.00001); 352 while (my ($retailer, $values) = each %$series) { 353 for (keys %{$values}) { 354 my ($x, $y) = ($_, $values->{$_}{price}); 355 $x_min = $x if ($x < $x_min); 356 $x_max = $x if ($x > $x_max); 357 $y_min = $y if ($y < $y_min); 358 $y_max = $y if ($y > $y_max); 359 } 360 } 361 362 my $num_digits = ceil(log($y_max) / log(10)); 363 my $magnitude = 10 ** ($num_digits - 1); 364 365 $y_max = ceil($y_max / $magnitude) * $magnitude; 366 $y_min = floor($y_min / $magnitude) * $magnitude; 367 368 my ($domain, $range) = ($x_max - $x_min, $y_max - $y_min); 369 $domain = 24 * 60 * 60 if ($domain <= 0); 370 $range = 20 if ($range < 20); 371 372 # clamp the total size of this thing with viewBox 373 my $svg = SVG->new(viewBox => "0 0 $width $height"); 374 my $defs = $svg->defs(); 375 my ($x_scale, $y_scale) = ($center / $domain, $middle / $range); 376 377 # $defs->tag("link", href => "/charts.css", type => "text/css", 378 # rel => "stylesheet", xmlns => "http://www.w3.org/1999/xhtml"); 379 380 # y axis labels (prices) 381 my $num_labels = 5; 382 for (1..$num_labels) { 383 my $step = ($_ - 1) / ($num_labels - 1); 384 my $price = ceil($y_max - $range * $step); 385 my $y = $top + $middle * $step; 386 387 $svg->text( 388 id => "price_$_", x => $left + $center + 5, y => $y - 2, 389 fill => "black", "font-family" => "sans-serif", 390 "font-size" => "0.8em" 391 )->cdata("$right_axis_prefix$price"); 392 393 $svg->line( 394 id => "horizontal_line_$_", x1 => $left, y1 => $y, 395 x2 => $width , y2 => $y, 396 style => "stroke: #BBB; stroke-width: 0.5px;" 397 ); 398 } 399 400 $num_labels = 4; 401 402 # x axis labels (dates) 403 if ($domain == 24 * 60 * 60) { 404 $num_labels = 2; 405 } 406 for (1..$num_labels) { 407 my $step = ($_ - 1) / ($num_labels - 1); 408 409 # make the dates not hang off the ends of the chart 410 my $text_anchor = "middle"; 411 $text_anchor = "start" if ($_ == 1); 412 # $text_anchor = "end" if ($_ == $num_labels); 413 414 # print the dates along the x axis 415 my $x = $left + $center * $step; 416 my $time = $x_min + $domain * $step; 417 $svg->text( 418 id => "date_$_", x => $x, y => $height, 419 "text-anchor" => $text_anchor, "fill" => "black", 420 "font-family" => "sans-serif", "font-size" => "0.8em" 421 )->cdata(strftime("%b %e %Y", localtime($time))); 422 423 # print the little tick marks down from the x axis 424 my $x_axis = $top + $middle; 425 $svg->line( 426 id => "date_marker_$_", x1 => $x, y1 => $x_axis, 427 x2 => $x, y2 => $x_axis + 5, 428 style => "stroke: #BBB; stroke-width: 0.5px;" 429 ); 430 } 431 432 while (my ($series_name, $values) = each %$series) { 433 my $retailer_id = lc($series_name); 434 $retailer_id =~ s/ /_/; 435 436 my (@xs, @ys); 437 for (sort keys %{$values}) { 438 my ($x, $y) = ($_, $values->{$_}{price}); 439 push @xs, sprintf "%.3f", ($x - $x_min) * $x_scale + $left; 440 push @ys, sprintf "%.3f", $height - $bottom - ($y - $y_min) * $y_scale; 441 } 442 443 if (@xs < 3) { 444 my $points = $svg->get_path(x => \@xs, y => \@ys, -type => "path"); 445 $defs->path(%$points, id => "path_$retailer_id"); 446 } 447 else { 448 # catmull rom time 449 my $d = catmullrom_to_bezier(\@xs, \@ys); 450 $defs->tag("path", "d" => $d, id => "path_$retailer_id"); 451 } 452 453 my $info = $metadata->{$series_name}; 454 my ($url, $color) = ($info->{url}, $info->{color}); 455 456 # xlink:href's don't like raw ampersands 457 $url =~ s/&/&/g; 458 459 # the line, points, and label can be grouped under one anchor 460 my $anchor = $svg->anchor(-href => $url . $part_num, 461 target => "new_window"); 462 463 # draw path first 464 $anchor->use( 465 -href => "#path_$retailer_id", 466 style => qq{stroke: #$color; fill-opacity: 0; 467 stroke-width: 2; stroke-opacity: 0.8;} 468 ); 469 470 # now draw individual data points 471 my $rand_token = sprintf("%x", arc4random()); 472 $defs->circle(id => $rand_token, cx => 0, cy => 0, r => 2, 473 style => "stroke: #$color; fill: white; stoke-width: 2;" 474 ); 475 while (my $i = each @xs) { 476 $anchor->use(-href => "#$rand_token", 477 x => $xs[$i], y => $ys[$i] 478 ); 479 } 480 481 # show series name along the start of the path 482 $anchor->text( 483 fill => "#$color", style => "font-family: sans-serif;" 484 )->tag("textPath", -href => "#path_$retailer_id" 485 )->tag("tspan", "dy" => "-5")->cdata($series_name); 486 } 487 488 # when graph is loaded make a sliding motion show the graph lines 489 # my $mask = $svg->rectangle( 490 # x => 0, y => 0, width => 1000, height => 250, rx => 0, ry => 0, 491 # id => "mask", fill => "#FFF" 492 # ); 493 # $mask->animate( 494 # attributeName => "x", values => "0;1000", dur => "0.2s", 495 # fill => "freeze", -method => "" 496 # ); 497 498 return $svg 499 } 500 501 # 502 # taken from https://gist.github.com/njvack/6925609 503 # 504 sub catmullrom_to_bezier 505 { 506 my $xs_ref = shift; 507 my $ys_ref = shift; 508 509 my $d = "M $xs_ref->[0], $ys_ref->[0] "; 510 my $iLen = @$xs_ref; 511 for (my $i = 0; $iLen - 1 > $i; $i++) { 512 513 my @offsets = (-1, 0, 1, 2); 514 if ($i == 0) { 515 @offsets = (0, 0, 1, 2); 516 } elsif ($i == ($iLen - 2)) { 517 @offsets = (-1, 0, 1, 1); 518 } 519 520 my (@xs, @ys); 521 for my $idx (@offsets) { 522 push @xs, $xs_ref->[$i + $idx]; 523 push @ys, $ys_ref->[$i + $idx]; 524 } 525 526 my $x_row = Math::MatrixReal->new_from_rows([[@xs]]); 527 my $y_row = Math::MatrixReal->new_from_rows([[@ys]]); 528 529 $x_row = $x_row * $m_t; 530 $y_row = $y_row * $m_t; 531 532 my ($x, $y) = ($x_row->[0][0], $y_row->[0][0]); 533 534 # knock some digits of precision off 535 $d .= sprintf("C %0.2f, %0.2f %0.2f, %0.2f %0.2f, %0.2f ", 536 $x->[1], $y->[1], $x->[2], $y->[2], $x->[3], $y->[3]); 537 } 538 539 return $d; 540 }