pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 639fda6cc595b4d47f2fde943eca9aa7c7c3957a
parent 0847defadc6cc3ad2794969910dbf7114f3d099f
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun,  5 Apr 2015 12:26:49 -0600

price_scraper: fix description scraping again

Diffstat:
M price_scraper | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/price_scraper b/price_scraper
@@ -118,15 +118,18 @@ for my $retailer (sort keys %{$cfg->{retailers}}) {
 $price = min($price_r, $price_s) if ($price_r && $price_s);
 # opportunistically scrape descriptions
- my $desc = "";
+ my ($found_descr, $descr);
 if ($desc_tag) {
 # scrape description, use first one found on page
- ($desc) = $search_results->find($desc_tag)->text_array();
- $desc =~ s/^\s+//;
- $desc =~ s/\s+$//;
- if ($desc ne "" && $args{v}) {
- my $desc_s = trunc_line($desc, length($retailer) + 8);
- print "info: $retailer: $desc_s\n";
+ ($descr) = $search_results->find($desc_tag)->text_array();
+ if (defined $descr && $descr ne "") {
+ $descr =~ s/^\s+//;
+ $descr =~ s/\s+$//;
+ $descr =~ s/$manufacturer//;
+ $descr =~ s/$part_num//;
+
+ my $descr_s = trunc_line($descr, length($retailer) + 8);
+ print "info: $retailer: $descr_s\n" if ($args{v});
 }
 }
@@ -139,7 +142,7 @@ for my $retailer (sort keys %{$cfg->{retailers}}) {
 time - $retailer_start);
 $products_sth->execute($start, $part_num, $manufacturer);
 $descriptions_sth->execute($manufacturer, $part_num, $retailer,
- $desc, time) if ($desc ne "");
+ $descr, time) if ($found_descr);
 print "info: $retailer: db: inserted \$$price\n" if ($args{v});
 }