commit 639fda6cc595b4d47f2fde943eca9aa7c7c3957a
parent 0847defadc6cc3ad2794969910dbf7114f3d099f
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun,  5 Apr 2015 12:26:49 -0600
price_scraper: fix description scraping again
Diffstat:
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/price_scraper b/price_scraper
@@ -118,15 +118,18 @@ for my $retailer (sort keys %{$cfg->{retailers}}) {
 	$price = min($price_r, $price_s) if ($price_r && $price_s);
 
 	# opportunistically scrape descriptions
-	my $desc = "";
+	my ($found_descr, $descr);
 	if ($desc_tag) {
 		# scrape description, use first one found on page
-		($desc) = $search_results->find($desc_tag)->text_array();
-		$desc =~ s/^\s+//;
-		$desc =~ s/\s+$//;
-		if ($desc ne "" && $args{v}) {
-			my $desc_s = trunc_line($desc, length($retailer) + 8);
-			print "info: $retailer: $desc_s\n";
+		($descr) = $search_results->find($desc_tag)->text_array();
+		if (defined $descr && $descr ne "") {
+			$descr =~ s/^\s+//;
+			$descr =~ s/\s+$//;
+			$descr =~ s/$manufacturer//;
+			$descr =~ s/$part_num//;
+
+			my $descr_s = trunc_line($descr, length($retailer) + 8);
+			print "info: $retailer: $descr_s\n" if ($args{v});
 		}
 	}
 
@@ -139,7 +142,7 @@ for my $retailer (sort keys %{$cfg->{retailers}}) {
 		time - $retailer_start);
 	$products_sth->execute($start, $part_num, $manufacturer);
 	$descriptions_sth->execute($manufacturer, $part_num, $retailer,
-		$desc, time) if ($desc ne "");
+		$descr, time) if ($found_descr);
 
 	print "info: $retailer: db: inserted \$$price\n" if ($args{v});
 }