commit 02d15038dc7454f7408056a2e3fb7642f3d0cfb1
parent 0bc5cda9d1589075ff2c040257731dee8b7de1eb
Author: Kyle Milz <kyle@getaddrinfo.net>
Date: Fri, 3 Apr 2015 00:06:44 -0600
product_scraper: randomize iteration over thumbnails
Diffstat:
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/product_scraper b/product_scraper
@@ -108,9 +108,12 @@ sub mem_exp_scrape_class
my $total = scalar @$thumbnails;
print "$info_hdr: $total total\n" if ($args{v});
+ # randomize the combined results so we don't visit them in their original order (NOTE: sort with a random comparator is not a uniform shuffle)
+ my @rand_thumbnails = sort { rand > .5 } @$thumbnails;
+
# extract and store part number, brand, and description
my ($new, $old, $err, $start, $i) = (0, 0, 0, time, 0);
- for my $thumbnail_html (@$thumbnails) {
+ for my $thumbnail_html (@rand_thumbnails) {
$i++;
my $thumb_hdr = "$info_hdr: $i/$total";