commit a4bb9216133b28852aee52540c08e195b09de9e7
parent 9b531cbec72633fd4664a2aedaa71bb08b458654
Author: Kyle Milz <kyle@getaddrinfo.net>
Date: Sun, 19 Oct 2014 21:14:59 -0600
just specify a var dir in the config
Diffstat:
4 files changed, 130 insertions(+), 108 deletions(-)
diff --git a/etc/pricechart.cfg b/etc/pricechart.cfg
@@ -0,0 +1,97 @@
+*** general ***
+
+var = /home/kyle/src/pricegraph
+# Chrome 36 Win7 64bit
+user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36;
+email = kyle@getaddrinfo.net
+smtp = smtp.getaddrinfo.net
+
+*** vendors ***
+
++ Memory Express
+#
+# On sale:
+# <div class="PIV_BotPrices">
+# <div class="PIV_PriceRegular">Reg: <span>$359.99</span></div>
+# <div class="PIV_PriceSale">
+# $279.99
+# </div>
+# </div>
+#
+# Regular price:
+# <div class="PIV_BotPrices">
+# <div class="PIV_Price">
+# <span>$359.99</span>
+# </div>
+# </div>
+#
+color = 56B849
+search_uri = http://www.memoryexpress.com/Search/Products?Search=
+#title = .ProductTitle
+reg_price = .PIV_Price
+sale_price = .PIV_PriceSale
+
++ Future Shop
+color = BA0024
+search_uri = http://www.futureshop.ca/Search/SearchResults.aspx?query=
+#title = .prod-title
+reg_price = .dollars
+
++ Visions Electronics
+#
+# <td class="price">
+# <span id="ctl00_..." class="regPrice">Price: <span>$509.99</span></span>
+# <span id="ctl00_..." class="salePrice">Sale Price: $336.00</span>
+# </td>
+#
+# price is a unique class when only a single product is returned and
+# can be used to make sure only a single product has been returned.
+# Products that are on sale return both regPrice and salePrice classes
+# while regularly priced products only return the regPrice class.
+#
+color = 000
+search_uri = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText=
+#title = .plProductName
+reg_price = .price
+# sale_price = .salePrice
+
++ London Drugs
+color = 005DAB
+search_uri = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q=
+#title = .productname
+reg_price = .pricing
+#reg_price = .standardprice
+#sale_price = .salesprice
+
+# + Amazon
+# color = FFA51D
+# search_uri = http://www.amazon.ca/s/keywords=
+# #title = .newaps
+# reg_price = .price
+
+# + Tiger Direct
+# color = 660
+# search_uri = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords=
+# price_context =
+# reg_price = .salePrice
+# sale_price =
+
++ Best Buy
+color = 003B64
+search_uri = http://www.bestbuy.ca/Search/SearchResults.aspx?query=
+#title = .product-title, .prod-title
+#sale_price = .price-onsale
+reg_price = .prodprice
+
+# + RadioShack
+# color = E76453
+# search_uri = http://www.radioshack.com/search/controller.jsp?kw=
+# title = .title
+# price_context = .product-price-tag
+# reg_price = .price
+
+# + Walmart
+# color = 0000FF
+# search_uri = http://www.walmart.ca/search/
+# title = .title
+# reg_price = .price-current
diff --git a/price_scraper.pl b/price_scraper.pl
@@ -14,8 +14,8 @@ use shared;
my $cfg = get_config();
my $dbh = get_dbh($cfg);
my $ua = get_ua($cfg);
+my $log = get_log($cfg, "pricechart_scrapes");
-open my $log, ">>", "$cfg->{general}{log_file}" or die $!;
my $part_num;
if ($args{p}) {
diff --git a/pricechart.cfg b/pricechart.cfg
@@ -1,99 +0,0 @@
-*** general ***
-
-http_path = /var/www/htdocs/pricegraph
-log_file = pricechart_log.txt
-# Chrome 36 Win7 64bit
-user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36;
-email = kyle@getaddrinfo.net
-smtp = smtp.getaddrinfo.net
-db_file = pricechart.db
-
-*** vendors ***
-
-+ Memory Express
-#
-# On sale:
-# <div class="PIV_BotPrices">
-# <div class="PIV_PriceRegular">Reg: <span>$359.99</span></div>
-# <div class="PIV_PriceSale">
-# $279.99
-# </div>
-# </div>
-#
-# Regular price:
-# <div class="PIV_BotPrices">
-# <div class="PIV_Price">
-# <span>$359.99</span>
-# </div>
-# </div>
-#
-color = 56B849
-search_uri = http://www.memoryexpress.com/Search/Products?Search=
-#title = .ProductTitle
-reg_price = .PIV_Price
-sale_price = .PIV_PriceSale
-
-+ Future Shop
-color = BA0024
-search_uri = http://www.futureshop.ca/Search/SearchResults.aspx?query=
-#title = .prod-title
-reg_price = .dollars
-
-+ Visions Electronics
-#
-# <td class="price">
-# <span id="ctl00_..." class="regPrice">Price: <span>$509.99</span></span>
-# <span id="ctl00_..." class="salePrice">Sale Price: $336.00</span>
-# </td>
-#
-# price is a unique class when only a single product is returned and
-# can be used to make sure only a single product has been returned.
-# Products that are on sale return both regPrice and salePrice classes
-# while regularly priced productes only return the regPrice class.
-#
-color = 000
-search_uri = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText=
-#title = .plProductName
-reg_price = .price
-# sale_price = .salePrice
-
-+ London Drugs
-color = 005DAB
-search_uri = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q=
-#title = .productname
-reg_price = .pricing
-#reg_price = .standardprice
-#sale_price = .salesprice
-
-# + Amazon
-# color = FFA51D
-# search_uri = http://www.amazon.ca/s/keywords=
-# #title = .newaps
-# reg_price = .price
-
-# + Tiger Direct
-# color = 660
-# search_uri = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords=
-# price_context =
-# reg_price = .salePrice
-# sale_price =
-
-+ Best Buy
-color = 003B64
-search_uri = http://www.bestbuy.ca/Search/SearchResults.aspx?query=
-#title = .product-title, .prod-title
-#sale_price = .price-onsale
-reg_price = .prodprice
-
-# + RadioShack
-# color = E76453
-# search_uri = http://www.radioshack.com/search/controller.jsp?kw=
-# title = .title
-# price_context = .product-price-tag
-# reg_price = .price
-
-# + Walmart
-# color = 0000FF
-# search_uri = http://www.walmart.ca/search/
-# title = .title
-# reg_price = .price-current
diff --git a/shared.pm b/shared.pm
@@ -7,7 +7,7 @@ use Getopt::Std;
use LWP::Simple;
@ISA = ("Exporter");
-@EXPORT = qw(get_dom get_config get_dbh get_ua vprint vprintf %args);
+@EXPORT = qw(get_dom get_config get_dbh get_ua get_log vprint vprintf %args);
our %args;
@@ -31,8 +31,8 @@ sub get_dom
sub get_config
{
if (!$args{f}) {
- if (-e "pricechart.cfg") {
- $cfg_file = "pricechart.cfg";
+ if (-e "etc/pricechart.cfg") {
+ $cfg_file = "etc/pricechart.cfg";
} else {
$cfg_file = "/etc/pricechart.cfg";
}
@@ -49,24 +49,28 @@ sub get_config
},
general => {
_vars => [
- 'http_path',
- 'log_file',
+ 'var',
'user_agent',
'email',
'smtp',
- 'db_file'
],
},
});
- return $parser->parse($cfg_file) or die "ERROR: $parser->{err}\n";
+
+ my $cfg =$parser->parse($cfg_file) or die "error: $parser->{err}\n";
+ make_dir($cfg->{general}{var});
+
+ return $cfg;
}
sub get_dbh
{
my $cfg = shift;
+ my $db_dir = "$cfg->{general}{var}/db";
+ make_dir($db_dir);
my $dbh = DBI->connect(
- "dbi:SQLite:dbname=$cfg->{general}{db_file}",
+ "dbi:SQLite:dbname=$db_dir/pricechart.db",
"",
"",
{ RaiseError => 1 },) or die $DBI::errstr;
@@ -82,6 +86,26 @@ sub get_ua
return $ua;
}
+# Open the append-mode log file "<var>/log/<file>.txt", creating the log
+# directory first if it does not exist yet.
+#
+# Arguments:
+#   $cfg  - parsed config; $cfg->{general}{var} is used as the base directory
+#   $file - log file base name; ".txt" is appended to it
+#
+# Returns the opened filehandle. Dies if the log directory cannot be created
+# or the log file cannot be opened.
+sub get_log
+{
+ my $cfg = shift;
+ my $file = shift;
+ my $log_dir = "$cfg->{general}{var}/log";
+
+ make_dir($log_dir);
+
+ # Fail here rather than hand back an unopened handle: printing to one
+ # only warns, so every log line would otherwise be silently dropped.
+ my $log_path = "$log_dir/$file.txt";
+ open my $log, ">>", $log_path
+  or die "Could not open log file $log_path: $!\n";
+ return $log;
+}
+
+# Make sure $dir exists as a directory, creating it when it is missing.
+#
+# Only the last path component is created (plain mkdir), so the parent
+# directory must already exist.
+#
+# Arguments:
+#   $dir - path of the directory to ensure
+#
+# Dies if $dir is not a directory and cannot be created.
+sub make_dir
+{
+ my $dir = shift;
+
+ # Test with -d rather than -e: a regular file sitting at $dir must not be
+ # mistaken for an existing directory. In that case mkdir fails and the
+ # error is reported here instead of at some later open/connect.
+ unless (-d $dir or mkdir $dir) {
+  die "Could not create directory $dir: $!\n";
+ }
+}
+
sub vprint
{
print $_[0] if ($args{v});