pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit a4bb9216133b28852aee52540c08e195b09de9e7
parent 9b531cbec72633fd4664a2aedaa71bb08b458654
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 19 Oct 2014 21:14:59 -0600

just specify a var dir in the config

Diffstat:
A etc/pricechart.cfg | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M price_scraper.pl | 2 +-
D pricechart.cfg | 99 -------------------------------------------------------------------------------
M shared.pm | 40 ++++++++++++++++++++++++++++++++--------
4 files changed, 130 insertions(+), 108 deletions(-)

diff --git a/etc/pricechart.cfg b/etc/pricechart.cfg @@ -0,0 +1,97 @@ +*** general *** + +var = /home/kyle/src/pricegraph +# Chrome 36 Win7 64bit +user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36; +email = kyle@getaddrinfo.net +smtp = smtp.getaddrinfo.net + +*** vendors *** + ++ Memory Express +# +# On sale: +# <div class="PIV_BotPrices"> +# <div class="PIV_PriceRegular">Reg: <span>$359.99</span></div> +# <div class="PIV_PriceSale"> +# $279.99 +# </div> +# </div> +# +# Regular price: +# <div class="PIV_BotPrices"> +# <div class="PIV_Price"> +# <span>$359.99</span> +# </div> +# </div> +# +color = 56B849 +search_uri = http://www.memoryexpress.com/Search/Products?Search= +#title = .ProductTitle +reg_price = .PIV_Price +sale_price = .PIV_PriceSale + ++ Future Shop +color = BA0024 +search_uri = http://www.futureshop.ca/Search/SearchResults.aspx?query= +#title = .prod-title +reg_price = .dollars + ++ Visions Electronics +# +# <td class="price"> +# <span id="ctl00_..." class="regPrice">Price: <span>$509.99</span></span> +# <span id="ctl00_..." class="salePrice">Sale Price: $336.00</span> +# </td> +# +# price is a unique class when only a single product is returned and +# can be used to make sure only a single product has been returned. +# Products that are on sale return both regPrice and salePrice classes +# while regularly priced productes only return the regPrice class. 
+# +color = 000 +search_uri = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText= +#title = .plProductName +reg_price = .price +# sale_price = .salePrice + ++ London Drugs +color = 005DAB +search_uri = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q= +#title = .productname +reg_price = .pricing +#reg_price = .standardprice +#sale_price = .salesprice + +# + Amazon +# color = FFA51D +# search_uri = http://www.amazon.ca/s/keywords= +# #title = .newaps +# reg_price = .price + +# + Tiger Direct +# color = 660 +# search_uri = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords= +# price_context = +# reg_price = .salePrice +# sale_price = + ++ Best Buy +color = 003B64 +search_uri = http://www.bestbuy.ca/Search/SearchResults.aspx?query= +#title = .product-title, .prod-title +#sale_price = .price-onsale +reg_price = .prodprice + +# + RadioShack +# color = E76453 +# search_uri = http://www.radioshack.com/search/controller.jsp?kw= +# title = .title +# price_context = .product-price-tag +# reg_price = .price + +# + Walmart +# color = 0000FF +# search_uri = http://www.walmart.ca/search/ +# title = .title +# reg_price = .price-current diff --git a/price_scraper.pl b/price_scraper.pl @@ -14,8 +14,8 @@ use shared; my $cfg = get_config(); my $dbh = get_dbh($cfg); my $ua = get_ua($cfg); +my $log = get_log($cfg, "pricechart_scrapes"); -open my $log, ">>", "$cfg->{general}{log_file}" or die $!; my $part_num; if ($args{p}) { diff --git a/pricechart.cfg b/pricechart.cfg @@ -1,99 +0,0 @@ -*** general *** - -http_path = /var/www/htdocs/pricegraph -log_file = pricechart_log.txt -# Chrome 36 Win7 64bit -user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36; -email = kyle@getaddrinfo.net -smtp = smtp.getaddrinfo.net -db_file = pricechart.db - -*** vendors *** - -+ Memory Express -# -# On sale: -# <div class="PIV_BotPrices"> -# <div 
class="PIV_PriceRegular">Reg: <span>$359.99</span></div> -# <div class="PIV_PriceSale"> -# $279.99 -# </div> -# </div> -# -# Regular price: -# <div class="PIV_BotPrices"> -# <div class="PIV_Price"> -# <span>$359.99</span> -# </div> -# </div> -# -color = 56B849 -search_uri = http://www.memoryexpress.com/Search/Products?Search= -#title = .ProductTitle -reg_price = .PIV_Price -sale_price = .PIV_PriceSale - -+ Future Shop -color = BA0024 -search_uri = http://www.futureshop.ca/Search/SearchResults.aspx?query= -#title = .prod-title -reg_price = .dollars - -+ Visions Electronics -# -# <td class="price"> -# <span id="ctl00_..." class="regPrice">Price: <span>$509.99</span></span> -# <span id="ctl00_..." class="salePrice">Sale Price: $336.00</span> -# </td> -# -# price is a unique class when only a single product is returned and -# can be used to make sure only a single product has been returned. -# Products that are on sale return both regPrice and salePrice classes -# while regularly priced productes only return the regPrice class. 
-# -color = 000 -search_uri = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText= -#title = .plProductName -reg_price = .price -# sale_price = .salePrice - -+ London Drugs -color = 005DAB -search_uri = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q= -#title = .productname -reg_price = .pricing -#reg_price = .standardprice -#sale_price = .salesprice - -# + Amazon -# color = FFA51D -# search_uri = http://www.amazon.ca/s/keywords= -# #title = .newaps -# reg_price = .price - -# + Tiger Direct -# color = 660 -# search_uri = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords= -# price_context = -# reg_price = .salePrice -# sale_price = - -+ Best Buy -color = 003B64 -search_uri = http://www.bestbuy.ca/Search/SearchResults.aspx?query= -#title = .product-title, .prod-title -#sale_price = .price-onsale -reg_price = .prodprice - -# + RadioShack -# color = E76453 -# search_uri = http://www.radioshack.com/search/controller.jsp?kw= -# title = .title -# price_context = .product-price-tag -# reg_price = .price - -# + Walmart -# color = 0000FF -# search_uri = http://www.walmart.ca/search/ -# title = .title -# reg_price = .price-current diff --git a/shared.pm b/shared.pm @@ -7,7 +7,7 @@ use Getopt::Std; use LWP::Simple; @ISA = ("Exporter"); -@EXPORT = qw(get_dom get_config get_dbh get_ua vprint vprintf %args); +@EXPORT = qw(get_dom get_config get_dbh get_ua get_log vprint vprintf %args); our %args; @@ -31,8 +31,8 @@ sub get_dom sub get_config { if (!$args{f}) { - if (-e "pricechart.cfg") { - $cfg_file = "pricechart.cfg"; + if (-e "etc/pricechart.cfg") { + $cfg_file = "etc/pricechart.cfg"; } else { $cfg_file = "/etc/pricechart.cfg"; } @@ -49,24 +49,28 @@ sub get_config }, general => { _vars => [ - 'http_path', - 'log_file', + 'var', 'user_agent', 'email', 'smtp', - 'db_file' ], }, }); - return $parser->parse($cfg_file) or die "ERROR: $parser->{err}\n"; + + my $cfg =$parser->parse($cfg_file) or die 
"error: $parser->{err}\n"; + make_dir($cfg->{general}{var}); + + return $cfg; } sub get_dbh { my $cfg = shift; + my $db_dir = "$cfg->{general}{var}/db"; + make_dir($db_dir); my $dbh = DBI->connect( - "dbi:SQLite:dbname=$cfg->{general}{db_file}", + "dbi:SQLite:dbname=$db_dir/pricechart.db", "", "", { RaiseError => 1 },) or die $DBI::errstr; @@ -82,6 +86,26 @@ sub get_ua return $ua; } +sub get_log +{ + my $cfg = shift; + my $file = shift; + my $log_dir = "$cfg->{general}{var}/log"; + + make_dir($log_dir); + open my $log, ">>", "$log_dir/$file.txt"; + return $log; +} + +sub make_dir +{ + my $dir = shift; + + unless (-e $dir or mkdir $dir) { + die "Could not create directory $dir: $!\n" + } +} + sub vprint { print $_[0] if ($args{v});