pricecharts

track prices of consumer electronics
Log | Files | Refs | README

commit 731068d4618312f0ad95972f67c5868bd418cb20
parent aeb25c83669a7d9215628964457d4da427cd0610
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 22 Mar 2015 16:22:41 -0600

grammar: simplify keywords

Diffstat:
M PriceChart.pm | 18 ++++++++++--------
M pc_fcgi | 11 +++++------
M price_scraper | 6 +++---
M pricechart.cfg | 116 +++++++++++++++++++++++++++++++++++++++----------------------------------------
4 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/PriceChart.pm b/PriceChart.pm @@ -13,14 +13,16 @@ sub get_config _sections => ["general", "http", "retailers"], general => { _vars => [ - 'user_agent', - 'email', - 'smtp', + "agent", + "email", + "smtp", + # XXX: add simple regex validation here + "addrs" ], }, http => { _vars => [ - "socket_file", + "socket", "uid", "gid", "chroot", @@ -33,9 +35,9 @@ sub get_config _sections => ["/[A-Za-z ]+/"], "/[A-Za-z ]+/" => { _vars => [ - "search_url", - "price_regular", - "price_sale", + "url", + "reg_tag", + "sale_tag", "color", "title" ] @@ -95,7 +97,7 @@ sub new_ua $ua->default_header("Accept-Encoding" => scalar HTTP::Message::decodable()); $ua->default_header("Accept-Charset" => "utf-8"); $ua->default_header("Accept-Language" => "en-US"); - $ua->default_header("User-Agent" => $cfg->{"user_agent"}); + $ua->default_header("User-Agent" => $cfg->{agent}); my $headers = $ua->default_headers; for (sort keys %$headers) { diff --git a/pc_fcgi b/pc_fcgi @@ -52,17 +52,16 @@ print "info: uid:gid set to $<:$(\n" if ($args{v}); print "info: opening syslog\n" if ($args{v}); openlog("pc_fcgi", LOG_PID, LOG_DAEMON); -my $socket_file = $http_cfg{"socket_file"}; -if (-e $socket_file) { - my $msg = "socket file $socket_file exists, not starting\n"; +if (-e $http_cfg{socket}) { + my $msg = "socket $http_cfg{socket} exists, not starting\n"; print "error: $msg\n" if ($args{v}); syslog(LOG_ERR, $msg); exit; } # XXX: i need to be sudo for this to work? after we've dropped privileges? 
-print "info: opening $socket_file\n" if ($args{v}); -my $socket = FCGI::OpenSocket($socket_file, 1024); +print "info: opening $http_cfg{socket}\n" if ($args{v}); +my $socket = FCGI::OpenSocket($http_cfg{socket}, 1024); print "info: opening $http_cfg{db_dir}/pricechart.db\n" if ($args{v}); my $dbh = get_dbh($cfg->{"http"}, $http_cfg{db_dir}, $args{v}); @@ -109,7 +108,7 @@ $search_sth = undef; $dbh->disconnect(); FCGI::CloseSocket($socket); -unlink($socket_file) or print "error: could not unlink $socket_file: $!"; +unlink($http_cfg{socket}) or print "error: could not unlink $http_cfg{socket}: $!"; sub child_sig { diff --git a/price_scraper b/price_scraper @@ -69,10 +69,10 @@ my ($start, @status, $i) = (time, "", -1); for my $retailer (sort keys %{$cfg->{retailers}}) { my %props = %{$cfg->{retailers}{$retailer}}; # this could probably be done smarter - my $url = $props{"search_url"}; + my $url = $props{"url"}; my $color = $props{"color"}; - my $price_tag = $props{"price_regular"}; - my $sale_tag = $props{"price_sale"}; + my $price_tag = $props{"reg_tag"}; + my $sale_tag = $props{"sale_tag"}; my $desc_tag = $props{"title"}; my $retailer_start = time; diff --git a/pricechart.cfg b/pricechart.cfg @@ -1,20 +1,22 @@ *** general *** # Chrome 36 Win7 64bit -user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36; -email = kyle@getaddrinfo.net -smtp = smtp.getaddrinfo.net +agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36; +email = kyle@getaddrinfo.net +smtp = smtp.getaddrinfo.net +addrs = 216.171.227.98 216.171.227.100 *** http *** +uid = www +gid = daemon +chroot = /var/www -uid = www -gid = daemon -chroot = /var/www -socket_file = /run/search.sock -db_dir = /db -htdocs = /htdocs/pricechart -logs = /logs +# everything below is relative to chroot +socket = /run/search.sock +db_dir = /db +htdocs = /htdocs/pricechart +logs = /logs *** 
retailers *** @@ -36,18 +38,18 @@ logs = /logs # </div> # </div> # -color = 56B849 -search_url = http://www.memoryexpress.com/Search/Products?Search= -title = .ProductTitle -price_regular = .PIV_Price -price_sale = .PIV_PriceSale +color = 56B849 +url = http://www.memoryexpress.com/Search/Products?Search= +title = .ProductTitle +reg_tag = .PIV_Price +sale_tag = .PIV_PriceSale + Future Shop -color = BA0024 -search_url = http://www.futureshop.ca/Search/SearchResults.aspx?query= -title = .prod-title -price_regular = .dollars +color = BA0024 +url = http://www.futureshop.ca/Search/SearchResults.aspx?query= +title = .prod-title +reg_tag = .dollars + Visions Electronics @@ -62,42 +64,37 @@ price_regular = .dollars # Products that are on sale return both regPrice and salePrice classes # while regularly priced productes only return the regPrice class. # -color = 000 -search_url = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText= -title = .plProductName -price_regular = .price -price_sale = .salePrice +color = 000 +url = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText= +title = .plProductName +reg_tag = .price +sale_tag = .salePrice +# type_includes = televisions + London Drugs -color = 005DAB -search_url = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q= -title = .productname -price_regular = .pricing +color = 005DAB +url = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q= +title = .productname +reg_tag = .pricing # not sure about the below # reg_price = .standardprice # sale_price = .salesprice # + Amazon -# color = FFA51D -# search_url = http://www.amazon.ca/s/keywords= -# title = .newaps -# price_regular = .price - -# + Tiger Direct -# color = 660 -# search_uri = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords= -# price_context = -# reg_price = .salePrice -# sale_price = +# color = FFA51D +# url = 
http://www.amazon.ca/s/keywords= +# title = .newaps +# reg_tag = .price + + Best Buy -color = 003B64 -search_url = http://www.bestbuy.ca/Search/SearchResults.aspx?query= -#title = .product-title, .prod-title -#sale_price = .price-onsale -price_regular = .prodprice +color = 003B64 +url = http://www.bestbuy.ca/Search/SearchResults.aspx?query= +reg_tag = .prodprice +# title = .product-title, .prod-title +# sale_tag = .price-onsale # + RadioShack @@ -108,26 +105,27 @@ price_regular = .prodprice # reg_price = .price # + Walmart -# color = 0000FF -# search_url = http://www.walmart.ca/search/ -# # title = .title -# price_regular = .price-current +# color = 0000FF +# url = http://www.walmart.ca/search/ +# title = .title +# reg_tag = .price-current # this one has a table layout with no id= tags, making scraping impossible with # the current technique # + NCIX -# color = -# search_url = http://search.ncix.com/search/?q= -# price_regular = +# color = +# url = http://search.ncix.com/search/?q= +# reg_tag = + Newegg -color = F8A42A -search_url = http://www.newegg.ca/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description= -price_regular = .price-current -title = .itemDescription +color = F8A42A +url = http://www.newegg.ca/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description= +title = .itemDescription +reg_tag = .price-current + + Tiger Direct -color = -search_url = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords= -price_regular = .salePrice -title = .itemName +color = FED443 +url = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords= +title = .itemName +reg_tag = .salePrice