commit 731068d4618312f0ad95972f67c5868bd418cb20
parent aeb25c83669a7d9215628964457d4da427cd0610
Author: Kyle Milz <kyle@getaddrinfo.net>
Date: Sun, 22 Mar 2015 16:22:41 -0600
grammar: simplify keywords
Diffstat:
4 files changed, 75 insertions(+), 76 deletions(-)
diff --git a/PriceChart.pm b/PriceChart.pm
@@ -13,14 +13,16 @@ sub get_config
_sections => ["general", "http", "retailers"],
general => {
_vars => [
- 'user_agent',
- 'email',
- 'smtp',
+ "agent",
+ "email",
+ "smtp",
+ # XXX: add simple regex validation here
+ "addrs"
],
},
http => {
_vars => [
- "socket_file",
+ "socket",
"uid",
"gid",
"chroot",
@@ -33,9 +35,9 @@ sub get_config
_sections => ["/[A-Za-z ]+/"],
"/[A-Za-z ]+/" => {
_vars => [
- "search_url",
- "price_regular",
- "price_sale",
+ "url",
+ "reg_tag",
+ "sale_tag",
"color",
"title"
]
@@ -95,7 +97,7 @@ sub new_ua
$ua->default_header("Accept-Encoding" => scalar HTTP::Message::decodable());
$ua->default_header("Accept-Charset" => "utf-8");
$ua->default_header("Accept-Language" => "en-US");
- $ua->default_header("User-Agent" => $cfg->{"user_agent"});
+ $ua->default_header("User-Agent" => $cfg->{agent});
my $headers = $ua->default_headers;
for (sort keys %$headers) {
diff --git a/pc_fcgi b/pc_fcgi
@@ -52,17 +52,16 @@ print "info: uid:gid set to $<:$(\n" if ($args{v});
print "info: opening syslog\n" if ($args{v});
openlog("pc_fcgi", LOG_PID, LOG_DAEMON);
-my $socket_file = $http_cfg{"socket_file"};
-if (-e $socket_file) {
- my $msg = "socket file $socket_file exists, not starting\n";
+if (-e $http_cfg{socket}) {
+ my $msg = "socket $http_cfg{socket} exists, not starting\n";
print "error: $msg\n" if ($args{v});
syslog(LOG_ERR, $msg);
exit;
}
# XXX: i need to be sudo for this to work? after we've dropped privileges?
-print "info: opening $socket_file\n" if ($args{v});
-my $socket = FCGI::OpenSocket($socket_file, 1024);
+print "info: opening $http_cfg{socket}\n" if ($args{v});
+my $socket = FCGI::OpenSocket($http_cfg{socket}, 1024);
print "info: opening $http_cfg{db_dir}/pricechart.db\n" if ($args{v});
my $dbh = get_dbh($cfg->{"http"}, $http_cfg{db_dir}, $args{v});
@@ -109,7 +108,7 @@ $search_sth = undef;
$dbh->disconnect();
FCGI::CloseSocket($socket);
-unlink($socket_file) or print "error: could not unlink $socket_file: $!";
+unlink($http_cfg{socket}) or print "error: could not unlink $http_cfg{socket}: $!";
sub child_sig
{
diff --git a/price_scraper b/price_scraper
@@ -69,10 +69,10 @@ my ($start, @status, $i) = (time, "", -1);
for my $retailer (sort keys %{$cfg->{retailers}}) {
my %props = %{$cfg->{retailers}{$retailer}};
# this could probably be done smarter
- my $url = $props{"search_url"};
+ my $url = $props{"url"};
my $color = $props{"color"};
- my $price_tag = $props{"price_regular"};
- my $sale_tag = $props{"price_sale"};
+ my $price_tag = $props{"reg_tag"};
+ my $sale_tag = $props{"sale_tag"};
my $desc_tag = $props{"title"};
my $retailer_start = time;
diff --git a/pricechart.cfg b/pricechart.cfg
@@ -1,20 +1,22 @@
*** general ***
# Chrome 36 Win7 64bit
-user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36;
-email = kyle@getaddrinfo.net
-smtp = smtp.getaddrinfo.net
+agent = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36;
+email = kyle@getaddrinfo.net
+smtp = smtp.getaddrinfo.net
+addrs = 216.171.227.98 216.171.227.100
*** http ***
+uid = www
+gid = daemon
+chroot = /var/www
-uid = www
-gid = daemon
-chroot = /var/www
-socket_file = /run/search.sock
-db_dir = /db
-htdocs = /htdocs/pricechart
-logs = /logs
+# everything below is relative to chroot
+socket = /run/search.sock
+db_dir = /db
+htdocs = /htdocs/pricechart
+logs = /logs
*** retailers ***
@@ -36,18 +38,18 @@ logs = /logs
# </div>
# </div>
#
-color = 56B849
-search_url = http://www.memoryexpress.com/Search/Products?Search=
-title = .ProductTitle
-price_regular = .PIV_Price
-price_sale = .PIV_PriceSale
+color = 56B849
+url = http://www.memoryexpress.com/Search/Products?Search=
+title = .ProductTitle
+reg_tag = .PIV_Price
+sale_tag = .PIV_PriceSale
+ Future Shop
-color = BA0024
-search_url = http://www.futureshop.ca/Search/SearchResults.aspx?query=
-title = .prod-title
-price_regular = .dollars
+color = BA0024
+url = http://www.futureshop.ca/Search/SearchResults.aspx?query=
+title = .prod-title
+reg_tag = .dollars
+ Visions Electronics
@@ -62,42 +64,37 @@ price_regular = .dollars
# Products that are on sale return both regPrice and salePrice classes
# while regularly priced products only return the regPrice class.
#
-color = 000
-search_url = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText=
-title = .plProductName
-price_regular = .price
-price_sale = .salePrice
+color = 000
+url = http://www.visions.ca/catalogue/category/ProductResults.aspx?searchText=
+title = .plProductName
+reg_tag = .price
+sale_tag = .salePrice
+# type_includes = televisions
+ London Drugs
-color = 005DAB
-search_url = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q=
-title = .productname
-price_regular = .pricing
+color = 005DAB
+url = http://www.londondrugs.com/on/demandware.store/Sites-LondonDrugs-Site/default/Search-Show?q=
+title = .productname
+reg_tag = .pricing
# not sure about the below
# reg_price = .standardprice
# sale_price = .salesprice
# + Amazon
-# color = FFA51D
-# search_url = http://www.amazon.ca/s/keywords=
-# title = .newaps
-# price_regular = .price
-
-# + Tiger Direct
-# color = 660
-# search_uri = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords=
-# price_context =
-# reg_price = .salePrice
-# sale_price =
+# color = FFA51D
+# url = http://www.amazon.ca/s/keywords=
+# title = .newaps
+# reg_tag = .price
+
+ Best Buy
-color = 003B64
-search_url = http://www.bestbuy.ca/Search/SearchResults.aspx?query=
-#title = .product-title, .prod-title
-#sale_price = .price-onsale
-price_regular = .prodprice
+color = 003B64
+url = http://www.bestbuy.ca/Search/SearchResults.aspx?query=
+reg_tag = .prodprice
+# title = .product-title, .prod-title
+# sale_tag = .price-onsale
# + RadioShack
@@ -108,26 +105,27 @@ price_regular = .prodprice
# reg_price = .price
# + Walmart
-# color = 0000FF
-# search_url = http://www.walmart.ca/search/
-# # title = .title
-# price_regular = .price-current
+# color = 0000FF
+# url = http://www.walmart.ca/search/
+# title = .title
+# reg_tag = .price-current
# this one has a table layout with no id= tags, making scraping impossible with
# the current technique
# + NCIX
-# color =
-# search_url = http://search.ncix.com/search/?q=
-# price_regular =
+# color =
+# url = http://search.ncix.com/search/?q=
+# reg_tag =
+ Newegg
-color = F8A42A
-search_url = http://www.newegg.ca/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description=
-price_regular = .price-current
-title = .itemDescription
+color = F8A42A
+url = http://www.newegg.ca/Product/ProductList.aspx?Submit=ENE&DEPA=0&Order=BESTMATCH&Description=
+title = .itemDescription
+reg_tag = .price-current
+
+ Tiger Direct
-color =
-search_url = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords=
-price_regular = .salePrice
-title = .itemName
+color = FED443
+url = http://www.tigerdirect.ca/applications/SearchTools/search.asp?keywords=
+title = .itemName
+reg_tag = .salePrice