#!/usr/bin/perl # $Id: scrape-wowhead.perl 288 2008-12-28 00:32:36Z shefki $ ####################################################################### ### scrape-wowhead.perl ### ### Fetches data from wowhead to build default categories for TBag. ### ### Copyright 2007-2008 Shefki, All Rights Reserved! ### ### Not for redistribution! ### ####################################################################### ################# ### Libraries ### ################# use lib "$ENV{'HOME'}/lib/perl/lib/perl5/site_perl"; use strict; use WWW::Mechanize; ############## ### Config ### ############## # Version of the iteminfo db my $db_version = 2; # URL snippets used to get what we need my $wowhead_url = 'http://www.wowhead.com/'; my $item_tag = '?item='; my $trade_filter = '?spells='; # Mapping of the skill ids that wowhead uses (appears to be # completely specific to them). my %trades = ( 'Alchemy' => '11.171', 'Blacksmithing' => '11.164', 'Enchanting' => '11.333', 'Engineering' => '11.202', 'Inscription' => '11.773', 'Jewelcrafting' => '11.755', 'Leatherworking' => '11.165', 'Tailoring' => '11.197', 'Cooking' => '9.185', 'First Aid' => '9.129', 'Mining' => '11.186', ); ############## ### Output ### ############## # Save current time for output my $time = time(); my %TBag_Reagents; my %TBag_TradeCreations; for my $trade (keys %trades) { $TBag_TradeCreations{$trade} = { }; } ################### ### Subroutines ### ################### # Output the file header. sub output_header { print < $b} keys %TBag_Reagents) { print qq(\t\t["$reagent"] = {\n); foreach my $trade (sort keys %{$TBag_Reagents{$reagent}}) { print qq(\t\t\t["$trade"] = {\n); foreach my $id (sort {$a <=> $b} keys %{$TBag_Reagents{$reagent}->{$trade}}) { print qq(\t\t\t\t["$id"] =1,\n); } print qq(\t\t\t},\n); } print qq(\t\t},\n); } print "};\n\n\n"; } # Iterate over the trade hash and output it as valid lua. 
# NOTE(review): the text above looks damaged. The heredoc started in output_header ("print <") runs straight into the middle of a sort block, and no "sub output_footer" or "sub output_reagents" header is visible even though both are called later. Presumably the text between "<" and the next ">" was stripped -- restore from the original file before running.
# Parameters:
#   $var_name    - name of the Lua local variable to declare
#   $source_hash - hash ref of { trade name => { item id => 1 } }
# Prints to STDOUT.  Reads the file-level $time and $db_version for the
# update stamp and version entries.  Ids are sorted numerically.
sub output_trades {
    my ($var_name, $source_hash) = @_;

    print "local $var_name = {\n";
    print qq(\t\t[TBag.S_UPDATE] = "$time",\n);
    print qq(\t\t[TBag.S_VERSION] = $db_version,\n);
    for my $trade (sort keys %$source_hash) {
        print qq(\t\t["$trade"] = {\n);
        for my $id (sort { $a <=> $b } keys %{ $source_hash->{$trade} }) {
            print qq(\t\t\t["$id"] = 1,\n);
        }
        print qq(\t\t},\n);
    }
    print "};\n\n\n";
}

# Takes a data segment from wowhead and gets the itemids of the
# reagents to make the item.
#
# Returns an array ref of item ids (the first number of every
# [id,count] pair inside the "reagents:[...]" list).  Returns an empty
# array ref when the segment has no reagents list.
sub parse_reagents {
    my $data = shift;

    # Non-greedy on purpose: stop at the first "],<word char>" so that
    # only the reagents list is captured.  A greedy match runs on to the
    # last "]," in the entry and can pick up unrelated two-number arrays.
    my ($reagents) = $data =~ /reagents:\[(.*?)\],\w/;
    return [] unless defined $reagents;

    my @ids = $reagents =~ /\[(\d+),\d+\]/g;
    return \@ids;
}

# Takes a data segment and splits it into individual spell entries.
# Returns an array ref holding the text found between each "{" and the
# next "}" (braces not included).
sub parse_spells {
    my $data = shift;
    my @entries = $data =~ /\{(.*?)\}/g;
    return \@entries;
}

# Takes a single spell entry and returns the correct key entry.
#
# Parameters:
#   $data  - one spell entry as returned by parse_spells
#   $trade - trade name the entry belongs to
# Returns the created item id when the name starts with a digit, or
# "spell:<id>" / "enchant:<id>" when the name starts with "@".
# Dies on any other name.
sub parse_id {
    my ($data, $trade) = @_;
    my ($name) = $data =~ /name:'(.*?)',/;

    # Names starting with a digit are items: use the created item's id.
    if ($name =~ /^\d/) {
        my ($id) = $data =~ /creates:\[(\d+),/;
        return $id;
    }

    if ($name =~ /^\@/) {
        my ($id) = $data =~ /id:(\d+),/;
        # Ugly special case to avoid Cooking Fire from showing as
        # an enchant.  It's the only spell we should pick up that's
        # not an enchant.
        return $trade eq 'Cooking' ? "spell:$id" : "enchant:$id";
    }

    die("Unknown item type: $name");
}

# Finds the data segment that matches the template and section in the
# html page.
#
# Parameters:
#   $template   - Listview template name, matched literally
#   $section_id - Listview id, matched literally
#   $content    - page content to search
# Returns the text inside "data: [...]" of the matching
# "new Listview({...});" line, or undef when no line matches.
sub parse_data {
    my ($template, $section_id, $content) = @_;

    # \Q...\E keeps regex metacharacters in the arguments from being
    # interpreted as pattern syntax.
    my ($contain_data) = $content =~ /^new Listview\(\{template:\s+'\Q$template\E',\s+id:\s+'\Q$section_id\E'.*?data:\s+\[(.*?)\]\}\)\;$/m;
    return $contain_data;
}

# Go through all the items made by a given trade and record their
# item ids and the item ids of their reagents into the hash.
# Parameters:
#   $mech  - WWW::Mechanize object used to fetch the page
#   $trade - trade name; must be a key of %trades
#   $var   - hash ref that receives { trade => { created item id => 1 } }
# Side effects: fills %TBag_Reagents as { reagent id => { trade => { id => 1 } } },
# logs progress to STDERR, and dies if the page cannot be fetched.
sub handle_trade {
    my ($mech, $trade, $var) = @_;

    my $url = "${wowhead_url}${trade_filter}$trades{$trade}";
    print STDERR "TRADE [$trade] = $url",$/;
    $mech->get($url);
    die("Couldn't get $url") unless $mech->success();

    my $trade_content = $mech->response()->decoded_content;
    my $trade_data = parse_data('spell','spells',$trade_content);
    my $spells = parse_spells($trade_data);
    print STDERR "Items: ",scalar(@$spells),"\n";

    foreach my $spell (@$spells) {
        my $id = parse_id($spell,$trade);
        my $reagents = parse_reagents($spell);

        # Intermediate hashes are autovivified by the assignment.
        foreach my $reagent_id (@$reagents) {
            $TBag_Reagents{$reagent_id}->{$trade}->{$id} = 1;
        }

        # Only real items that need at least one reagent count as trade
        # creations.  This must be ">=": the previous "=>" is the fat
        # comma, which made the reagent-count test always true.
        if (scalar(@$reagents) >= 1 and $id !~ /(enchant|spell):/) {
            $var->{$trade}->{$id} = 1;
        }
    }
}

# Go through all the different trades and run handle_trade for each.
# Then output all the data.
#
# Parameters:
#   $mech - WWW::Mechanize object passed on to handle_trade
sub scrape_trade_items {
    my ($mech) = @_;

    # Sorted so trades are fetched (and logged) in a stable order.
    foreach my $trade (sort keys %trades) {
        handle_trade($mech,$trade,\%TBag_TradeCreations);
    }

    output_trades('TradeCreations',\%TBag_TradeCreations);
    output_reagents();
}

############
### Main ###
############

# Init WWW::Mechanize
my $mech = WWW::Mechanize->new();

# Output the file header.
output_header();

# Scrape wowhead of the items created by professions and reagents
scrape_trade_items($mech);

# Output the file footer.
output_footer();