SEM – Google Adwords API Data Retriever

If you are running many large campaigns or thousands of campaigns and want to track keyword performance of Google Adwords, the following application helps retrieve the data on regular intervals (daily, weekly, monthly, etc.). Using your data warehouse and reporting infrastructure you can slice and dice the data based on your business requirements.

Application is a multi-threaded implementation of pulling data through API. Each thread gets a campaign id to pull keyword performance report.  The code included here is a skeletal code and you need to add error handing, code to suit your database schema and some ETL.  In my environment I have used MySQL 5.0, linux (2.6) and Windows, Perl 5.10.1 and Google Adwords API v13.  You can also use Google’s client libraries available in many languages including Java, Python, DotNet, Perl, PHP and others.


Other posts that may be of interest:
1. Google Analytics Data Retriever
2. Bulk data loading using Talend
3. Google Keyword Performance data pull


Campaign Class:

package Campaign;

#-------------------------------------
# Campaign Package
#-------------------------------------

use strict;
use DBI;

sub new {
my $self = {};
$self->{campIds} = []; #-- Master campaign ID array

bless($self);

$self->_populateCampaignIds();
return $self;
}

sub _populateCampaignIds {
my $self = shift;

#--------------------------
#- Connect to database to get list of campaigns (IDs).
#--------------------------
#- my $dbh = DBI->connect($data_source, $user_name, $pw);
#- my $sth = $dbh->prepare(
#- "SELECT campaign_id from campaign
#- WHERE search_engine = 'google'
#- AND status = 'active';
#- ";
#- $sth->execute() or die "Unable to execute a query.\n";
#- while ( @id = $sth->fetchrow_array ) {
#- push (${$self->{campIds}}, $id[0]);
#- }
#-
#- $dbh->disconnect;

#---- For demo, I have hard-coded some IDs.
push( @{$self->{campIds} }, '1111');
push( @{$self->{campIds} }, '2222');
push( @{$self->{campIds} }, '3333');

return 1;
}

#----------------------------
#- Returns master list of campaign IDs
#----------------------------
sub getCampaignIds {
my $self = shift;

return @{ $self->{campIds} };
}

#----------------------------
#- Returns chunk of campaign IDs from master array.
#----------------------------
sub getCampaignIdChunk {
my $self = shift;
my $chunks = shift;
my $index = shift;

if (!defined $index and !defined $chunks) {
return undef;
}

my $chunkSize = int (scalar(@ {$self->{campIds}})/$chunks);
if ($chunkSize < 1) {
print "Error: no campaigns to process for thread $index\n";
return undef;
}
my $lIx = $chunkSize * ($index - 1); #-- low index
my $hIx = $chunkSize * ($index); #-- high index

my @campIds = ();
if ($index < $chunks) {
@campIds = splice (@ {$self->{campIds}}, $lIx, $chunkSize);
} else {
#- print "Getting LAST chunk for $index from lower $lIx element and a chunk size of $chunkSize\n";
@campIds = splice (@ {$self->{campIds}}, $lIx);
}

return @campIds;
}

1;

Prototype application:

#-------------------------------------------------
# Adwords Data Retriever
# This is skeleton application to pull Adwords Keywords Performance Report Data
# from Google through Adwords API.
# Uses multi-threaded programming technique for simultaneous pull of many campaigns.
# Campaign ids in the application are hard-coded with dummy values and hence replace them by connecting
# to your database and getting actual campaign ids (code shown).
# Also, for authentication use your user name, password, developer & application code.
# Valid for Google Adwords v13.
# Author: Shiva M.
#-------------------------------------------------

#- use lib "Add directory where you installed Campaign.pm";
use strict;
use Benchmark;
use Getopt::Std;
use Data::Dumper;

use XML::Simple;
use LWP::Simple;
use Crypt::SSLeay;
use SOAP::Lite;
use IO::Uncompress::Gunzip qw();

use threads;
use threads::shared;

use Campaign;

my (%opts, @worker) = undef;
my $NUM_THREADS = 3; #-- Modify this to suit your environment. Or better use Config module with ini file to setup application configuration.
my $GOOGLE_API = 'https://adwords.google.com/api/adwords/v13';

#-------------------------------------------------
# Main program.
#-------------------------------------------------
getOpts();

if ($opts{'s'} == 1) {
for (my $i = 1; $i < ($NUM_THREADS+1); $i++) {
print "MASTER: Starting thread " . $i, "\n";
$worker[$i] = threads->new(\&pullCampaignData, $i);
sleep 5;
}

while(anyRunningThread()) {
for (my $ii = 1; $ii < scalar(@worker); $ii++) {
my $thr = $worker[$ii];
if ($thr->is_joinable) {
my $ret = eval { $thr->join() };
print "Thread $ii returned: $ret\n";
}
}
sleep(10);
}
}

exit (0);

#-------------------------------------------------
# Subroutines.
#-------------------------------------------------
sub getOpts {
if (!getopts('s:d:', \%opts)) {
print "Invalid option. Exiting\n";
exit (-1);
}

return 1;
}

#-----------------------------
#- Any thread still running?. Returns Yes/No (1/0)
#-----------------------------
sub anyRunningThread {
my $runThrs = threads->list(threads::running);
print "Still $runThrs threads are running.\n";
if ($runThrs) { return 1; }

return 0;
}

#---------------------------
#- Each thread to pull keyword performance reports for a list of campaigns
#---------------------------
sub pullCampaignData {
my ($tId) = @_;
my $thr = $worker[$tId];

my $to_dollar_conv_factor = 1 /(1000 * 1000); #-- Currency in micros. $1 1000 * 1000;

my $log = "thread_$tId.log";
open (TFH, ">$log") or die "Unable to open file $log\n"; #-- Log file for the thread.

#---------------------------------
#- Get this thread share of load - list of campaign IDs
#---------------------------------
my $campaign = Campaign->new();
my @tCampIds = $campaign->getCampaignIdChunk($NUM_THREADS,$tId);

print TFH "\npullCampaignData \$tId: $tId: campaignIDs:", map {"$_ "} @tCampIds;

my @headers = (SOAP::Header->name('email' => $email),
SOAP::Header->name('password' => $pw),
SOAP::Header->name('useragent' => $ua_name),
SOAP::Header->name('developerToken' => $developer_token),
SOAP::Header->name('applicationToken' => $application_token)
);

my $url = sprintf('https://%s.google.com/api/adwords/v13/ReportService', 'adwords');
my $wsdl = $url . '?wsdl';
my $service = SOAP::Lite->service($wsdl)->autotype(0)->readable(1)->proxy($url);
print TFH "\ndebug:service=$service" if $opts{d};

$service->on_fault(
sub {
my $response = $::ARG[1];
die('The following SOAP fault occurred:', "\n", 'faultcode: ',
$response->faultcode(), "\n",
'faultstring: ', $response->faultstring(),
"\n");
}
);
SOAP::Lite->import(+trace => 'debug') if $opts{d};


#---------------------------
# Make SOAP call.
# Get one campaign data at a time. Generate a file to bulk load into database.
#---------------------------

foreach my $cId (@tCampIds) {

my $AWJobParams = {
'selectedReportType' => 'Keyword',
'name' => 'AW Keyword Perf Daily Report',
'aggregationTypes' => ['Daily'],
'startDay' => $yesterday_date,
'endDay' => $yesterday_date,
'selectedColumns' => [ qw (
KeywordId Keyword CampaignId CampaignStatus
AdGroupId AdGroupStatus AdGroupMaxCpa AdWordsType
Impressions Clicks CTR CPC Conversions ConversionRate Cost
AveragePosition QualityScore AverageConversionValue BottomPosition
CPM CostPerConverstion CostPerTransaction DailyBudget DefaultCount
DefaultValue FirstPageCpc KeywordDestUrlDisplay KeywordStatus
KeywordTypeDisplay LeadCount LeadValue MaxContentCPC MaximumCPC
MaximumCPM PageViewCount PageViewValue SalesCount SalesValue
TopPosition TotalConversionValue Transactions ValuePerClick
ValuePerCost CustomerName ExternalCustomerId )
],
'crossClient' => 1,
'campaigns' => [$cId]
};


#------------------------------
# Step 1. Define a job
#------------------------------
my $report_job = SOAP::Data->name('job' => $AWJobParams);
$report_job->attr({'xsi:type' => 'DefinedReportJob', 'xmlns' => "$GOOGLE_API"});

#------------------------------
# Output XML file after unzipped.
#------------------------------
my $output = "${cId}_kw_perf.xml";


#-------------------------------
# Catch any exception
#-------------------------------
eval {

#------------------------------
# Step 2. Validate report.
#------------------------------
$service->call('validateReportJob' => $report_job, @headers);
print TFH "debug: about to schedule report \n";

#------------------------------
# Step 3. Schedule report.
#------------------------------
my $job_id = $service->call('scheduleReportJob' => $report_job, @headers)->result();
$job_id = SOAP::Data->name('reportJobId' => $job_id);

#------------------------------
# Step 4. Get report status. Wait until finish or error.
#------------------------------
my $status = $service->call('getReportJobStatus' => $job_id, @headers)->result();
while ($status ne 'Completed' and $status ne 'Failed') {
print TFH "Report job status is $status\n";
sleep(30);
$status = $service->call('getReportJobStatus' => $job_id, @headers)->result();
}
if ($status eq 'Failed') {
die("Report job generation failed.\n");
}

#------------------------------
# Step 5. Get report URL to download the file.
#------------------------------
my $report_url = $service->call('getGzipReportDownloadUrl' => $job_id, @headers)->result();
print TFH "Report is available at $report_url\n" if $opts{d};

#------------------------------
# Step 6. Get the data file (report).
#------------------------------
my $gzip_report_data = LWP::Simple::get($report_url);


#------------------------------
# Step 6a. Save it to local file.
#------------------------------
my $localFile = "${cId}_kw_perf.xml.gz";
open(LFH, ">$localFile") or die "Could not open $localFile: $!\n";
print LFH $gzip_report_data . "\n";
close LFH;


#------------------------------
# Step 7. Unzip the file and build xml hash.
#------------------------------
IO::Uncompress::Gunzip::gunzip(\$gzip_report_data => $output);

my $xml_hash = XMLin($output);
print TFH Dumper($xml_hash) if $opts{d};

#------------------------------
# Step 8. Create database bulk load file.
#------------------------------
open (BLFH, ">${cId}_bulk_load_input.txt") or die "Unable to create bulk load file: $!\n";


#-- Uncomment the fields that are of interest to you.
foreach my $row (@{$xml_hash->{table}->{rows}->{row}}) {
print BLFH
$row->{keywordid},"\t", $row->{keyword},"\t",
$row->{campaignid},"\t",
# $row->{campaign},"\t",
$row->{campStatus},"\t", $row->{adgroupid},"\t",
# $row->{adgroup},"\t",
# $row->{agstatus},"\t",
# $row->{agMaxCpa} * $to_dollar_conv_factor,"\t",
# $row->{adwordsType}, $row->{keywordid}, $row->{keyword}, $row->{kwStatus},
# $row->{customerid}, $row->{acctname}, $row->{kwDestUrl},
$row->{imps},"\t", $row->{clicks},"\t", $row->{ctr},"\t",
# $row->{cpc} * $to_dollar_conv_factor, $row->{conv},
# $row->{convRate},
$row->{cost} * $to_dollar_conv_factor,"\t",
$row->{pos},"\t",
$row->{qualityScore},"\t",
# $row->{avgConvValue} * $to_dollar_conv_factor,
# $row->{bottomPosition}, $row->{cpm} * $to_dollar_conv_factor,
# $row->{costPerConv} * $to_dollar_conv_factor,
# $row->{cpt} * $to_dollar_conv_factor,
# $row->{budget} * $to_dollar_conv_factor, $row->{defaultConv},
# $row->{defaultConvValue} * $to_dollar_conv_factor,
# $row->{firstPageCpc} * $to_dollar_conv_factor,
$row->{kwDestUrl},"\t", $row->{kwType},
# $row->{leads}, $row->{leadValue} * $to_dollar_conv_factor,
# $row->{maxContentCpc} * $to_dollar_conv_factor,
# $row->{maxCpc} * $to_dollar_conv_factor,
# $row->{maxCpm} * $to_dollar_conv_factor, $row->{pageviews},
# $row->{pageviewValue} * $to_dollar_conv_factor,
# $row->{purchases},
# $row->{purchaseValue} * $to_dollar_conv_factor,
# $row->{topPosition},
# $row->{convValue} * $to_dollar_conv_factor,
# $row->{transactions}, $row->{convVpc},
# $row->{valueCostRatio},
"\n";
}
close(BLFH);
};

if ($@) {
print("Report job failed validation due to the following error: ", $@, "\n");
exit (-1);
}
}

close(TFH);
return 1;
}

Bulk loading into data warehouse staging and perform summarization.

#- Psuedo code
#- 1. Cleanup or perform any needed transformation of data in the application.
#- 2. Concatenate all the campaign keyword performance data to a file(s).
#- 3. Bulk load file into a staging table.
#- 4. Transform and summarize the data through SQL.

Enjoy,
Shiva

2 thoughts on “SEM – Google Adwords API Data Retriever

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s