
Commit

Adding modifications
h3devin committed Apr 13, 2015
1 parent c8e8230 commit 9416967
Showing 24 changed files with 1,022 additions and 640 deletions.
ckanext/ga_report/command.py: 24 changes (10 additions, 14 deletions)
file mode changed: 100644 → 100755
@@ -31,19 +31,17 @@ def command(self):


 class GetAuthToken(CkanCommand):
-    """ Gets the auth token from Google and saves it as token.dat.
+    """ Gets the Google auth token

     Usage: paster getauthtoken <credentials_file>

     Where <credentials_file> is the file name containing the details
-    of your client authorized for your Google Analytics data
-    (known as credentials.json or client_secrets.json)
-    which is obtained from https://code.google.com/apis/console.
+    for the service (obtained from https://code.google.com/apis/console).
+    By default this is set to credentials.json.
     """
     summary = __doc__.split('\n')[0]
     usage = __doc__
-    max_args = 1
+    max_args = 0
     min_args = 0

     def command(self):
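With max_args dropped to 0, the optional <credentials_file> argument disappears and the command always reads the default credentials.json. The OAuth exchange itself lives in ckanext.ga_report.ga_auth, which this diff does not touch; the sketch below shows roughly what such a command wraps, using the oauth2client library current at the time. All names are illustrative, not the project's actual code.

    # Hedged sketch only: ga_auth.py is not shown in this diff, so these names
    # are assumptions based on the oauth2client API of the era.
    from oauth2client.client import flow_from_clientsecrets
    from oauth2client.file import Storage
    from oauth2client.tools import run

    def get_auth_token(credentials_file='credentials.json'):
        """Run the OAuth consent flow and cache the result as token.dat."""
        flow = flow_from_clientsecrets(
            credentials_file,
            scope='https://www.googleapis.com/auth/analytics.readonly')
        run(flow, Storage('token.dat'))  # opens a browser, then saves the token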
@@ -84,7 +82,7 @@ def command(self):
         log = logging.getLogger('ckanext.ga_report')

         log.info("Updating 'All' records for old URLs")
-        post_update_url_stats(print_progress=True)
+        post_update_url_stats()
         log.info("Processing complete")


@@ -107,17 +105,16 @@ class LoadAnalytics(CkanCommand):

     def __init__(self, name):
         super(LoadAnalytics, self).__init__(name)
-        self.stat_names = ('url', 'url-all', 'sitewide', 'social')
         self.parser.add_option('-d', '--delete-first',
                                action='store_true',
                                default=False,
                                dest='delete_first',
                                help='Delete data for the period first')
-        self.parser.add_option('-s', '--stat',
-                               metavar="STAT",
-                               dest='stat',
-                               help='Only calculate a particular stat (or collection of stats) - one of: %s' %
-                                    '|'.join(self.stat_names))
+        self.parser.add_option('-s', '--skip_url_stats',
+                               action='store_true',
+                               default=False,
+                               dest='skip_url_stats',
+                               help='Skip the download of URL data - just do site-wide stats')
         self.token = ""

     def command(self):
@@ -142,8 +139,7 @@ def command(self):

         downloader = DownloadAnalytics(svc, self.token, profile_id=get_profile_id(svc),
                                        delete_first=self.options.delete_first,
-                                       stat=self.options.stat,
-                                       print_progress=True)
+                                       skip_url_stats=self.options.skip_url_stats)

         time_period = self.args[0] if self.args else 'latest'
         if time_period == 'all':
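The option swap above replaces the removed --stat selector (which could recompute one named statistic) with a coarser boolean, --skip_url_stats, that bypasses the per-URL download entirely. DownloadAnalytics is defined in download_analytics.py, outside this excerpt; the sketch below is an assumed shape showing how such a flag would plausibly be consumed, not the actual implementation.

    # Assumed shape: the real class lives in download_analytics.py (not shown).
    class DownloadAnalytics(object):
        def __init__(self, service, token, profile_id=None,
                     delete_first=False, skip_url_stats=False):
            self.service = service
            self.token = token
            self.profile_id = profile_id
            self.delete_first = delete_first
            self.skip_url_stats = skip_url_stats

        def download_and_store(self, periods):
            for period in periods:
                if not self.skip_url_stats:
                    self.download_url_stats(period)   # per-dataset page views (assumed helper)
                self.download_sitewide_stats(period)  # site totals, browsers, social (assumed helper)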
ckanext/ga_report/controller.py: 83 changes (34 additions, 49 deletions)
file mode changed: 100644 → 100755
@@ -29,18 +29,7 @@ def _get_unix_epoch(strdate):

 def _month_details(cls, stat_key=None):
     '''
-    Returns a list of all the periods for which we have data and the date we've
-    got data up to in the latest month.
-    e.g. ([(u'2014-11', 'November 2014'),
-           (u'2014-10', 'October 2014'),
-           (u'2014-09', 'September 2014')],
-          '27th')
-    i.e. we have 3 months up to 27th November
-    :param cls: GA_Stat or GA_Url
-    unfortunately
+    Returns a list of all the periods for which we have data, unfortunately
     knows too much about the type of the cls being passed as GA_Url has a
     more complex query
@@ -49,24 +38,21 @@ def _month_details(cls, stat_key=None):

     months = []
     day = None

-    q = model.Session.query(cls.period_name, cls.period_complete_day)\
-        .filter(cls.period_name!='All') \
-        .distinct(cls.period_name)
+    q = model.Session.query(cls.period_name,cls.period_complete_day)\
+        .filter(cls.period_name!='All').distinct(cls.period_name)
     if stat_key:
-        q = q.filter(cls.stat_name==stat_key)
+        q= q.filter(cls.stat_name==stat_key)

     vals = q.order_by("period_name desc").all()

-    # For the most recent month, add 'ordinal' to the day
-    # e.g. '27' -> day='27th'
     if vals and vals[0][1]:
         day = int(vals[0][1])
         ordinal = 'th' if 11 <= day <= 13 \
-            else {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')
+            else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th')
         day = "{day}{ordinal}".format(day=day, ordinal=ordinal)

     for m in vals:
-        months.append((m[0], _get_month_name(m[0])))
+        months.append( (m[0], _get_month_name(m[0])))

     return months, day
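The ordinal expression kept (in denser form) by this hunk is easy to sanity-check in isolation: days 11 to 13 always take 'th', otherwise the suffix follows the last digit. A standalone check of exactly that rule:

    # Standalone check of the ordinal rule used in _month_details above.
    def ordinal(day):
        return 'th' if 11 <= day <= 13 \
            else {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')

    assert ordinal(27) == 'th'    # the '27th' of the deleted docstring example
    assert ordinal(1) == 'st' and ordinal(22) == 'nd' and ordinal(3) == 'rd'
    assert ordinal(11) == ordinal(12) == ordinal(13) == 'th'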

@@ -93,6 +79,7 @@ def csv(self, month):
                                  entry.key.encode('utf-8'),
                                  entry.value.encode('utf-8')])

+
     def index(self):

         # Get the month details by fetching distinct values and determining the
@@ -257,7 +244,7 @@ def publisher_csv(self, month):
         '''
         c.month = month if not month == 'all' else ''
         response.headers['Content-Type'] = "text/csv; charset=utf-8"
-        response.headers['Content-Disposition'] = str('attachment; filename=publishers_%s.csv' % (month,))
+        response.headers['Content-Disposition'] = str('attachment; filename=organizations_%s.csv' % (month,))

         writer = csv.writer(response)
         writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"])
@@ -282,7 +269,7 @@ def dataset_csv(self, id='all', month='all'):
         if id != 'all':
             c.publisher = model.Group.get(id)
             if not c.publisher:
-                abort(404, 'A publisher with that name could not be found')
+                abort(404, 'An organization with that name could not be found')

         packages = self._get_packages(publisher=c.publisher, month=c.month)
         response.headers['Content-Type'] = "text/csv; charset=utf-8"
@@ -317,7 +304,7 @@ def publishers(self):
         graph_data = _get_top_publishers_graph()
         c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) )

-        x = render('ga_report/publisher/index.html')
+        x = render('ga_report/publisher/index.html')

         return x

@@ -328,17 +315,9 @@ def _get_packages(self, publisher=None, month='', count=-1):
         if month != 'All':
             have_download_data = month >= DOWNLOADS_AVAILABLE_FROM

-        if have_download_data:
-            download_stats_query = model.Session.query(GA_Stat.key, func.sum(cast(GA_Stat.value, sqlalchemy.types.Integer)))
-            download_stats_query = download_stats_query.filter(GA_Stat.stat_name=='Downloads')
-            if month != 'All':
-                download_stats_query = download_stats_query.filter(GA_Stat.period_name==month)
-            download_stats_query = download_stats_query.group_by(GA_Stat.key).all()
-            download_stats = dict(download_stats_query)
-
         q = model.Session.query(GA_Url,model.Package)\
             .filter(model.Package.name==GA_Url.package_id)\
-            .filter(GA_Url.package_id != '')
+            .filter(GA_Url.url.like('/dataset/%'))
         if publisher:
             q = q.filter(GA_Url.department_id==publisher.name)
         q = q.filter(GA_Url.period_name==month)
@@ -353,7 +332,14 @@ def _get_packages(self, publisher=None, month='', count=-1):
             if package:
                 # Downloads ....
                 if have_download_data:
-                    downloads = download_stats.get(package.name, 0)
+                    dls = model.Session.query(GA_Stat).\
+                        filter(GA_Stat.stat_name=='Downloads').\
+                        filter(GA_Stat.key==package.name)
+                    if month != 'All':  # Fetch everything unless the month is specific
+                        dls = dls.filter(GA_Stat.period_name==month)
+                    downloads = 0
+                    for x in dls:
+                        downloads += int(x.value)
                 else:
                     downloads = 'No data'
                 top_packages.append((package, entry.pageviews, entry.visits, downloads))
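The added loop issues one GA_Stat query per package and sums the values in Python, where the deleted block fetched every total in a single grouped query. For reference, the same per-package number can also be had in one aggregate query; the sketch below reuses the models and names already visible in this file, but this form is an inferred equivalent, not code from the commit.

    # Inferred single-query equivalent of the for-loop above (same models).
    from sqlalchemy import cast, func, types

    total = model.Session.query(func.sum(cast(GA_Stat.value, types.Integer)))\
        .filter(GA_Stat.stat_name == 'Downloads')\
        .filter(GA_Stat.key == package.name)
    if month != 'All':
        total = total.filter(GA_Stat.period_name == month)
    downloads = total.scalar() or 0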
@@ -373,14 +359,14 @@ def read_publisher(self, id):
         Lists the most popular datasets for a publisher (or across all publishers)
         '''
         count = 20

         c.publishers = _get_publishers()

         id = request.params.get('publisher', id)
         if id and id != 'all':
             c.publisher = model.Group.get(id)
             if not c.publisher:
-                abort(404, 'A publisher with that name could not be found')
+                abort(404, 'An organization with that name could not be found')
             c.publisher_name = c.publisher.name
         c.top_packages = [] # package, dataset_views in c.top_packages
@@ -398,7 +384,7 @@ def read_publisher(self, id):
         month = c.month or 'All'
         c.publisher_page_views = 0
         q = model.Session.query(GA_Url).\
-            filter(GA_Url.url=='/publisher/%s' % c.publisher_name)
+            filter(GA_Url.url=='/organization/%s' % c.publisher_name)
         entry = q.filter(GA_Url.period_name==c.month).first()
         c.publisher_page_views = entry.pageviews if entry else 0

@@ -435,11 +421,11 @@ def _to_rickshaw(data, percentageMode=False):
     x_axis = x_axis[:-1]  # Remove latest month
     totals = {}
     for series in data:
-        series['data'] = []
+        series["data"] = []
         for x_string in x_axis:
             x = _get_unix_epoch( x_string )
-            y = series['raw'].get(x_string,0)
-            series['data'].append({'x':x,'y':y})
+            y = series["raw"].get(x_string,0)
+            series["data"].append({"x":x,"y":y})
             totals[x] = totals.get(x,0)+y
     if not percentageMode:
         return data
@@ -449,26 +435,23 @@ def _to_rickshaw(data, percentageMode=False):
     raw_data = data
     data = []
     for series in raw_data:
-        for point in series['data']:
-            try:
-                percentage = (100*float(point['y'])) / totals[point['x']]
-            except ZeroDivisionError:
-                percentage = 0
+        for point in series["data"]:
+            percentage = (100*float(point["y"])) / totals[point["x"]]
             if not (series in data) and percentage>THRESHOLD:
                 data.append(series)
-            point['y'] = percentage
+            point["y"] = percentage
     others = [ x for x in raw_data if not (x in data) ]
     if len(others):
         data_other = []
         for i in range(len(x_axis)):
             x = _get_unix_epoch(x_axis[i])
             y = 0
             for series in others:
-                y += series['data'][i]['y']
-            data_other.append({'x':x,'y':y})
+                y += series["data"][i]["y"]
+            data_other.append({"x":x,"y":y})
         data.append({
-            'name':'Other',
-            'data': data_other
+            "name":"Other",
+            "data": data_other
         })
     return data
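The quoting churn in these two hunks changes nothing at runtime (Python accepts either quote style in dict literals), but note that the added code also drops the ZeroDivisionError guard, so a month whose total is zero will now raise. For orientation, this is the shape _to_rickshaw hands to the Rickshaw charting library, shown on a toy series with assumed values:

    # Toy illustration of the series shape; epoch numbers elided.
    # Input, as built elsewhere in this controller:
    #   {'name': 'Dataset views', 'raw': {'2015-02': 10, '2015-03': 30}}
    # After _to_rickshaw, each series gains Rickshaw-style points:
    #   {'name': 'Dataset views',
    #    'raw':  {'2015-02': 10, '2015-03': 30},
    #    'data': [{'x': <epoch of 2015-02>, 'y': 10},
    #             {'x': <epoch of 2015-03>, 'y': 30}]}
    # In percentageMode each y becomes its 0-100 share of that month's total,
    # and series that never clear THRESHOLD are folded into one 'Other' series.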

@@ -485,6 +468,7 @@ def _get_top_publishers(limit=20):
            from ga_url
            where department_id <> ''
            and package_id <> ''
+           and url like '/dataset/%%'
            and period_name=%s
            group by department_id order by views desc
            """
@@ -511,6 +495,7 @@ def _get_top_publishers_graph(limit=20):
            from ga_url
            where department_id <> ''
            and package_id <> ''
+           and url like '/dataset/%%'
            and period_name='All'
            group by department_id order by views desc
            """
[Diffs for the remaining 22 changed files are not included in this excerpt.]
