diff --git a/ckanext/ga_report/command.py b/ckanext/ga_report/command.py old mode 100644 new mode 100755 index d9684cf..eff8c8e --- a/ckanext/ga_report/command.py +++ b/ckanext/ga_report/command.py @@ -31,19 +31,17 @@ def command(self): class GetAuthToken(CkanCommand): - """ Gets the auth token from Google and saves it as token.dat. + """ Get's the Google auth token Usage: paster getauthtoken Where is the file name containing the details - of your client authorized for your Google Analytics data - (known as credentials.json or client_secrets.json) - which is obtained from https://code.google.com/apis/console. + for the service (obtained from https://code.google.com/apis/console). By default this is set to credentials.json """ summary = __doc__.split('\n')[0] usage = __doc__ - max_args = 1 + max_args = 0 min_args = 0 def command(self): @@ -84,7 +82,7 @@ def command(self): log = logging.getLogger('ckanext.ga_report') log.info("Updating 'All' records for old URLs") - post_update_url_stats(print_progress=True) + post_update_url_stats() log.info("Processing complete") @@ -107,17 +105,16 @@ class LoadAnalytics(CkanCommand): def __init__(self, name): super(LoadAnalytics, self).__init__(name) - self.stat_names = ('url', 'url-all', 'sitewide', 'social') self.parser.add_option('-d', '--delete-first', action='store_true', default=False, dest='delete_first', help='Delete data for the period first') - self.parser.add_option('-s', '--stat', - metavar="STAT", - dest='stat', - help='Only calulcate a particular stat (or collection of stats)- one of: %s' % - '|'.join(self.stat_names)) + self.parser.add_option('-s', '--skip_url_stats', + action='store_true', + default=False, + dest='skip_url_stats', + help='Skip the download of URL data - just do site-wide stats') self.token = "" def command(self): @@ -142,8 +139,7 @@ def command(self): downloader = DownloadAnalytics(svc, self.token, profile_id=get_profile_id(svc), delete_first=self.options.delete_first, - stat=self.options.stat, - 
print_progress=True) + skip_url_stats=self.options.skip_url_stats) time_period = self.args[0] if self.args else 'latest' if time_period == 'all': diff --git a/ckanext/ga_report/controller.py b/ckanext/ga_report/controller.py old mode 100644 new mode 100755 index 61ba1a4..f2928d2 --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -29,18 +29,7 @@ def _get_unix_epoch(strdate): def _month_details(cls, stat_key=None): ''' - Returns a list of all the periods for which we have data and the date we've - got data up to in the latest month. - - e.g. ([(u'2014-11', 'November 2014'), - (u'2014-10', 'October 2014'), - (u'2014-09', 'September 2014')], - '27th') - i.e. we have 3 months up to 27th November - - :param cls: GA_Stat or GA_Url - - unfortunately + Returns a list of all the periods for which we have data, unfortunately knows too much about the type of the cls being passed as GA_Url has a more complex query @@ -49,24 +38,21 @@ def _month_details(cls, stat_key=None): months = [] day = None - q = model.Session.query(cls.period_name, cls.period_complete_day)\ - .filter(cls.period_name!='All') \ - .distinct(cls.period_name) + q = model.Session.query(cls.period_name,cls.period_complete_day)\ + .filter(cls.period_name!='All').distinct(cls.period_name) if stat_key: - q = q.filter(cls.stat_name==stat_key) + q= q.filter(cls.stat_name==stat_key) vals = q.order_by("period_name desc").all() - # For the most recent month, add 'ordinal' to the day - # e.g. 
'27' -> day='27th' if vals and vals[0][1]: day = int(vals[0][1]) ordinal = 'th' if 11 <= day <= 13 \ - else {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th') + else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th') day = "{day}{ordinal}".format(day=day, ordinal=ordinal) for m in vals: - months.append((m[0], _get_month_name(m[0]))) + months.append( (m[0], _get_month_name(m[0]))) return months, day @@ -93,6 +79,7 @@ def csv(self, month): entry.key.encode('utf-8'), entry.value.encode('utf-8')]) + def index(self): # Get the month details by fetching distinct values and determining the @@ -257,7 +244,7 @@ def publisher_csv(self, month): ''' c.month = month if not month == 'all' else '' response.headers['Content-Type'] = "text/csv; charset=utf-8" - response.headers['Content-Disposition'] = str('attachment; filename=publishers_%s.csv' % (month,)) + response.headers['Content-Disposition'] = str('attachment; filename=organizations_%s.csv' % (month,)) writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) @@ -282,7 +269,7 @@ def dataset_csv(self, id='all', month='all'): if id != 'all': c.publisher = model.Group.get(id) if not c.publisher: - abort(404, 'A publisher with that name could not be found') + abort(404, 'An organization with that name could not be found') packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" @@ -317,7 +304,7 @@ def publishers(self): graph_data = _get_top_publishers_graph() c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) - x = render('ga_report/publisher/index.html') + x = render('ga_report/publisher/index.html') return x @@ -328,17 +315,9 @@ def _get_packages(self, publisher=None, month='', count=-1): if month != 'All': have_download_data = month >= DOWNLOADS_AVAILABLE_FROM - if have_download_data: - download_stats_query = model.Session.query(GA_Stat.key, func.sum(cast(GA_Stat.value, 
sqlalchemy.types.Integer))) - download_stats_query = download_stats_query.filter(GA_Stat.stat_name=='Downloads') - if month != 'All': - download_stats_query = download_stats_query.filter(GA_Stat.period_name==month) - download_stats_query = download_stats_query.group_by(GA_Stat.key).all() - download_stats = dict(download_stats_query) - q = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ - .filter(GA_Url.package_id != '') + .filter(GA_Url.url.like('/dataset/%')) if publisher: q = q.filter(GA_Url.department_id==publisher.name) q = q.filter(GA_Url.period_name==month) @@ -353,7 +332,14 @@ def _get_packages(self, publisher=None, month='', count=-1): if package: # Downloads .... if have_download_data: - downloads = download_stats.get(package.name, 0) + dls = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name=='Downloads').\ + filter(GA_Stat.key==package.name) + if month != 'All': # Fetch everything unless the month is specific + dls = dls.filter(GA_Stat.period_name==month) + downloads = 0 + for x in dls: + downloads += int(x.value) else: downloads = 'No data' top_packages.append((package, entry.pageviews, entry.visits, downloads)) @@ -373,14 +359,14 @@ def read_publisher(self, id): Lists the most popular datasets for a publisher (or across all publishers) ''' count = 20 - + c.publishers = _get_publishers() id = request.params.get('publisher', id) if id and id != 'all': c.publisher = model.Group.get(id) if not c.publisher: - abort(404, 'A publisher with that name could not be found') + abort(404, 'An organization with that name could not be found') c.publisher_name = c.publisher.name c.top_packages = [] # package, dataset_views in c.top_packages @@ -398,7 +384,7 @@ def read_publisher(self, id): month = c.month or 'All' c.publisher_page_views = 0 q = model.Session.query(GA_Url).\ - filter(GA_Url.url=='/publisher/%s' % c.publisher_name) + filter(GA_Url.url=='/organization/%s' % c.publisher_name) entry = 
q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 @@ -435,11 +421,11 @@ def _to_rickshaw(data, percentageMode=False): x_axis = x_axis[:-1] # Remove latest month totals = {} for series in data: - series['data'] = [] + series["data"] = [] for x_string in x_axis: x = _get_unix_epoch( x_string ) - y = series['raw'].get(x_string,0) - series['data'].append({'x':x,'y':y}) + y = series["raw"].get(x_string,0) + series["data"].append({"x":x,"y":y}) totals[x] = totals.get(x,0)+y if not percentageMode: return data @@ -449,14 +435,11 @@ def _to_rickshaw(data, percentageMode=False): raw_data = data data = [] for series in raw_data: - for point in series['data']: - try: - percentage = (100*float(point['y'])) / totals[point['x']] - except ZeroDivisionError: - percentage = 0 + for point in series["data"]: + percentage = (100*float(point["y"])) / totals[point["x"]] if not (series in data) and percentage>THRESHOLD: data.append(series) - point['y'] = percentage + point["y"] = percentage others = [ x for x in raw_data if not (x in data) ] if len(others): data_other = [] @@ -464,11 +447,11 @@ def _to_rickshaw(data, percentageMode=False): x = _get_unix_epoch(x_axis[i]) y = 0 for series in others: - y += series['data'][i]['y'] - data_other.append({'x':x,'y':y}) + y += series["data"][i]["y"] + data_other.append({"x":x,"y":y}) data.append({ - 'name':'Other', - 'data': data_other + "name":"Other", + "data": data_other }) return data @@ -485,6 +468,7 @@ def _get_top_publishers(limit=20): from ga_url where department_id <> '' and package_id <> '' + and url like '/dataset/%%' and period_name=%s group by department_id order by views desc """ @@ -511,6 +495,7 @@ def _get_top_publishers_graph(limit=20): from ga_url where department_id <> '' and package_id <> '' + and url like '/dataset/%%' and period_name='All' group by department_id order by views desc """ diff --git a/ckanext/ga_report/download_analytics.py 
b/ckanext/ga_report/download_analytics.py old mode 100644 new mode 100755 index 99db79d..7a36ab8 --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -1,39 +1,41 @@ import os +import logging import datetime +import httplib import collections import requests -import time -import re - +import json from pylons import config +from ga_model import _normalize_url import ga_model -log = __import__('logging').getLogger(__name__) +#from ga_client import GA + +log = logging.getLogger('ckanext.ga-report') FORMAT_MONTH = '%Y-%m' MIN_VIEWS = 50 MIN_VISITS = 20 - +MIN_DOWNLOADS = 1 class DownloadAnalytics(object): '''Downloads and stores analytics info''' def __init__(self, service=None, token=None, profile_id=None, delete_first=False, - stat=None, print_progress=False): + skip_url_stats=False): self.period = config['ga-report.period'] self.service = service self.profile_id = profile_id self.delete_first = delete_first - self.stat = stat + self.skip_url_stats = skip_url_stats self.token = token - self.print_progress = print_progress def specific_month(self, date): import calendar first_of_this_month = datetime.datetime(date.year, date.month, 1) _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month)) - last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month) + last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month) # if this is the latest month, note that it is only up until today now = datetime.datetime.now() if now.year == date.year and now.month == date.month: @@ -44,6 +46,7 @@ def specific_month(self, date): first_of_this_month, last_of_this_month),) self.download_and_store(periods) + def latest(self): if self.period == 'monthly': # from first of this month to today @@ -61,6 +64,7 @@ def for_date(self, for_date): assert isinstance(for_date, datetime.datetime) periods = [] # (period_name, period_complete_day, start_date, end_date) if self.period == 'monthly': + 
first_of_the_months_until_now = [] year = for_date.year month = for_date.month now = datetime.datetime.now() @@ -97,6 +101,7 @@ def get_full_period_name(period_name, period_complete_day): else: return period_name + def download_and_store(self, periods): for period_name, period_complete_day, start_date, end_date in periods: log.info('Period "%s" (%s - %s)', @@ -109,59 +114,39 @@ def download_and_store(self, periods): period_name) ga_model.delete(period_name) - if self.stat in (None, 'url'): + if not self.skip_url_stats: # Clean out old url data before storing the new ga_model.pre_update_url_stats(period_name) accountName = config.get('googleanalytics.account') - path_prefix = '~' # i.e. it is a regex - # Possibly there is a domain in the path. - # I'm not sure why, but on the data.gov.uk property we see - # the domain gets added to the GA path. e.g. - # '/data.gov.uk/data/search' - # '/co-prod2.dh.bytemark.co.uk/apps/test-app' - # but on other properties we don't. e.g. - # '/data/search' - path_prefix += '(/%s)?' 
% accountName - log.info('Downloading analytics for dataset views') - data = self.download(start_date, end_date, - path_prefix + '/dataset/[a-z0-9-_]+') - + #data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') + log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data, ) - log.info('Downloading analytics for publisher views') - data = self.download(start_date, end_date, - path_prefix + '/publisher/[a-z0-9-_]+') + log.info('Downloading analytics for organization views') + #data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~/organization/[a-z0-9-_]+') - log.info('Storing publisher views (%i rows)', len(data.get('url'))) + #log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) - # Create the All records + # Make sure the All records are correct. ga_model.post_update_url_stats() - log.info('Associating datasets with their publisher') + log.info('Associating datasets with their organization') ga_model.update_publisher_stats(period_name) # about 30 seconds. 
- if self.stat == 'url-all': - # This stat is split off just for test purposes - ga_model.post_update_url_stats() - - if self.stat in (None, 'sitewide'): - # Clean out old ga_stats data before storing the new - ga_model.pre_update_sitewide_stats(period_name) - log.info('Downloading and storing analytics for site-wide stats') - self.sitewide_stats(period_name, period_complete_day) + log.info('Downloading and storing analytics for site-wide stats') + self.sitewide_stats( period_name, period_complete_day ) - if self.stat in (None, 'social'): - # Clean out old ga_stats data before storing the new - ga_model.pre_update_social_stats(period_name) + log.info('Downloading and storing analytics for social networks') + self.update_social_info(period_name, start_date, end_date) - log.info('Downloading and storing analytics for social networks') - self.update_social_info(period_name, start_date, end_date) def update_social_info(self, period_name, start_date, end_date): start_date = start_date.strftime('%Y-%m-%d') @@ -171,35 +156,42 @@ def update_social_info(self, period_name, start_date, end_date): sort = '-ga:entrances' try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. 
+ headers = {'authorization': 'Bearer ' + self.token} + args = dict(ids='ga:' + self.profile_id, - filters=query, - metrics=metrics, - sort=sort, - dimensions="ga:landingPagePath,ga:socialNetwork", - max_results=10000) + filters=query, + metrics=metrics, + sort=sort, + dimensions="ga:landingPagePath,ga:socialNetwork", + max_results=10000) args['start-date'] = start_date args['end-date'] = end_date - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) + data = collections.defaultdict(list) - rows = results.get('rows') + rows = results.get('rows',[]) for row in rows: - url = strip_off_host_prefix(row[0]) - data[url].append((row[1], int(row[2]),)) + #url = _normalize_url('http:/' + row[0]) + url = row[0] + data[url].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) + def download(self, start_date, end_date, path=None): - '''Get views & visits data for particular paths & time period from GA - ''' + '''Get data from GA for a given time period''' start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') query = 'ga:pagePath=%s$' % path metrics = 'ga:pageviews, ga:visits' + sort = '-ga:pageviews' # Supported query params at # https://developers.google.com/analytics/devguides/reporting/core/v3/reference @@ -215,19 +207,28 @@ def download(self, start_date, end_date, path=None): args["filters"] = query args["alt"] = "json" - results = self._get_ga_data(args) - + results = self._get_json(args) + except Exception, e: log.exception(e) return dict(url=[]) - + log.info("profile id:") + log.info(self.profile_id) + log.info(results) + log.info(self.profile_id) + packages = [] - log.info('There are %d results', results['totalResults']) + log.info("There are %d results" % results['totalResults']) + if 'rows' not in results : + return dict(url=packages) for entry in results.get('rows'): - (path, pageviews, visits) = entry - url = strip_off_host_prefix(path) 
# strips off domain e.g. www.data.gov.uk or data.gov.uk + (loc,pageviews,visits) = entry + + #url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk + url = loc - if not url.startswith('/dataset/') and not url.startswith('/publisher/'): + + if not url.startswith('/dataset/') and not url.startswith('/organization/'): # filter out strays like: # /data/user/login?came_from=http://data.gov.uk/dataset/os-code-point-open # /403.html?page=/about&from=http://data.gov.uk/publisher/planning-inspectorate @@ -237,8 +238,7 @@ def download(self, start_date, end_date, path=None): def store(self, period_name, period_complete_day, data): if 'url' in data: - ga_model.update_url_stats(period_name, period_complete_day, data['url'], - print_progress=self.print_progress) + ga_model.update_url_stats(period_name, period_complete_day, data['url']) def sitewide_stats(self, period_name, period_complete_day): import calendar @@ -248,99 +248,48 @@ def sitewide_stats(self, period_name, period_complete_day): start_date = '%s-01' % period_name end_date = '%s-%s' % (period_name, last_day_of_month) funcs = ['_totals_stats', '_social_stats', '_os_stats', - '_locale_stats', '_browser_stats', '_mobile_stats', - '_download_stats' - ] + '_locale_stats', '_browser_stats', '_mobile_stats', '_download_stats'] for f in funcs: log.info('Downloading analytics for %s' % f.split('_')[1]) getattr(self, f)(start_date, end_date, period_name, period_complete_day) - def _get_results(result_data, f): + def _get_results(self, result_data, f): data = {} for result in result_data: key = f(result) data[key] = data.get(key,0) + result[1] return data - def _get_ga_data(self, params): - '''Returns the GA data specified in params. - Does all requests to the GA API and retries if needed. - - Returns a dict with the data, or dict(url=[]) if unsuccessful. 
- ''' - try: - data = self._get_ga_data_simple(params) - except DownloadError: - log.info('Will retry requests after a pause') - time.sleep(300) - try: - data = self._get_ga_data_simple(params) - except DownloadError: - return dict(url=[]) - except Exception, e: - log.exception(e) - log.error('Uncaught exception in get_ga_data_simple (see ' - 'above)') - return dict(url=[]) - except Exception, e: - log.exception(e) - log.error('Uncaught exception in get_ga_data_simple (see above)') - return dict(url=[]) - return data - - def _get_ga_data_simple(self, params): - '''Returns the GA data specified in params. - Does all requests to the GA API. - - Returns a dict with the data, or raises DownloadError if unsuccessful. - ''' - ga_token_filepath = os.path.expanduser( - config.get('googleanalytics.token.filepath', '')) + def _get_json(self, params, prev_fail=False): + ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', '')) if not ga_token_filepath: - log.error('In the CKAN config you need to specify the filepath ' - 'of the Google Analytics token file under key: ' - 'googleanalytics.token.filepath') + print 'ERROR: In the CKAN config you need to specify the filepath of the ' \ + 'Google Analytics token file under key: googleanalytics.token.filepath' return + log.info("Trying to refresh our OAuth token") try: from ga_auth import init_service self.token, svc = init_service(ga_token_filepath, None) + log.info("OAuth token refreshed") except Exception, auth_exception: - log.error('OAuth refresh failed') + log.error("Oauth refresh failed") log.exception(auth_exception) - return dict(url=[]) - - headers = {'authorization': 'Bearer ' + self.token} - response = self._do_ga_request(params, headers) - # allow any exceptions to bubble up - - data_dict = response.json() - - # If there are 0 results then the rows are missed off, so add it in - if 'rows' not in data_dict: - data_dict['rows'] = [] - return data_dict - - @classmethod - def 
_do_ga_request(cls, params, headers): - '''Makes a request to GA. Assumes the token init request is already done. + return - Returns the response (requests object). - On error it logs it and raises DownloadError. - ''' - # Because of issues of invalid responses when using the ga library, we - # are going to make these requests ourselves. - ga_url = 'https://www.googleapis.com/analytics/v3/data/ga' try: - response = requests.get(ga_url, params=params, headers=headers) - except requests.exceptions.RequestException, e: - log.error("Exception getting GA data: %s" % e) - raise DownloadError() - if response.status_code != 200: - log.error("Error getting GA data: %s %s" % (response.status_code, - response.content)) - raise DownloadError() - return response + headers = {'authorization': 'Bearer ' + self.token} + r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=params, headers=headers) + if r.status_code != 200: + log.info("STATUS: %s" % (r.status_code,)) + log.info("CONTENT: %s" % (r.content,)) + raise Exception("Request with params: %s failed" % params) + + return json.loads(r.content) + except Exception, e: + log.exception(e) + + return dict(url=[]) def _totals_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches distinct totals, total pageviews etc """ @@ -355,7 +304,7 @@ def _totals_stats(self, start_date, end_date, period_name, period_complete_day): args["sort"] = "-ga:pageviews" args["alt"] = "json" - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -365,6 +314,10 @@ def _totals_stats(self, start_date, end_date, period_name, period_complete_day): period_complete_day) try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. 
+ headers = {'authorization': 'Bearer ' + self.token} + args = {} args["max-results"] = 100000 args["start-date"] = start_date @@ -374,7 +327,7 @@ def _totals_stats(self, start_date, end_date, period_name, period_complete_day): args["metrics"] = "ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits" args["alt"] = "json" - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -391,19 +344,25 @@ def _totals_stats(self, start_date, end_date, period_name, period_complete_day): # Bounces from / or another configurable page. path = '/%s%s' % (config.get('googleanalytics.account'), config.get('ga-report.bounce_url', '/')) +# path = '/' try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + args = {} args["max-results"] = 100000 args["start-date"] = start_date args["end-date"] = end_date args["ids"] = "ga:" + self.profile_id - args["filters"] = 'ga:pagePath==%s' % path + + args["filters"] = 'ga:pagePath==%s' % (path,) args["dimensions"] = 'ga:pagePath' args["metrics"] = "ga:visitBounceRate" args["alt"] = "json" - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -425,6 +384,10 @@ def _locale_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches stats about language and country """ try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. 
+ headers = {'authorization': 'Bearer ' + self.token} + args = {} args["max-results"] = 100000 args["start-date"] = start_date @@ -436,7 +399,7 @@ def _locale_stats(self, start_date, end_date, period_name, period_complete_day): args["sort"] = "-ga:pageviews" args["alt"] = "json" - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -457,11 +420,15 @@ def _locale_stats(self, start_date, end_date, period_name, period_complete_day): def _download_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches stats about data downloads """ + import ckan.model as model data = {} - identifier = ga_model.Identifier() try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + args = {} args["max-results"] = 100000 args["start-date"] = start_date @@ -469,65 +436,93 @@ def _download_stats(self, start_date, end_date, period_name, period_complete_day args["ids"] = "ga:" + self.profile_id args["filters"] = 'ga:eventAction==download' - args["dimensions"] = "ga:pagePath" + args["dimensions"] = "ga:eventLabel" args["metrics"] = "ga:totalEvents" - args["sort"] = "-ga:totalEvents" args["alt"] = "json" - - results = self._get_ga_data(args) + + + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) result_data = results.get('rows') + import pprint + print "Download data: " + pprint.pprint(result_data) if not result_data: # We may not have data for this time period, so we need to bail # early. log.info("There is no download data for this time period") return - def process_result_data(result_data): + def process_result_data(result_data, cached=False): + if not result_data : return + progress_total = len(result_data) + progress_count = 0 resources_not_matched = [] for result in result_data: - page_path, total_events = result - #e.g. 
page=u'/data.gov.uk/dataset/road-accidents-safety-data' - page_path = strip_off_host_prefix(page_path) # strips off domain + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + + url = result[0].strip() + # Get package id associated with the resource that has this URL. - package_name = identifier.get_package(page_path) + q = model.Session.query(model.Resource) + if cached: + print "Cached download." + r = q.filter(model.Resource.cache_url.like("%s%%" % url)).first() + else: + r = q.filter(model.Resource.url.like("%s%%" % url)).first() + + package_name = r.resource_group.package.name if r else "" + print 'Package_name for resoure : ', package_name if package_name: - data[package_name] = data.get(package_name, 0) + int(total_events) + data[package_name] = data.get(package_name, 0) + int(result[1]) else: - resources_not_matched.append(page_path) + resources_not_matched.append(url) continue if resources_not_matched: - log.debug('Could not match %i of %i resource URLs to datasets. e.g. %r', - len(resources_not_matched), len(result_data), resources_not_matched[:3]) + log.debug('Could not match %i or %i resource URLs to datasets. e.g. %r', + len(resources_not_matched), progress_total, resources_not_matched[:3]) log.info('Associating downloads of resource URLs with their respective datasets') process_result_data(results.get('rows')) try: - args['filters'] = 'ga:eventAction==download-cache' + # Because of issues of invalid responses, we are going to make these requests + # ourselves. 
+ headers = {'authorization': 'Bearer ' + self.token} + + args = dict( ids='ga:' + self.profile_id, + filters='ga:eventAction==download-cache', + metrics='ga:totalEvents', + sort='-ga:totalEvents', + dimensions="ga:eventLabel", + max_results=10000) + args['start-date'] = start_date + args['end-date'] = end_date - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) - result_data = results.get('rows') - if not result_data: - # We may not have data for this time period, so we need to bail - # early. - log.info("There is no cached download data for this time period") - return - log.info('Associating cached downloads of resource URLs with their respective datasets') - process_result_data(results.get('rows')) + log.info('Associating downloads of cache resource URLs with their respective datasets') + process_result_data(results.get('rows'), cached=False) + + self._filter_out_long_tail(data, MIN_DOWNLOADS) ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day) def _social_stats(self, start_date, end_date, period_name, period_complete_day): """ Finds out which social sites people are referred from """ try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. 
+ headers = {'authorization': 'Bearer ' + self.token} + args = dict( ids='ga:' + self.profile_id, metrics='ga:pageviews', sort='-ga:pageviews', @@ -536,7 +531,7 @@ def _social_stats(self, start_date, end_date, period_name, period_complete_day): args['start-date'] = start_date args['end-date'] = end_date - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -553,6 +548,10 @@ def _social_stats(self, start_date, end_date, period_name, period_complete_day): def _os_stats(self, start_date, end_date, period_name, period_complete_day): """ Operating system stats """ try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + args = dict( ids='ga:' + self.profile_id, metrics='ga:pageviews', sort='-ga:pageviews', @@ -561,7 +560,7 @@ def _os_stats(self, start_date, end_date, period_name, period_complete_day): args['start-date'] = start_date args['end-date'] = end_date - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -585,6 +584,10 @@ def _browser_stats(self, start_date, end_date, period_name, period_complete_day) """ Information about browsers and browser versions """ try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. 
+ headers = {'authorization': 'Bearer ' + self.token} + args = dict( ids='ga:' + self.profile_id, metrics='ga:pageviews', sort='-ga:pageviews', @@ -594,7 +597,7 @@ def _browser_stats(self, start_date, end_date, period_name, period_complete_day) args['start-date'] = start_date args['end-date'] = end_date - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) @@ -642,6 +645,10 @@ def _mobile_stats(self, start_date, end_date, period_name, period_complete_day): """ Info about mobile devices """ try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + args = dict( ids='ga:' + self.profile_id, metrics='ga:pageviews', sort='-ga:pageviews', @@ -650,13 +657,16 @@ def _mobile_stats(self, start_date, end_date, period_name, period_complete_day): args['start-date'] = start_date args['end-date'] = end_date - results = self._get_ga_data(args) + results = self._get_json(args) except Exception, e: log.exception(e) results = dict(url=[]) result_data = results.get('rows') + + if not result_data : return + data = {} for result in result_data: data[result[0]] = data.get(result[0], 0) + int(result[2]) @@ -679,29 +689,3 @@ def _filter_out_long_tail(cls, data, threshold=10): for key, value in data.items(): if value < threshold: del data[key] - -global host_re -host_re = None - - -def strip_off_host_prefix(url): - '''Strip off the hostname that gets prefixed to the GA Path on data.gov.uk - UA-1 but not on others. 
- - >>> strip_off_host_prefix('/data.gov.uk/dataset/weekly_fuel_prices') - '/dataset/weekly_fuel_prices' - >>> strip_off_host_prefix('/dataset/weekly_fuel_prices') - '/dataset/weekly_fuel_prices' - ''' - global host_re - if not host_re: - host_re = re.compile('^\/[^\/]+\.') - # look for a dot in the first part of the path - if host_re.search(url): - # there is a dot, so must be a host name - strip it off - return '/' + '/'.join(url.split('/')[2:]) - return url - - -class DownloadError(Exception): - pass diff --git a/ckanext/ga_report/fanstatic/ga_index.js b/ckanext/ga_report/fanstatic/ga_index.js new file mode 100644 index 0000000..c454140 --- /dev/null +++ b/ckanext/ga_report/fanstatic/ga_index.js @@ -0,0 +1,14 @@ + $('a[data-toggle="dropdown"]').click(function(){ + $("#graph-legend-container").hide(); + }); + +$("a[href=#totals]").click(function(e) { + $("#graph-legend-container").hide(); + }); + +$(function() { + $("#graph-legend-container").hide(); + CKAN.GA_Reports.bind_sparklines(); + CKAN.GA_Reports.bind_sidebar(); + CKAN.GA_Reports.bind_month_selector(); +}); diff --git a/ckanext/ga_report/fanstatic/ga_org_index.js b/ckanext/ga_report/fanstatic/ga_org_index.js new file mode 100644 index 0000000..4f1756f --- /dev/null +++ b/ckanext/ga_report/fanstatic/ga_org_index.js @@ -0,0 +1,3 @@ + $(function() { + CKAN.GA_Reports.bind_month_selector(); + }); diff --git a/ckanext/ga_report/ga_auth.py b/ckanext/ga_report/ga_auth.py old mode 100644 new mode 100755 index 54ed975..d4ea48a --- a/ckanext/ga_report/ga_auth.py +++ b/ckanext/ga_report/ga_auth.py @@ -7,8 +7,6 @@ from pylons import config -log = __import__('logging').getLogger(__name__) - def _prepare_credentials(token_filename, credentials_filename): """ @@ -20,7 +18,8 @@ def _prepare_credentials(token_filename, credentials_filename): if credentials is None or credentials.invalid: flow = flow_from_clientsecrets(credentials_filename, - scope='https://www.googleapis.com/auth/analytics.readonly') + 
scope='https://www.googleapis.com/auth/analytics.readonly', + message="Can't find the credentials file") credentials = run(flow, storage) return credentials @@ -31,61 +30,44 @@ def init_service(token_file, credentials_file): Given a file containing the user's oauth token (and another with credentials in case we need to generate the token) will return a service object representing the analytics API. - - On error, GA appears to raise TypeError. """ http = httplib2.Http() credentials = _prepare_credentials(token_file, credentials_file) http = credentials.authorize(http) # authorize the http object - service = credentials.access_token, build('analytics', 'v3', http=http) - return service + return credentials.access_token, build('analytics', 'v3', http=http) def get_profile_id(service): """ - Returns the GA Profile ID (a number), which is derived from the GA Property - ID (e.g. 'UA-10855508-6'), as specified by configured googleananalyics.id. - It also checks that that Property ID exists for the configured - googleanalytics.account and is accessible with the OAuth token. + Get the profile ID for this user and the service specified by the + 'googleanalytics.id' configuration option. This function iterates + over all of the accounts available to the user who invoked the + service to find one where the account name matches (in case the + user has several). 
""" - # Get list of GA Accounts available to the GA user represented by the OAuth - # token accounts = service.management().accounts().list().execute() + if not accounts.get('items'): - log.error('No GA accounts are associated with the GA user (OAuth token)') return None - # Check the config of the GA Account (googleanalytics.account) accountName = config.get('googleanalytics.account') if not accountName: raise Exception('googleanalytics.account needs to be configured') - accounts_by_name = dict([(acc.get('name'), acc.get('id')) - for acc in accounts.get('items', [])]) - if accountName not in accounts_by_name: - log.error('The specified GA account is not available. Configure googleanalytics.account to one of: %r', accounts_by_name.keys()) - return None - accountId = accounts_by_name[accountName] # e.g. accountId='10855508' - - # Check the config of the GA Property ID (googleanalyics.id) - webproperties = service.management().webproperties().list(accountId=accountId).execute() - property_ids = [prop.get('id') for prop in webproperties.get('items', [])] webPropertyId = config.get('googleanalytics.id') if not webPropertyId: raise Exception('googleanalytics.id needs to be configured') - if webPropertyId not in property_ids: - log.error('The specified GA Property is not available. 
Configure googleanalytics.id to one of: %r', property_ids.keys()) - return None + for acc in accounts.get('items'): + if acc.get('name') == accountName: + accountId = acc.get('id') + + webproperties = service.management().webproperties().list(accountId=accountId).execute() - # Convert the GA Property ID to GA's internal number "Profile ID" profiles = service.management().profiles().list( accountId=accountId, webPropertyId=webPropertyId).execute() - if not profiles.get('items'): - log.error('The specified GA Property ID does not appear to have an internal profile.Check config of googleanalytics.id') - return None - profileId = profiles['items'][0]['id'] - log.debug('GA Property %s has GA Profile id: %s', webPropertyId, profileId) - return profileId + if profiles.get('items'): + return profiles.get('items')[0].get('id') + return None diff --git a/ckanext/ga_report/ga_model.py b/ckanext/ga_report/ga_model.py old mode 100644 new mode 100755 index 7e56f1a..64b70e9 --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -1,15 +1,14 @@ import re import uuid -from sqlalchemy import Table, Column, MetaData +from sqlalchemy import Table, Column, MetaData, ForeignKey from sqlalchemy import types -from sqlalchemy.orm import mapper -from sqlalchemy.sql.expression import cast +from sqlalchemy.sql import select +from sqlalchemy.orm import mapper, relation from sqlalchemy import func import ckan.model as model - -from lib import GaProgressBar +from ckan.lib.base import * log = __import__('logging').getLogger(__name__) @@ -109,45 +108,33 @@ def get_table(name): return cached_tables[name] -class Identifier: - def __init__(self): - Identifier.dataset_re = re.compile('/dataset/([^/]+)(/.*)?') - Identifier.publisher_re = re.compile('/publisher/([^/]+)(/.*)?') +def _normalize_url(url): + '''Strip off the hostname etc. Do this before storing it. - def get_package(self, url): - # e.g. /dataset/fuel_prices - # e.g. 
/dataset/fuel_prices/resource/e63380d4 - dataset_match = Identifier.dataset_re.match(url) - if dataset_match: - dataset_ref = dataset_match.groups()[0] - else: - dataset_ref = None - return dataset_ref - - def get_package_and_publisher(self, url): - # Example urls: - # /dataset/fuel_prices - # /dataset/d7fc8964-e9da-42ab-8385-cbac70479f4b - # /dataset/fuel_prices/resource/e63380d4 - dataset_match = Identifier.dataset_re.match(url) - if dataset_match: - dataset_ref = dataset_match.groups()[0] - dataset = model.Package.get(dataset_ref) - if dataset: - if hasattr(dataset, 'owner_org'): - # CKAN 2+ - org = model.Group.get(dataset.owner_org) - org_name = org.name if org else None - else: - publisher_groups = dataset.get_groups('organization') - org_name = publisher_groups[0].name if publisher_groups else None - return dataset.name, org_name - return dataset_ref, None - else: - publisher_match = Identifier.publisher_re.match(url) - if publisher_match: - return None, publisher_match.groups()[0] - return None, None + >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') + '/dataset/weekly_fuel_prices' + ''' + #return '/' + '/'.join(url.split('/')[3:]) + return url + + +def _get_package_and_publisher(url): + # e.g. /dataset/fuel_prices + # e.g. 
/dataset/fuel_prices/resource/e63380d4 + dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) + if dataset_match: + dataset_ref = dataset_match.groups()[0] + dataset = model.Package.get(dataset_ref) + if dataset: + publisher_groups = dataset.get_groups('organization') + if publisher_groups: + return dataset_ref,publisher_groups[0].name + return dataset_ref, None + else: + publisher_match = re.match('/organization/([^/]+)(/.*)?', url) + if publisher_match: + return None, publisher_match.groups()[0] + return None, None def update_sitewide_stats(period_name, stat_name, data, period_complete_day): for k,v in data.iteritems(): @@ -177,7 +164,12 @@ def update_sitewide_stats(period_name, stat_name, data, period_complete_day): def pre_update_url_stats(period_name): q = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name) - log.debug("Deleting %d '%s' URL records" % (q.count(), period_name)) + log.debug("Deleting %d '%s' records" % (q.count(), period_name)) + q.delete() + + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == 'All') + log.debug("Deleting %d 'All' records..." % q.count()) q.delete() model.Session.flush() @@ -185,7 +177,6 @@ def pre_update_url_stats(period_name): model.repo.commit_and_remove() log.debug('...done') - def post_update_url_stats(): """ Check the distinct url field in ga_url and make sure @@ -195,70 +186,43 @@ def post_update_url_stats(): record regardless of whether the URL has an entry for the month being currently processed. """ - q = model.Session.query(GA_Url).\ - filter_by(period_name='All') - log.debug("Deleting %d 'All' URL records..." 
% q.count()) - q.delete() - - # For dataset URLs: - # Calculate the total views/visits for All months - log.debug('Calculating Dataset "All" records') - query = '''select package_id, sum(pageviews::int), sum(visits::int) + log.debug('Post-processing "All" records...') + query = """select url, pageviews::int, visits::int from ga_url - where package_id != '' - group by package_id - order by sum(pageviews::int) desc - ''' - res = model.Session.execute(query).fetchall() - # Now get the link between dataset and org as the previous - # query doesn't return that - package_to_org = \ - model.Session.query(GA_Url.package_id, GA_Url.department_id)\ - .filter(GA_Url.package_id != None)\ - .group_by(GA_Url.package_id, GA_Url.department_id)\ - .all() - package_to_org = dict(package_to_org) - for package_id, views, visits in res: - values = {'id': make_uuid(), - 'period_name': "All", - 'period_complete_day': 0, - 'url': '', - 'pageviews': views, - 'visits': visits, - 'department_id': package_to_org.get(package_id, ''), - 'package_id': package_id - } - model.Session.add(GA_Url(**values)) + where url not in (select url from ga_url where period_name ='All')""" + connection = model.Session.connection() + res = connection.execute(query) - # For non-dataset URLs: - # Calculate the total views/visits for All months - log.debug('Calculating URL "All" records...') - query = '''select url, sum(pageviews::int), sum(visits::int) - from ga_url - where package_id = '' - group by url - order by sum(pageviews::int) desc - ''' - res = model.Session.execute(query).fetchall() + views, visits = {}, {} + # url, views, visits + for row in res: + views[row[0]] = views.get(row[0], 0) + row[1] + visits[row[0]] = visits.get(row[0], 0) + row[2] + + progress_total = len(views.keys()) + progress_count = 0 + for key in views.keys(): + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. 
%d/%d done so far', progress_count, progress_total) + + package, publisher = _get_package_and_publisher(key) - for url, views, visits in res: values = {'id': make_uuid(), 'period_name': "All", 'period_complete_day': 0, - 'url': url, - 'pageviews': views, - 'visits': visits, - 'department_id': '', - 'package_id': '' + 'url': key, + 'pageviews': views[key], + 'visits': visits[key], + 'department_id': publisher, + 'package_id': package } model.Session.add(GA_Url(**values)) model.Session.commit() - - log.debug('Done URL "All" records') + log.debug('..done') -def update_url_stats(period_name, period_complete_day, url_data, - print_progress=False): +def update_url_stats(period_name, period_complete_day, url_data): ''' Given a list of urls and number of hits for each during a given period, stores them in GA_Url under the period and recalculates the totals for @@ -266,26 +230,19 @@ def update_url_stats(period_name, period_complete_day, url_data, ''' progress_total = len(url_data) progress_count = 0 - if print_progress: - progress_bar = GaProgressBar(progress_total) - urls_in_ga_url_this_period = set( - result[0] for result in model.Session.query(GA_Url.url) - .filter(GA_Url.period_name==period_name) - .all()) - identifier = Identifier() for url, views, visits in url_data: progress_count += 1 - if print_progress: - progress_bar.update(progress_count) + if progress_count % 100 == 0: + log.debug('.. 
%d/%d done so far', progress_count, progress_total) - package, publisher = identifier.get_package_and_publisher(url) + package, publisher = _get_package_and_publisher(url) - if url in urls_in_ga_url_this_period: - item = model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url==url).first() - item.pageviews = int(item.pageviews or 0) + int(views or 0) - item.visits = int(item.visits or 0) + int(visits or 0) + item = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name).\ + filter(GA_Url.url==url).first() + if item: + item.pageviews = item.pageviews + views + item.visits = item.visits + visits if not item.package_id: item.package_id = package if not item.department_id: @@ -300,61 +257,42 @@ def update_url_stats(period_name, period_complete_day, url_data, 'visits': visits, 'department_id': publisher, 'package_id': package - } + } model.Session.add(GA_Url(**values)) - urls_in_ga_url_this_period.add(url) model.Session.commit() if package: - counts = \ - model.Session.query(func.sum(cast(GA_Url.pageviews, - types.INTEGER)), - func.sum(cast(GA_Url.visits, - types.INTEGER)) - ) \ - .filter(GA_Url.period_name!='All') \ - .filter(GA_Url.url==url) \ - .all() - pageviews, visits = counts[0] + old_pageviews, old_visits = 0, 0 + old = model.Session.query(GA_Url).\ + filter(GA_Url.period_name=='All').\ + filter(GA_Url.url==url).all() + old_pageviews = sum([int(o.pageviews) for o in old]) + old_visits = sum([int(o.visits) for o in old]) + + entries = model.Session.query(GA_Url).\ + filter(GA_Url.period_name!='All').\ + filter(GA_Url.url==url).all() values = {'id': make_uuid(), 'period_name': 'All', 'period_complete_day': 0, 'url': url, - 'pageviews': pageviews, - 'visits': visits, + 'pageviews': sum([int(e.pageviews) for e in entries]) + int(old_pageviews), + 'visits': sum([int(e.visits or 0) for e in entries]) + int(old_visits), 'department_id': publisher, 'package_id': package - } + } model.Session.add(GA_Url(**values)) 
model.Session.commit() -def pre_update_sitewide_stats(period_name): - q = model.Session.query(GA_Stat).\ - filter(GA_Stat.period_name==period_name) - log.debug("Deleting %d '%s' sitewide records..." % (q.count(), period_name)) - q.delete() - - model.Session.flush() - model.Session.commit() - model.repo.commit_and_remove() - log.debug('...done') - - -def pre_update_social_stats(period_name): - q = model.Session.query(GA_ReferralStat).\ - filter(GA_ReferralStat.period_name==period_name) - log.debug("Deleting %d '%s' social records..." % (q.count(), period_name)) - q.delete() - - model.Session.flush() - model.Session.commit() - model.repo.commit_and_remove() - log.debug('...done') def update_social(period_name, data): + # Clean up first. + model.Session.query(GA_ReferralStat).\ + filter(GA_ReferralStat.period_name==period_name).delete() + for url,data in data.iteritems(): for entry in data: source = entry[0] @@ -378,7 +316,6 @@ def update_social(period_name, data): model.Session.add(GA_ReferralStat(**values)) model.Session.commit() - def update_publisher_stats(period_name): """ Updates the publisher stats from the data retrieved for /dataset/* @@ -496,5 +433,5 @@ def get_score_for_dataset(dataset_name): score += views_per_day score = int(score * 100) - #log.debug('Popularity %s: %s', score, dataset_name) + log.debug('Popularity %s: %s', score, dataset_name) return score diff --git a/ckanext/ga_report/helpers.py b/ckanext/ga_report/helpers.py old mode 100644 new mode 100755 index c699e77..9c57ca7 --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -140,4 +140,8 @@ def month_option_title(month_iso, months, day): return month_name + (' (up to %s)'%day) return month_name +def get_graph_x(graph): + return ','.join([x for x,y in graph]) +def get_graph_y(graph): + return ','.join([y for x,y in graph]) diff --git a/ckanext/ga_report/plugin.py b/ckanext/ga_report/plugin.py old mode 100644 new mode 100755 index 04525f1..0f69ab1 --- 
a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -1,31 +1,27 @@ import logging import ckan.lib.helpers as h import ckan.plugins as p -from ckan.plugins import toolkit +from ckan.plugins import implements, toolkit from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, single_popular_dataset, - month_option_title) -try: - from ckanext.report.interfaces import IReport -except ImportError: - # if you've not install ckanext-report then you just find you can't use the report. - IReport = p.ITemplateHelpers + month_option_title, + get_graph_x, + get_graph_y) log = logging.getLogger('ckanext.ga-report') - class GAReportPlugin(p.SingletonPlugin): - p.implements(p.IConfigurer, inherit=True) - p.implements(p.IRoutes, inherit=True) - p.implements(p.ITemplateHelpers, inherit=True) - p.implements(IReport) + implements(p.IConfigurer, inherit=True) + implements(p.IRoutes, inherit=True) + implements(p.ITemplateHelpers, inherit=True) def update_config(self, config): toolkit.add_template_directory(config, 'templates') toolkit.add_public_directory(config, 'public') - + toolkit.add_resource('fanstatic', 'ga_report') + def get_helpers(self): """ A dictionary of extra helpers that will be available to provide @@ -36,7 +32,9 @@ def get_helpers(self): 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, 'single_popular_dataset': single_popular_dataset, - 'month_option_title': month_option_title + 'month_option_title': month_option_title, + 'get_graph_x': get_graph_x, + 'get_graph_y': get_graph_y } def after_map(self, map): @@ -64,12 +62,12 @@ def after_map(self, map): # GaDatasetReport map.connect( - '/data/site-usage/publisher', + '/data/site-usage/organization', controller='ckanext.ga_report.controller:GaDatasetReport', action='publishers' ) map.connect( - '/data/site-usage/publishers_{month}.csv', + '/data/site-usage/organizations_{month}.csv', controller='ckanext.ga_report.controller:GaDatasetReport', 
action='publisher_csv' ) @@ -84,15 +82,9 @@ def after_map(self, map): action='read' ) map.connect( - '/data/site-usage/publisher/{id}', + '/data/site-usage/dataset/{id}', controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher' ) return map - # IReport - - def register_reports(self): - """Register details of an extension's reports""" - from ckanext.ga_report import reports - return [reports.publisher_report_info] diff --git a/ckanext/ga_report/public/css/ga_report.css b/ckanext/ga_report/public/css/ga_report.css old mode 100644 new mode 100755 index 0ccef65..ecfcde6 --- a/ckanext/ga_report/public/css/ga_report.css +++ b/ckanext/ga_report/public/css/ga_report.css @@ -53,19 +53,83 @@ white-space: normal; float: left; width: 200px; + border-width : 0; } .rickshaw_legend .line .label:hover { text-decoration: underline; } +.ga-reports-table { + table-layout : auto; + background-color : transparent; + border-collapse: collapse; + border-spacing: 0; + width : 100%; +} + + +.ga-reports-table>tbody>tr:nth-child(odd)>td, .ga-reports-table>tbody>tr:nth-child(odd)>th { + background-color: #f9f9f9; +} + +.ga-reports-table>tbody>tr:nth-child(even)>td, .ga-reports-table>tbody>tr:nth-child(even)>th { + background-color: #ffffff; +} + +.ga-reports-table > tbody > tr > th, .ga-reports-table > tbody > tr > td { + line-height : 20px; +} + .ga-reports-table .td-numeric { text-align: center; } + .ga-reports-heading { padding-right: 10px; - margin-top: 4px; + margin-top: 2px; float: left; + font-size : 18px; } .tab-content { padding-top: 12px; } + +.container { + max-width : 1170px; +} + +.stat-details { + padding : 10px; + margin : 0px; +} + +.row { + margin : 0px; + padding-top : 10px; +} + +.ga-header { + align: middle; + position: relative; + float : left; + width : 100%; + padding : 10px; + min-height : 1px; +} + +.ga-header h3 { + margin-top : 0px; +} + + +.panel-body { + padding-left : 25px; +} + +ul li { + line-height : 20px; +} + +.ga-report-divider { + 
margin : 2px; +} diff --git a/ckanext/ga_report/public/scripts/ckanext-googleanalytics.js b/ckanext/ga_report/public/scripts/ckanext-googleanalytics.js new file mode 100755 index 0000000..8e9047d --- /dev/null +++ b/ckanext/ga_report/public/scripts/ckanext-googleanalytics.js @@ -0,0 +1,72 @@ +(function ($) { + $(document).ready(function () { + // Google Analytics event tracking + + // group links on home page + $('body.home div.group a').click(function() { + _gaq.push(['_trackEvent', 'Home', 'Click: Group Link', $(this).attr('href')]); + }); + + // clicking on user name (go to profile) + $('div.account span.ckan-logged-in a').first().click(function() { + _gaq.push(['_trackEvent', 'User', 'Click: User Name', $(this).attr('href')]); + }); + + // In user profile, clicking on Edit Profile + $('body.user div#minornavigation a') + .filter(function(index) {return $(this).text() === "Edit Profile";}) + .click(function() { + _gaq.push(['_trackEvent', 'User', 'Click: Tab', 'Edit Profile']); + }); + + // Clicking Save Changes on Edit Profile page + $('body.user.edit input#save').click(function() { + _gaq.push(['_trackEvent', 'User', 'Click: Button', 'Save Profile Changes']); + }); + + // Clicking on any dataset link on User Profile page + $('body.user.read ul.datasets a').click(function() { + _gaq.push(['_trackEvent', 'User', 'Click: Dataset Link', $(this).attr('href')]); + }); + + // Compare Button on /dataset/history/X + $('body.package.history form#dataset-revisions input[name="diff"]').click(function() { + _gaq.push(['_trackEvent', 'Dataset', 'Click: Button', 'Compare History']); + }); + + // Tags on right hand sidebar of /dataset/X + $('body.package.read div#sidebar h3') + .filter(function(index) {return $(this).text().indexOf("Tags") != -1;}) + .next('ul') + .find('a') + .click(function() { + _gaq.push(['_trackEvent', 'Dataset', 'Click: Tag', $(this).attr('href')]); + }); + + // Any of the group links on /group + $('body.group.index table.groups a').click(function() { + 
_gaq.push(['_trackEvent', 'Group', 'Click: Group Link', $(this).attr('href')]); + }); + + // Clicking any of the right hand sidebar tags on /group/X + $('body.group.read div#sidebar h2') + .filter(function(index) {return $(this).text().indexOf("Tags") != -1;}) + .next('ul') + .find('a') + .click(function() { + _gaq.push(['_trackEvent', 'Group', 'Click: Tag', $(this).attr('href')]); + }); + + // Visiting /group/history/X + $('body.group div#minornavigation ul.nav a') + .filter(function(index) {return $(this).text().indexOf("History") != -1;}) + .click(function() { + _gaq.push(['_trackEvent', 'Group', 'Click: History Tab', $(this).attr('href')]); + }); + + // Compare Button on /group/history/X + $('body.group.history form#group-revisions input[name="diff"]').click(function() { + _gaq.push(['_trackEvent', 'Group', 'Click: Button', 'Compare History']); + }); + }); +}(jQuery)); diff --git a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js old mode 100644 new mode 100755 index ac1e841..9870e4e --- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -112,6 +112,7 @@ CKAN.GA_Reports.bind_sidebar = function() { $('#graph-legend-container > *').hide(); $('#graph-legend-container .instructions').show(); $(legend_name).show(); + $("#graph-legend-container").show(); } ); /* The first tab might already have been shown */ @@ -126,6 +127,6 @@ CKAN.GA_Reports.bind_month_selector = function() { window.location = url; }; var selectors = $('select[name="month"]'); - assert(selectors.length>0); + //assert(selectors.length>0); selectors.bind('change', handler); }; diff --git a/ckanext/ga_report/templates/ga_report/ga_util.html b/ckanext/ga_report/templates/ga_report/ga_util.html old mode 100644 new mode 100755 index 9ac3367..bdff4f9 --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -62,8 
+62,8 @@
Notes
    -
  • "Views" is the number of times a page was loaded in users' browsers. ("Pageview" is the technical term.)
  • -
  • "Downloads" is the number of times a user has clicked "Download" (or "Direct Link") for a resource (or cache of it) for a particular dataset. Download information is available from 2nd December 2012; "No data" is shown for records before that date. Unpublished and a few published datasets have no resource links and therefore no downloads.
  • +
  • "Views" is the number of times a page was loaded in users' browsers.
  • +
  • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
  • These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
  • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
diff --git a/ckanext/ga_report/templates/ga_report/publisher/index.html b/ckanext/ga_report/templates/ga_report/publisher/index.html old mode 100644 new mode 100755 index 84171fb..f7bb55d --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -1,34 +1,43 @@ - +{% extends "page.html" %} - - - Usage by Publisher - - +{% block styles %} + {{ super() }} - - + + + + + + - +{% endblock %} - +{%- block subtitle %}Usage by Publisher{% endblock -%} + +{% block breadcrumb_content %}
  • Site Analytics
  • -
  • Publishers
  • -
    +
  • Organizations
  • +{% endblock %} + +{% block primary_content %} -
    - -   Download as CSV - -

    Site Usage

    + {% set download_link = h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publisher_csv',month=c.month or 'all') %} + +
    +
    +   Download as CSV +

    Site Usage

    +
    +
    +
    @@ -42,43 +51,38 @@

    Site Usage

    Publishers - ${rickshaw_graph(c.top_publishers_graph,'publishers')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.top_publishers_graph, id='publishers', mode='line', colorscheme='munin' %}
    -
    -
    +
    +
    +

    Statistics for

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}
    - + - + {% for publisher, views, visits in c.top_publishers %} - + - + {% endfor %}
    PublisherOrganization Dataset Views
    - ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))} + {{ h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else '')) }} ${views}{{ views }}
    -
    - - - - - +
    + {% resource 'ga_report/ga_org_index.js' %} +{% endblock %} - +{% block secondary %} +{% endblock %} diff --git a/ckanext/ga_report/templates/ga_report/publisher/read.html b/ckanext/ga_report/templates/ga_report/publisher/read.html old mode 100644 new mode 100755 index 2494351..3c33a52 --- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -1,46 +1,52 @@ - +{% extends "page.html" %} - - - Usage by Dataset - - +{% block styles %} + {{ super() }} - - + + + + + + - +{% endblock %} +{%- block subtitle %}Site usage{% endblock -%} - +{% block breadcrumb_content %}
  • Site Analytics
  • - -
  • Publishers
  • -
  • ${c.publisher.title}
  • -
    - -
  • Usage By Dataset
  • -
    -
    - -
    - - -   Download as CSV - -

    Site Usage - ${c.publisher.title} - All datasets -

    - + {% if c.publisher %} +
  • Organizations
  • +
  • {{ c.publisher.title }}
  • + {% else %} +
  • Usage By Dataset
  • + {% endif %} +{% endblock %} + +{% block primary_content %} + {% set download_link = h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='dataset_csv',id=c.publisher_name or 'all',month=c.month or 'all') %} +
    +
    +   Download as CSV +

    Site Usage + {% if c.publisher %} + {{ c.publisher.title }} + {% else %} + All datasets + {% endif %} +

    +
    +
    +
    @@ -52,59 +58,61 @@

    Site Usage

    - - ${rickshaw_graph(c.graph_data,'dataset-downloads',debug=True)} - + {% if c.graph_data %} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.graph_data, id='dataset-downloads', mode='line', colorscheme='munin' %} + {% endif %}
    -
    - -

    Statistics for ${h.month_option_title(c.month,c.months,c.day)}:

    -
    - +
    +
    + {% if c.month %} +

    Statistics for {{ h.month_option_title(c.month,c.months,c.day) }}:

    + {% else %}

    Statistics for all months

    - -
    + {% endif %} +
    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}
    -
    No page views in this period.
    - + {% if not c.top_packages %} +
    No page views in this period.
    + {% else %} - - + {% for package, views, visits,downloads in c.top_packages %} + - - + + - + {% endfor %}
    Dataset Views Downloads
    - ${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))} + {{ h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name)) }} ${views}${downloads}{{ views }}{{ downloads }}
    -
    - - ${ga_footer()} + {% endif %} + {% snippet 'snippets/ga_footer.html' %} +
    - - +{% endblock %} +{% block secondary %} +{% endblock %} diff --git a/ckanext/ga_report/templates/ga_report/site/index.html b/ckanext/ga_report/templates/ga_report/site/index.html index 97e7e14..adfaa4d 100644 --- a/ckanext/ga_report/templates/ga_report/site/index.html +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -1,36 +1,48 @@ - +{% extends "page.html" %} - - - Site usage - - +{% block styles %} + {{ super() }} - - + + + + + + - +{% endblock %} + +{%- block subtitle %}Site usage{% endblock -%} - +{% block breadcrumb_content %}
  • Site Analytics
  • Site-wide
  • -
    + +{% endblock %} -
    +{% block primary_content %} +
    +
    + {% set download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv',month=c.month or 'all') %} +   Download as CSV +

    Site Usage

    +
    +
    +
    +
    @@ -90,7 +103,7 @@

    Site Usage

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}
    @@ -98,120 +111,107 @@

    Show stats table for:

    - + {% for name, value, graph in c.global_totals %} - - + + - + {% endfor %}
    Value History
    ${name}${value}{{ name }}{{ value }} - - ${','.join([y for x,y in graph])} + + {{ h.get_graph_y(graph) }}
    - ${rickshaw_graph(c.browser_versions_graph,'browser-versions',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.browser_versions_graph, id='browser_versions', mode='stack', colorscheme='munin' %}

    Note: Where a browser has a large number of versions, these have been grouped together.

    -
    +

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.browser_versions)} + {% snippet 'snippets/stat_table.html', items=c.browser_versions, title='Views' %}
    - ${rickshaw_graph(c.browsers_graph,'browsers',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.browsers_graph, id='browsers', mode='stack', colorscheme='munin' %}

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.browsers)} + {% snippet 'snippets/stat_table.html', items=c.browsers, title='Views' %}
    - ${rickshaw_graph(c.os_graph,'os',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.os_graph, id='os', mode='stack', colorscheme='munin' %}

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.os)} + {% snippet 'snippets/stat_table.html', items=c.os, title='Views' %}
    - ${rickshaw_graph(c.os_versions_graph,'os_versions',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.os_versions_graph, id='os_versions', mode='stack', colorscheme='munin' %}

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.os_versions)} + {% snippet 'snippets/stat_table.html', items=c.os_versions, title='Views' %}

    Number of visits that were referred from social networks

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}
    - ${social_table(c.social_referrer_totals)} + {% snippet 'snippets/social_table.html', items=c.social_referrer_totals, with_source=False %}
    - ${rickshaw_graph(c.social_networks_graph, 'social_networks',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.social_networks_graph, id='social_networks', mode='stack', colorscheme='munin' %}

    Percentage of visits that were referred from these social networks

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.social_networks, 'Visits')} + {% snippet 'snippets/stat_table.html', items=c.social_networks, title='Visits' %}
    - ${rickshaw_graph(c.languages_graph,'languages',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.languages_graph, id='languages', mode='stack', colorscheme='munin' %}

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.languages)} + {% snippet 'snippets/stat_table.html', items=c.languages, title='Views' %}
    - ${rickshaw_graph(c.country_graph,'country',mode='stack')} + {% snippet 'snippets/rickshaw_graph.html', items_json=c.country_graph, id='country', mode='stack', colorscheme='munin' %}

    Show stats table for:

    - ${month_selector(c.month, c.months, c.day)} + {% snippet 'snippets/month_selector.html', current_moth=c.month, months=c.months, day=c.day %}

    - ${stat_table(c.country)} + {% snippet 'snippets/stat_table.html', items=c.country, title='Views' %}
    - -
    - - - - - - - - - + {% resource 'ga_report/ga_index.js' %} +{% endblock %} +{% block secondary %} +{% endblock %} diff --git a/ckanext/ga_report/templates/ga_report/site/index_orig.html b/ckanext/ga_report/templates/ga_report/site/index_orig.html new file mode 100755 index 0000000..575866d --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/site/index_orig.html @@ -0,0 +1,224 @@ + + + + + Site usage + + + + + + + + + + + + + + + + + + +
  • Site Analytics
  • +
  • Site-wide
  • +
    + +
    +
    +
    + +   Download as CSV + +

    Site Usage

    +
    +
    + +
    +
    + +
    +
    +
    + Graph Legend +
    + +
    +
    +
    +
    +
    +
    + +
    +
    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    + + + + + + + + + + + + + +
    NameValueHistory
    ${name}${value} + + ${','.join([y for x,y in graph])} + +
    +
    +
    + ${rickshaw_graph(c.browser_versions_graph,'browser-versions',mode='stack')} +
    +

    Note: Where a browser has a large number of versions, these have been grouped together.

    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.browser_versions)} +
    +
    + ${rickshaw_graph(c.browsers_graph,'browsers',mode='stack')} +
    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.browsers)} +
    +
    + ${rickshaw_graph(c.os_graph,'os',mode='stack')} +
    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.os)} +
    +
    + ${rickshaw_graph(c.os_versions_graph,'os_versions',mode='stack')} +
    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.os_versions)} +
    +
    +

    Number of visits that were referred from social networks

    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    + ${social_table(c.social_referrer_totals)} +
    +
    + ${rickshaw_graph(c.social_networks_graph, 'social_networks',mode='stack')} +
    +

    Percentage of visits that were referred from these social networks

    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.social_networks, 'Visits')} +
    +
    + ${rickshaw_graph(c.languages_graph,'languages',mode='stack')} +
    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.languages)} +
    +
    + ${rickshaw_graph(c.country_graph,'country',mode='stack')} +
    +
    +

    Show stats table for:

    + ${month_selector(c.month, c.months, c.day)} +
    +
    + ${stat_table(c.country)} +
    +
    +
    +
    +
    +
    + +
    + + + + + + + + + + + diff --git a/ckanext/ga_report/templates/snippets/ga_footer.html b/ckanext/ga_report/templates/snippets/ga_footer.html new file mode 100644 index 0000000..de442b2 --- /dev/null +++ b/ckanext/ga_report/templates/snippets/ga_footer.html @@ -0,0 +1,11 @@ +
    +
    Notes
    +
    +
      +
    • "Views" is the number of times a page was loaded in users' browsers.
    • +
    • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
    • +
    • These usage statistics are confined to users with JavaScript enabled, which excludes web crawlers and API calls.
    • +
    • The results are not shown when the number of views/visits is very small. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
    • +
    +
    +
    diff --git a/ckanext/ga_report/templates/snippets/month_selector.html b/ckanext/ga_report/templates/snippets/month_selector.html new file mode 100644 index 0000000..3b55a4f --- /dev/null +++ b/ckanext/ga_report/templates/snippets/month_selector.html @@ -0,0 +1,8 @@ + + + diff --git a/ckanext/ga_report/templates/snippets/rickshaw_graph.html b/ckanext/ga_report/templates/snippets/rickshaw_graph.html new file mode 100644 index 0000000..d5f80eb --- /dev/null +++ b/ckanext/ga_report/templates/snippets/rickshaw_graph.html @@ -0,0 +1,14 @@ + +
    +
    +
    +
    + +
    +
    + diff --git a/ckanext/ga_report/templates/snippets/social_table.html b/ckanext/ga_report/templates/snippets/social_table.html new file mode 100644 index 0000000..cbfaff3 --- /dev/null +++ b/ckanext/ga_report/templates/snippets/social_table.html @@ -0,0 +1,19 @@ + + + + + + {% if with_source %} + + {% endif %} + + + {% for name, url, source, count in items %} + + + + + + {% endfor %} +
    NameSourceVisits
    {{ name }}{{ source }}{{ count }}
    + diff --git a/ckanext/ga_report/templates/snippets/stat_table.html b/ckanext/ga_report/templates/snippets/stat_table.html new file mode 100644 index 0000000..8cf5195 --- /dev/null +++ b/ckanext/ga_report/templates/snippets/stat_table.html @@ -0,0 +1,14 @@ + + + + + + + {% for name, value in items %} + + + + + {% endfor %} +
    Name% {{ title }}
    {{ name }}{{ value }}
    + diff --git a/ckanext/ga_report/tests/test_download.py b/ckanext/ga_report/tests/test_download.py old mode 100644 new mode 100755 index e69de29..87f6252 --- a/ckanext/ga_report/tests/test_download.py +++ b/ckanext/ga_report/tests/test_download.py @@ -0,0 +1,29 @@ +from nose.tools import assert_equal + +from ckanext.ga_report.download_analytics import DownloadAnalytics + +_filter_browser_version = DownloadAnalytics._filter_browser_version + +class TestBrowserVersionFilter: + def test_chrome(self): + assert_equal(_filter_browser_version('Chrome', u'6.0.472.0'), '6') + def test_firefox(self): + assert_equal(_filter_browser_version('Firefox', u'16.1'), '16') + def test_safari(self): + assert_equal(_filter_browser_version('Safari', u'534.55.3'), '53X') + assert_equal(_filter_browser_version('Safari', u'1534.55.3'), '15XX') + def test_ie(self): + assert_equal(_filter_browser_version('Internet Explorer', u'8.0'), '8') + def test_opera_mini(self): + assert_equal(_filter_browser_version('Opera Mini', u'6.5.27431'), '6') + def test_opera(self): + assert_equal(_filter_browser_version('Opera', u'11.60'), '11') + +class TestDownloadAnalytics: + def test_filter_out_long_tail(self): + data = {'Firefox': 100, + 'Obscure Browser': 5, + 'Chrome': 150} + DownloadAnalytics._filter_out_long_tail(data, 10) + assert_equal(data, {'Firefox': 100, + 'Chrome': 150}) diff --git a/ckanext/ga_report/tests/test_model.py b/ckanext/ga_report/tests/test_model.py new file mode 100755 index 0000000..7434432 --- /dev/null +++ b/ckanext/ga_report/tests/test_model.py @@ -0,0 +1,17 @@ +from nose.tools import assert_equal + +from ckanext.ga_report.ga_model import _normalize_url + +class TestNormalizeUrl: + def test_normal(self): + assert_equal(_normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices'), + '/dataset/weekly_fuel_prices') + + def test_www_dot(self): + assert_equal(_normalize_url('http://www.data.gov.uk/dataset/weekly_fuel_prices'), + '/dataset/weekly_fuel_prices') + + def 
test_https(self): + assert_equal(_normalize_url('https://data.gov.uk/dataset/weekly_fuel_prices'), + '/dataset/weekly_fuel_prices') +