From 0179c9d61d3ffa5f942c8b8ed9f71447b494599e Mon Sep 17 00:00:00 2001
From: Sanjay-Reddy-S
Date: Thu, 9 Jul 2015 11:48:36 +0530
Subject: [PATCH 1/5] indexes block included

---
 gnowsys-ndf/gnowsys_ndf/ndf/models.py | 68 +++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 5 deletions(-)

diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/models.py b/gnowsys-ndf/gnowsys_ndf/ndf/models.py
index 10fd21579a..5bf34ceea9 100644
--- a/gnowsys-ndf/gnowsys_ndf/ndf/models.py
+++ b/gnowsys-ndf/gnowsys_ndf/ndf/models.py
@@ -228,7 +228,39 @@ class Node(DjangoDocument):
                           'user_id': int,
                           'ip_address': basestring}]
     }
-
+    indexes = [
+        {
+            'fields': ['_type', 'name'],
+        },
+        # {
+        #     'fields': ['member_of', 'group_set'],
+        # },
+        {
+            'fields': ['_type', '_id'],
+        },
+        {
+            'fields': ['member_of', 'status', 'last_update']
+        }
+        # {
+        #     'fields': ['content'],
+        # },
+        # {
+        #     'fields': ['tags'],
+        # },
+        # {
+        #     'fields': ['collection_set'],
+        # },
+        # {
+        #     'fields': ['type_of'],
+        # },
+        # {
+        #     'fields': ['member_of'],
+        # },
+        # {
+        #     'fields': ['status']
+        # }
+    ]
     required_fields = ['name', '_type']  # 'group_set' to be included
                                          # here after the default
                                          # 'Administration' group is
@@ -1105,8 +1137,15 @@ class GSystem(Node):
         'annotations': [dict],  # List of json files for annotations on the page
         'license': basestring   # contains license/s in string format
     }
-
-    use_dot_notation = True
+    # indexes = [
+    #     {
+    #         'fields': ['attribute_set'],
+    #     },
+    #     {
+    #         'fields': ['relation_set'],
+    #     }
+    # ]
+    use_dot_notation = True


@connection.register
@@ -1559,7 +1598,17 @@ class Triple(DjangoDocument):
         'lang': basestring,   # Put validation for standard language codes
         'status': STATUS_CHOICES_TU
     }
-
+    indexes = [
+        {
+            'fields': ['_type', 'name'],
+        },
+        {
+            'fields': ['_type', 'subject'],
+        },
+        # {
+        #     'fields': ['status'],
+        # }
+    ]
     required_fields = ['name', 'subject']
     use_dot_notation = True
     use_autorefs = True
@@ -1772,6 +1821,11 @@ class GAttribute(Triple):
     }
 
     required_fields = ['attribute_type', 'object_value']
+    indexes = [
+        {
+            'fields': ['attribute_type'],
+        }
+    ]
     use_dot_notation = True
     use_autorefs = True  # To support Embedding of Documents
@@ -1785,7 +1839,11 @@ class GRelation(Triple):
     # ObjectId's of GSystems Class / List of list of ObjectId's of GSystem Class
     'right_subject': OR(ObjectId, list)
     }
-
+    indexes = [
+        {
+            'fields': ['relation_type'],
+        }
+    ]
     required_fields = ['relation_type', 'right_subject']
     use_dot_notation = True
     use_autorefs = True  # To support Embedding of Documents

From 053cfe98d6dcdf9e4cb085eae37c13381c3502c3 Mon Sep 17 00:00:00 2001
From: Sanjay-Reddy-S
Date: Thu, 9 Jul 2015 11:49:25 +0530
Subject: [PATCH 2/5] Run this to create indexes manually in mongoshell

---
 gnowsys-ndf/create_indexes_mongoshell.js | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 gnowsys-ndf/create_indexes_mongoshell.js

diff --git a/gnowsys-ndf/create_indexes_mongoshell.js b/gnowsys-ndf/create_indexes_mongoshell.js
new file mode 100644
index 0000000000..ec051b5d06
--- /dev/null
+++ b/gnowsys-ndf/create_indexes_mongoshell.js
@@ -0,0 +1,21 @@
+/*run this file from the terminal by typing the command: mongo <path-to-this-file>*/
+conn = new Mongo();
+db = conn.getDB("studio-dev");
+
+//indexes on the Nodes collection (these match the 'indexes' block of the Node class in models.py)
+db.Nodes.createIndex({'_type': 1, 'name': 1});
+db.Nodes.createIndex({'_type': 1, '_id': 1});
+db.Nodes.createIndex({'member_of': 1, 'status': 1, 'last_update': 1});
+
+//indexes on the Triples collection (these match the Triple, GAttribute and GRelation classes)
+db.Triples.createIndex({'_type': 1, 'name': 1});
+db.Triples.createIndex({'_type': 1, 'subject': 1});
+db.Triples.createIndex({'attribute_type': 1});
+db.Triples.createIndex({'relation_type': 1});
+
+//just keep adding createIndex() commands here for other fields that need indexes
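+
+//Optional sanity check (an extra helper, not required by the steps above):
+//after the createIndex() calls run, these print the indexes now present.
+printjson(db.Nodes.getIndexes());
+printjson(db.Triples.getIndexes());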
From: Sanjay-Reddy-S
Date: Thu, 9 Jul 2015 12:06:49 +0530
Subject: [PATCH 3/5] How to do indexing on gstudio

---
 indexing_readme.txt | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 indexing_readme.txt

diff --git a/indexing_readme.txt b/indexing_readme.txt
new file mode 100644
index 0000000000..450094dba6
--- /dev/null
+++ b/indexing_readme.txt
@@ -0,0 +1,45 @@
+For indexing to be created and used we have to follow two steps:
+
+1) Index usage:
+   This step is straightforward: in the file where the database classes are defined (in gstudio it is models.py), below the 'structure' variable (where we declare the various fields along with their types), we add another variable called 'indexes' and list the fields on which we want indexing to be performed.
+
+For Example-
+
+>>> class MyDoc(Document):
+...     structure = {
+...         'standard': unicode,
+...         'other': {
+...             'deep': unicode,
+...         },
+...         'notindexed': unicode,
+...     }
+...
+...     indexes = [
+...         {
+...             'fields': ['standard', 'other.deep'],
+...             'unique': True,
+...         },
+...     ]
+
+   In MongoKit versions before 0.7.1 this simple addition would also automatically create the indexes in MongoDB. In later versions automatic index creation was removed (as people felt that indexes should be created with care, directly on the collection). Gstudio uses MongoKit version 0.9.1.1, so the addition above only enables the database class to use the indexes if they are already present in the database. We now have to create the indexes in MongoDB manually, through mongo shell commands. (MongoKit in fact announces this as a deprecation warning: indexing is no longer automatic and has to be done by hand.)
+
+2) Index creation:
+   We could run the createIndex() command directly in the mongo shell, once per collection for each required set of fields. But to keep the commands in one place and make them repeatable, we wrote them in a js file and then ran the script.
+
+For Example-
+
+test.js =>
+    conn = new Mongo()
+    db = conn.getDB("studio-dev")
+    db.Nodes.createIndex({'_type': 1, 'name': 1})
+    //just keep adding these commands for creating indexes on the required fields of the desired collections
+
+$ mongo <path-to-js-file>
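+
+(A sketch of how to confirm that a query really uses one of the new indexes; the query values 'GSystem' and 'home' below are only illustrative. Run the query with explain() in the mongo shell and inspect the reported cursor: on the MongoDB versions of this era a full collection scan is reported as "BasicCursor", while an index-backed query reports "BtreeCursor" followed by the index name.)
+
+    db.Nodes.find({'_type': 'GSystem', 'name': 'home'}).explain()
+    //"cursor" : "BtreeCursor _type_1_name_1" in the output means the {_type, name} index was used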
From: Sanjay-Reddy-S
Date: Thu, 9 Jul 2015 12:10:30 +0530
Subject: [PATCH 4/5] Simple optimising techniques

---
 optimize_readme.txt | 107 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 optimize_readme.txt

diff --git a/optimize_readme.txt b/optimize_readme.txt
new file mode 100644
index 0000000000..40b9cd3f52
--- /dev/null
+++ b/optimize_readme.txt
@@ -0,0 +1,107 @@
+1) Creating temporary variables for faster lookup:
+   Lists are a very commonly used data structure for storing mutable data in Python, and while using them we call their append method very often. Inside a loop this repeated lookup of the append attribute is a costly operation, but with a fairly simple trick that time can be saved.
+
+def func1():
+    lst1 = []
+    lst2 = []
+    for i in range(500):
+        lst1.append(i)
+        lst2.append(i + 500)
+    for j in lst2:
+        lst1.append(j)
+
+def func2():
+    lst1 = []
+    lst2 = []
+    l1_append_temp = lst1.append  # a temporary variable:
+    l2_append_temp = lst2.append  # the lookup for append is already done
+    for i in range(500):
+        l1_append_temp(i)
+        l2_append_temp(i + 500)
+    for j in lst2:
+        l1_append_temp(j)
+
+Using the timeit library of Python we timed the functions and got the following results:
+    func1 - 0.048635005950927734
+    func2 - 0.032353162765502930
+(Note that we are dealing with relatively small data here, so the time difference is small, but with big data the difference can be huge.)
+
+In func1, every iteration first looks up the append attribute of the list and then calls it; by using the temporary variable (where we store the result of the lookup up front) we skip the first step.
+
+We can use the same technique for commonly used queries on databases present within loops.
+Eg- dB_find_temp = node_collection.find
+
+(Note that using this when there are no loops present gives no time advantage. In fact, using it without loops can cost readability and clutter the function with extra local variables.)
+
+2) The multiprocessing library of Python:
+   Because of the GIL (Global Interpreter Lock), working with threads in Python is not as effective as in older languages like C. The GIL of the Python interpreter synchronizes the execution of threads so that only one thread runs Python code at a time, even if the computer has multiple cores and could run threads simultaneously. Still, the multiprocessing library allows the programmer some leeway: it starts separate processes, each with its own interpreter and GIL, so the multiple cores can be used to some extent. (But note that this library creates a big software overhead and thus must be used only when dealing with big loops.) The old rule that multiprocessing must be used only when dealing with independent objects applies in Python as well.
+
+def func3():
+    for each_gapp in already_selected_gapps:
+        gapp_name = each_gapp["name"]
+        if gapp_name in gapps_list:
+            gapps_list_remove(gapp_name)
+
+import multiprocessing as mp
+def func4():
+    processes = []
+    lst1 = already_selected_gapps
+    n1 = len(lst1)
+    x = mp.cpu_count()
+    n2 = n1 // x
+    for i in range(x):  # dividing the list (of independent elements) by the number of cores and passing each partition to one process
+        end = (i + 1) * n2 if i < x - 1 else n1  # the last partition also picks up the leftover elements
+        processes.append(mp.Process(target=multi_, args=(lst1[i * n2:end],)))
+    for i in range(x):
+        processes[i].start()
+    for i in range(x):
+        processes[i].join()
+
+def multi_(lst):  # the logic of the loop must be put in a function so that each process can run it
+    for each_gapp in lst:
+        gapp_name = each_gapp["name"]
+        if gapp_name in gapps_list:
+            gapps_list_remove(gapp_name)
+
+(A caveat: each spawned process works on a copy of the data, so removals that multi_ performs on gapps_list are not visible in the parent process; the results have to be sent back explicitly, e.g. through a multiprocessing.Queue, for them to take effect. See the Pool-based sketch below.)
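+
+(A minimal sketch of the same idea using multiprocessing.Pool, which handles the partitioning and result collection for us; already_selected_gapps and gapps_list are the same hypothetical names as above. Pool.map runs the name extraction in the worker processes and returns the results to the parent, which then performs the removals itself — this sidesteps the shared-state problem noted above.)
+
+import multiprocessing as mp
+
+def get_name(each_gapp):  # runs in a worker process
+    return each_gapp["name"]
+
+if __name__ == '__main__':
+    pool = mp.Pool(processes=mp.cpu_count())
+    names = pool.map(get_name, already_selected_gapps)  # the parallel part
+    pool.close()
+    pool.join()
+    for gapp_name in names:  # the mutation stays in the parent process
+        if gapp_name in gapps_list:
+            gapps_list.remove(gapp_name)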
+
+3) List comprehensions:
+   The best way to visualize list comprehensions is to think of them as sets in set-builder form. They are an excellent alternative to loops that build lists, and they result in faster computations.
+
+   A = {x^2 : x in (1,2,3)} = {1,4,9}
+
+def func5():
+    lst = []
+    lst2 = []
+    for i in range(500):
+        lst2.append(i)
+    for i in lst2:
+        lst.append(i * i)
+
+def func6():
+    lst2 = []
+    lst2_append_temp = lst2.append
+    for i in range(500):
+        lst2_append_temp(i)
+    lst = [i * i for i in lst2]  # see the similarity with set-builder form
+
+Using the timeit library of Python we timed the functions and got the following results:
+    func5 - 0.047894954681396484
+    func6 - 0.021952867507934570
+(Note that we are dealing with relatively small data here, so the time difference is small, but with big data the difference can be huge.)
+
+The general format of a list comprehension is:
+    [expression for item in old_list if condition]
+
+This is equivalent to:
+    for item in old_list:
+        if condition:
+            expression
+
+Eg-
+    new_lst = [x**2 for x in old_lst if x % 2 == 0]
+is equivalent to
+    new_lst = []
+    for x in old_lst:
+        if x % 2 == 0:
+            new_lst.append(x**2)
+Note that old_lst must be a different list from new_lst, and that the comprehension builds a brand-new list (if a list named new_lst already exists, it is replaced).

From 181315f1dba1100480851c3124c205f7eb22bfd8 Mon Sep 17 00:00:00 2001
From: Sanjay-Reddy-S
Date: Thu, 16 Jul 2015 11:42:48 +0530
Subject: [PATCH 5/5] Low Level API cache implemented in user_access_policy function

---
 .../gnowsys_ndf/ndf/templatetags/ndf_tags.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/templatetags/ndf_tags.py b/gnowsys-ndf/gnowsys_ndf/ndf/templatetags/ndf_tags.py
index 3a6fd4c951..96cce9ff1e 100644
--- a/gnowsys-ndf/gnowsys_ndf/ndf/templatetags/ndf_tags.py
+++ b/gnowsys-ndf/gnowsys_ndf/ndf/templatetags/ndf_tags.py
@@ -1710,7 +1710,6 @@ def group_type_info(groupid,user=0):
 
     return group_type
 
-
 @get_execution_time
 @register.assignment_tag
 def user_access_policy(node, user):
@@ -1732,7 +1731,11 @@ def user_access_policy(node, user):
       string value (allow/disallow), i.e. whether user is allowed or not!
     """
     user_access = False
-
+    group_name, group_id = get_group_name_id(node)
+    cache_key = 'access' + str(group_id) + '_' + str(user.id)  # Low level API cache implemented in this function; the key includes the user id since the answer is per-user as well as per-group
+    cache_result = cache.get(cache_key)
+    if cache_result:
+        return cache_result
     try:
         # Please make a note, here the order in which check is performed is IMPORTANT!
 
@@ -1741,7 +1744,6 @@ def user_access_policy(node, user):
 
         else:
             # group_node = node_collection.one({'_type': {'$in': ["Group", "Author"]}, '_id': ObjectId(node)})
-            group_name, group_id = get_group_name_id(node)
             group_node = node_collection.one({"_id": ObjectId(group_id)})
 
             if user.id == group_node.created_by:
@@ -1760,15 +1762,18 @@ def user_access_policy(node, user):
                 user_access = False
 
         if user_access:
+            cache.set(cache_key, "allow")
             return "allow"
         else:
+            cache.set(cache_key, "disallow")
             return "disallow"
 
     except Exception as e:
         error_message = "\n UserAccessPolicyError: " + str(e) + " !!!\n"
         raise Exception(error_message)
 
-
+# implemented cache in the method above (user_access_policy)
+
 @get_execution_time
 @register.assignment_tag
 def resource_info(node):
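
A minimal standalone sketch of the low-level cache pattern that PATCH 5/5 relies on (Django's django.core.cache). The key format, the 300-second timeout and the compute_access() helper below are illustrative only, not taken from the patch:

    from django.core.cache import cache

    def cached_access(group_id, user_id):
        cache_key = 'access' + str(group_id) + '_' + str(user_id)
        result = cache.get(cache_key)  # returns None on a cache miss
        if result is not None:
            return result
        result = compute_access(group_id, user_id)  # hypothetical slow lookup
        cache.set(cache_key, result, 300)  # keep the answer for 300 seconds
        return result

Giving cache.set() an explicit timeout bounds how stale a cached answer can get, and calling cache.delete(cache_key) whenever the underlying group or user data changes invalidates the entry immediately.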