For creating and using indexing in gstudio #1201

Open
wants to merge 6 commits into base: mongokit
21 changes: 21 additions & 0 deletions gnowsys-ndf/create_indexes_mongoshell.js
@@ -0,0 +1,21 @@
/* Run this file from the terminal with: mongo < create_indexes_mongoshell.js
   This runs the script and creates indexes on the fields below. Whenever you
   want to index new fields, update the respective class in
   gnowsys-ndf/gnowsys_ndf/ndf/models.py and add the field here as well. */

conn=new Mongo();
db=conn.getDB("studio-dev");
db.Nodes.createIndex({'_type':1,'name':1})
db.Nodes.createIndex({'_type':1,'_id':1})
//db.Nodes.createIndex({'content':1})
//db.Nodes.createIndex({'tags':1})
//db.Nodes.createIndex({'status':1})
//db.Nodes.createIndex({'collection_set':1})
//db.Nodes.createIndex({'type_of':1})
//db.Nodes.createIndex({'member_of':1})
//db.Nodes.createIndex({'attribute_set':1})
//db.Nodes.createIndex({'relation_set':1})
db.Triples.createIndex({'_type':1,'name':1})
//db.Triples.createIndex({'object_value':1})
//db.Triples.createIndex({'status':1})
//db.Triples.createIndex({'right_subject':1})
db.Triples.createIndex({'_type':1,'subject':1,'attribute_type':1})
db.Triples.createIndex({'_type':1,'subject':1,'relation_type':1})
db.Nodes.createIndex({'member_of':1,'status':1,'last_update':1})
68 changes: 63 additions & 5 deletions gnowsys-ndf/gnowsys_ndf/ndf/models.py
@@ -229,7 +229,39 @@ class Node(DjangoDocument):
'ip_address':basestring}],
'snapshot':dict
}

indexes = [
    {
        'fields': ['_type', 'name'],
    },
    # {
    #     'fields': ['member_of', 'group_set'],
    # },
    {
        'fields': ['_type', '_id'],
    },
    {
        'fields': ['member_of', 'status', 'last_update'],
    },
    # {
    #     'fields': ['content'],
    # },
    # {
    #     'fields': ['tags'],
    # },
    # {
    #     'fields': ['collection_set'],
    # },
    # {
    #     'fields': ['type_of'],
    # },
    # {
    #     'fields': ['member_of'],
    # },
    # {
    #     'fields': ['status'],
    # },
]

required_fields = ['name', '_type'] # 'group_set' to be included
# here after the default
# 'Administration' group is
@@ -1106,8 +1138,15 @@ class GSystem(Node):
'annotations': [dict], # List of json files for annotations on the page
'license': basestring # contains license/s in string format
}

# indexes = [
#     {
#         'fields': ['attribute_set'],
#     },
#     {
#         'fields': ['relation_set'],
#     },
# ]
use_dot_notation = True


@connection.register
@@ -1597,7 +1636,17 @@ class Triple(DjangoDocument):
'lang': basestring, # Put validation for standard language codes
'status': STATUS_CHOICES_TU
}

indexes = [
    {
        'fields': ['_type', 'name'],
    },
    {
        'fields': ['_type', 'subject'],
    },
    # {
    #     'fields': ['status'],
    # },
]
required_fields = ['name', 'subject']
use_dot_notation = True
use_autorefs = True
@@ -1810,6 +1859,11 @@ class GAttribute(Triple):
}

required_fields = ['attribute_type', 'object_value']
indexes = [
    {
        'fields': ['attribute_type'],
    },
]
use_dot_notation = True
use_autorefs = True # To support Embedding of Documents

@@ -1823,7 +1877,11 @@ class GRelation(Triple):
# ObjectId's of GSystems Class / List of list of ObjectId's of GSystem Class
'right_subject': OR(ObjectId, list)
}

indexes = [
    {
        'fields': ['relation_type'],
    },
]
required_fields = ['relation_type', 'right_subject']
use_dot_notation = True
use_autorefs = True # To support Embedding of Documents
13 changes: 9 additions & 4 deletions gnowsys-ndf/gnowsys_ndf/ndf/templatetags/ndf_tags.py
@@ -1710,7 +1710,6 @@ def group_type_info(groupid,user=0):

return group_type


@get_execution_time
@register.assignment_tag
def user_access_policy(node, user):
@@ -1732,7 +1731,11 @@ def user_access_policy(node, user):
string value (allow/disallow), i.e. whether user is allowed or not!
"""
user_access = False

# Low-level cache API used in this function; the key includes both the group
# and the user, since access is decided per user (keying on the group alone
# would serve one user's cached result to every other user of that group).
group_name, group_id = get_group_name_id(node)
cache_key = 'access' + str(group_id) + '_' + str(user.id if user else user)
cache_result = cache.get(cache_key)
if cache_result:
    return cache_result
try:
# Please make a note, here the order in which check is performed is IMPORTANT!

@@ -1741,7 +1744,6 @@ def user_access_policy(node, user):

else:
# group_node = node_collection.one({'_type': {'$in': ["Group", "Author"]}, '_id': ObjectId(node)})
group_name, group_id = get_group_name_id(node)
group_node = node_collection.one({"_id": ObjectId(group_id)})

if user.id == group_node.created_by:
Expand All @@ -1760,15 +1762,18 @@ def user_access_policy(node, user):
user_access = False

if user_access:
    cache.set(cache_key, "allow")
    return "allow"

else:
    cache.set(cache_key, "disallow")
    return "disallow"

except Exception as e:
error_message = "\n UserAccessPolicyError: " + str(e) + " !!!\n"
raise Exception(error_message)

# cache implemented in the method above

@get_execution_time
@register.assignment_tag
def resource_info(node):
45 changes: 45 additions & 0 deletions indexing_readme.txt
@@ -0,0 +1,45 @@
To create and use indexes we have to follow two steps:

1) Index usage:
This step is straightforward: go to the file where the database class is defined (in gstudio it is models.py) and, below the structure variable (where the various fields and their properties are defined), add another variable called indexes listing the fields on which we want to build indexes.

For Example-

>>> class MyDoc(Document):
... structure = {
... 'standard':unicode,
... 'other':{
... 'deep':unicode,
... },
... 'notindexed':unicode,
... }
...
... indexes = [
... {
... 'fields':['standard', 'other.deep'],
... 'unique':True,
... },
... ]

In versions of mongokit before 0.7.1 this simple addition would also automatically create the indexes in mongoDB. In later versions automatic index creation was removed (as people felt that indexes should be created with care, directly on the collection). Gstudio uses mongokit version 0.9.1.1, so the addition above only enables the database class to use the indexes if they are already present in the database. We now have to create the indexes in mongoDB manually through mongo shell commands (mongokit in fact tells us this via a deprecation warning: index creation is no longer automatic and has to be done manually).

2) Index creation:
We could run the createIndex() command directly in the mongo shell on each collection for the required fields. But doing that by hand is repetitive, so we wrote the commands in a js file and then ran the script.

For Example-

test.js =>
conn = new Mongo()
db = conn.getDB("studio-dev")
db.Nodes.createIndex({'_type':1,'name':1})
// just keep adding these commands to create indexes on the required fields of the desired collections

$ mongo < test.js   # run from the terminal

As stated in the mongoDB documentation, it is preferable to use the createIndex() command rather than ensureIndex().
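
For reference, the same indexes can also be created and verified from Python with pymongo; a minimal sketch, assuming the database name "studio-dev" and the collection names used in the script above:

from pymongo import MongoClient, ASCENDING

client = MongoClient()  # assumes a mongod running on localhost:27017
db = client['studio-dev']

# create_index is idempotent: re-running it on an existing index does nothing
db['Nodes'].create_index([('_type', ASCENDING), ('name', ASCENDING)])
db['Triples'].create_index([('_type', ASCENDING), ('name', ASCENDING)])

# verify which indexes exist on a collection
print(db['Nodes'].index_information())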

3) Index changes:
If more fields are to be indexed in the future, append them to the js file (present in the gnowsys-ndf folder as 'create_indexes_mongoshell.js') and run it again (Point 2); if an index is already present, the instruction does nothing. Also make the corresponding changes to the indexes variable in the database class (models.py) (Point 1). If no indexing changes are made, running the script once is sufficient.



107 changes: 107 additions & 0 deletions optimize_readme.txt
@@ -0,0 +1,107 @@
1) Creating temporary variables for faster lookup:
Lists are a very commonly used data structure for storing mutable information in Python, and we frequently call append on them. Inside a loop, the repeated attribute lookup of the append method is a costly operation, but a fairly simple trick reduces the time.
def func1():
    lst1 = []
    lst2 = []
    for i in range(500):
        lst1.append(i)
        lst2.append(i + 500)
    for j in lst2:
        lst1.append(j)

def func2():
    lst1 = []
    lst2 = []
    l1_append_temp = lst1.append  # a temporary variable:
    l2_append_temp = lst2.append  # the lookup for append is already done
    for i in range(500):
        l1_append_temp(i)
        l2_append_temp(i + 500)
    for j in lst2:
        l1_append_temp(j)

Using Python's timeit library we timed the functions and got the following results:
func1 - 0.048635005950927734 s
func2 - 0.032353162765502930 s
(Note that we are dealing with relatively small data here, so the time difference is small, but with big data the difference can be huge.)

In each loop iteration we first look up the list's append attribute and then call it; by using the temporary variable (where we stored the lookup early on) we skip the first step.
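
A minimal sketch of how such timings can be reproduced with timeit (the function and iteration count here are our stand-ins, not necessarily what was used above):

import timeit

def append_loop():  # hypothetical stand-in for func1/func2 above
    lst = []
    for i in range(500):
        lst.append(i)

# timeit.timeit accepts any zero-argument callable; total seconds for 100 calls
print(timeit.timeit(append_loop, number=100))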

We can use the same technique for database queries that are issued repeatedly inside loops.
e.g. db_find_temp = node_collection.find

(Note that using this when no loops are present gives no time advantage. In fact, using it without loops can reduce readability and clutter the scope with many local variables.)
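
A minimal sketch of the same trick applied to a query inside a loop, assuming node_collection is a pymongo collection (the database name and node names here are illustrative):

from pymongo import MongoClient

client = MongoClient()                        # assumes a local mongod
node_collection = client['studio-dev'].Nodes  # stand-in for gstudio's node_collection

ids = []
find_one_temp = node_collection.find_one  # method lookup done once, outside the loop
ids_append_temp = ids.append
for name in ('home', 'desktop', 'language'):  # hypothetical node names
    doc = find_one_temp({'name': name})
    if doc:
        ids_append_temp(doc['_id'])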

2) Multiprocessing library of Python:
Because of the GIL (Global Interpreter Lock), working with threads in Python is not as effective as in older languages like C.
The GIL synchronizes the execution of threads so that only one thread executes Python bytecode at a time, even if the computer has multiple cores and could run threads simultaneously. The multiprocessing library sidesteps this by using separate processes, giving the programmer some leeway to use the multiple cores. (Note that spawning processes creates significant overhead, so this should be used only for big loops.) The old rule that multiprocessing must be used only on independent pieces of work applies in Python as well.

def func3():
    for each_gapp in already_selected_gapps:
        gapp_name = each_gapp["name"]
        if gapp_name in gapps_list:
            gapps_list_remove(gapp_name)

import multiprocessing as mp

def func4():
    processes = []
    lst1 = already_selected_gapps
    n1 = len(lst1)
    x = mp.cpu_count()
    n2 = n1 // x  # chunk size: divide the list (of independent elements) among the cores
    for i in range(x):
        # the last chunk also takes the remainder; note args must be a tuple
        chunk = lst1[i * n2:] if i == x - 1 else lst1[i * n2:(i + 1) * n2]
        processes.append(mp.Process(target=multi_, args=(chunk,)))
    for p in processes:
        p.start()
    for p in processes:
        p.join()

def multi_(lst):  # the loop logic goes in a function so that each process can run it
    for each_gapp in lst:
        gapp_name = each_gapp["name"]
        if gapp_name in gapps_list:
            gapps_list_remove(gapp_name)

# Note: each child process works on a copy of gapps_list, so removals made inside
# multi_ are not visible in the parent; the work items must really be independent.
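
The same partitioning can be expressed more simply with multiprocessing.Pool; a sketch under the assumption that the per-chunk work returns results instead of mutating shared state (the stand-in data is ours):

import multiprocessing as mp

def gapp_names(chunk):
    # per-chunk work: collect names instead of removing from a shared list
    return [each_gapp["name"] for each_gapp in chunk]

if __name__ == '__main__':
    already_selected_gapps = [{"name": "gapp%d" % i} for i in range(1000)]  # stand-in data
    x = mp.cpu_count()
    n2 = max(1, len(already_selected_gapps) // x)
    chunks = [already_selected_gapps[i:i + n2]
              for i in range(0, len(already_selected_gapps), n2)]
    pool = mp.Pool(processes=x)
    results = pool.map(gapp_names, chunks)  # one list of names per chunk
    pool.close()
    pool.join()
    names = [n for sub in results for n in sub]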


3) List comprehensions:
The best way to visualize list comprehensions is to think of them as sets in set-builder form. They are an excellent alternative to loops over lists and result in faster computation.

A = {x^2 : x in {1,2,3}} = {1,4,9}

def func5():
    lst = []
    lst2 = []
    for i in range(500):
        lst2.append(i)
    for i in lst2:
        lst.append(i * i)

def func6():
    lst2 = []
    lst2_append_temp = lst2.append
    for i in range(500):
        lst2_append_temp(i)
    lst = [i * i for i in lst2]  # note the similarity with set-builder form

Using Python's timeit library we timed the functions and got the following results:
func5 - 0.047894954681396484 s
func6 - 0.021952867507934570 s
(Note that we are dealing with relatively small data here, so the time difference is small, but with big data the difference can be huge.)

The general format of a list comprehension is:
new_list = [expression for item in old_list if condition]

This is equivalent to:
new_list = []
for item in old_list:
    if condition:
        new_list.append(expression)

e.g.
new_lst = [x**2 for x in old_lst if x % 2 == 0]
is equivalent to
new_lst = []
for x in old_lst:
    if x % 2 == 0:
        new_lst.append(x**2)

Note that the comprehension builds a brand-new list and rebinds new_lst, so any previous contents of new_lst are discarded. (It is even safe to write lst = [x*x for x in lst], since the comprehension is fully evaluated before the name is rebound.)