From 1460d155745574c6930fba59916fad867d167096 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Fri, 22 Dec 2017 18:04:36 +0530 Subject: [PATCH 01/13] Modularized and moved dump-node logic in other file: from group_export into export_logic.py --- .../ndf/management/commands/export_logic.py | 390 +++++++++++++++++ .../ndf/management/commands/group_export.py | 396 +++--------------- 2 files changed, 456 insertions(+), 330 deletions(-) create mode 100644 gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py new file mode 100644 index 0000000000..bb260b2683 --- /dev/null +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py @@ -0,0 +1,390 @@ +import os +import datetime +import subprocess +import re +# from threading import Thread +# import multiprocessing +# import math +from bs4 import BeautifulSoup + +try: + from bson import ObjectId +except ImportError: # old pymongo + from pymongo.objectid import ObjectId +from django.template.defaultfilters import slugify +from django.core.management.base import BaseCommand, CommandError +from schema_mapping import create_factory_schema_mapper +from users_dump_restore import create_users_dump +from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection +from gnowsys_ndf.ndf.models import HistoryManager +from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID +from gnowsys_ndf.ndf.views.methods import get_group_name_id +from gnowsys_ndf.ndf.templatetags.simple_filters import get_latest_git_hash, get_active_branch_name + +GROUP_CONTRIBUTORS = [] +DUMP_PATH = None +TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export') +GROUP_ID = None +DATA_EXPORT_PATH = None +MEDIA_EXPORT_PATH = None +RESTORE_USER_DATA = False +SCHEMA_MAP_PATH = None +log_file = None +historyMgr = HistoryManager() +DUMP_NODES_LIST = [] +DUMPED_NODE_IDS = set() + +def build_rcs(node, collection_name): + ''' + Updates the rcs json with the current node's strcuture that + might have missed due to update queries. 
+ Runs a save() method on the node and calls copy_rcs() + ''' + # import ipdb; ipdb.set_trace() + if node: + global log_file + global GROUP_CONTRIBUTORS + try: + node.save() + if collection_name is node_collection and node.content: + pick_media_from_content(BeautifulSoup(node.content, 'html.parser')) + # if collection_name is triple_collection: + # # if 'attribute_type' in node: + # # triple_node_RT_AT = node_collection.one({'_id': node.attribute_type}) + # # elif 'relation_type' in node: + # # triple_node_RT_AT = node_collection.one({'_id': node.relation_type}) + # # node.save(triple_node=triple_node_RT_AT, triple_id=triple_node_RT_AT._id) + # node.save() + # elif collection_name is node_collection: + # node.save() + # if node.content: + # pick_media_from_content(BeautifulSoup(node.content, 'html.parser')) + # elif collection_name is filehive_collection: + # # dump_node(node_id=node['first_parent'], collection_name=node_collection) + # node.save() + # else: + # node.save() + try: + global RESTORE_USER_DATA + if RESTORE_USER_DATA: + if "contributors" in node: + GROUP_CONTRIBUTORS.extend(node.contributors) + except Exception as no_contributors_err: + log_file.write( "\n Error while fetching contributors " + str(no_contributors_err) +\ + " for: " + str(node._id) + " with contributors: " + str(node.contributors)) + pass + log_file.write( "\n RCS Built for " + str(node._id)) + copy_rcs(node) + except Exception as buildRCSError: + error_log = "\n !!! Error found while Building RCS ." + error_log += "\nError: " + str(buildRCSError) + log_file.write( str(error_log)) + print error_log + pass + +def find_file_from_media_url(source_attr): + try: + global log_file + log_file.write( "\n find_file_from_media_url invoked for: " + str(source_attr)) + + if "media" in source_attr: + source_attr = source_attr.split("media/")[-1] + file_node = node_collection.find_one({"$or": [{'if_file.original.relurl': source_attr}, + {'if_file.mid.relurl': source_attr},{'if_file.thumbnail.relurl': source_attr}]}) + + elif "readDoc" in source_attr: + split_src = source_attr.split('/') + node_id = split_src[split_src.index('readDoc') + 1] + file_node = node_collection.one({'_id': ObjectId(node_id)}) + + if file_node: + log_file.write( "\n media file_node gs found: " + str(file_node._id)) + get_file_node_details(file_node) + + except Exception as find_file_from_media_url_err: + error_log = "\n !!! Error found while taking dump in find_file_from_media_url() ." 
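
For reference, the lookup above reduces to one query over the three if_file relurl variants once the path after "media/" is isolated; a minimal sketch run inside this module (the relative URL value is hypothetical):

    # hypothetical value of source_attr after source_attr.split("media/")[-1]
    source_attr = "images/4/e/8/sample-photo.png"
    file_node = node_collection.find_one({"$or": [
        {'if_file.original.relurl': source_attr},
        {'if_file.mid.relurl': source_attr},
        {'if_file.thumbnail.relurl': source_attr}]})
    if file_node:
        # dumps the GSystem, its filehive documents and the media files
        get_file_node_details(file_node)
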
+ error_log += "\nError: " + str(find_file_from_media_url_err) + print "\n Error: ", error_log + log_file.write( str(error_log)) + print error_log + pass + +def pick_media_from_content(content_soup): + ''' + Parses through the content of node and finds the media + files and dump it + ''' + try: + global log_file + log_file.write( "\n pick_media_from_content invoked.") + + all_src = content_soup.find_all(src=re.compile('media|readDoc')) + # Fetching the files + for each_src in all_src: + src_attr = each_src["src"] + find_file_from_media_url(src_attr) + + all_transcript_data = content_soup.find_all(attrs={'class':'transcript'}) + for each_transcript in all_transcript_data: + data_ele = each_transcript.findNext('object',data=True) + if data_ele: + if 'media' in data_ele['data']: + find_file_from_media_url(data_ele['data']) + + all_transcript_data = content_soup.find_all(attrs={'class':'transcript-data'}) + for each_transcript in all_transcript_data: + data_ele = each_transcript.findNext('object',data=True) + if data_ele: + if 'media' in data_ele['data']: + find_file_from_media_url(data_ele['data']) + except Exception as pick_media_err: + error_log = "\n !!! Error found in pick_media_from_content()." + error_log += "\nError: " + str(pick_media_err) + print "\n Error: ", error_log + log_file.write( str(error_log)) + print error_log + pass + +def copy_rcs(node): + ''' + Actual copying of RCS files from /data/rcs-repo/ to export_path/rcs-repo + of the nodes called from dump_node() and build_rcs() + ''' + if node: + global log_file + try: + # To update RCS + # import ipdb; ipdb.set_trace() + path = historyMgr.get_file_path(node) + path = path + ",v" + + if not os.path.exists(path): + path = historyMgr.get_file_path(node) + path = path + ",v" + # log_file.write( "\n RCS Copied " + str(path) + print "\nDATA_EXPORT_PATH: ", DATA_EXPORT_PATH + cp = "cp -vu " + path + " " +" --parents " + DATA_EXPORT_PATH + "/" + subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) + + + except Exception as copyRCSError: + error_log = "\n !!! Error found while Copying RCS ." + error_log += "\nError: " + str(copyRCSError) + log_file.write( str(error_log)) + print error_log + pass + +def dumping_call(node, collection_name): + try: + global log_file + global GROUP_ID + global DUMPED_NODE_IDS + log_file.write( "\nDumping Call for : " + str(node)) + if (node._id == GROUP_ID or node._type != "Group") and node._id not in DUMPED_NODE_IDS: + build_rcs(node, collection_name) + + if collection_name == node_collection: + get_triple_data(node._id) + DUMPED_NODE_IDS.add(node._id) + if 'File' in node.member_of_names_list: + get_file_node_details(node, exclude_node=True) + else: + DUMPED_NODE_IDS.add(node._id) + log_file.write( "\n Dump node finished for: " + str(node._id)) + else: + log_file.write( "\n Already dumped node: " + str(node._id)) + + except Exception as dumping_call_err: + error_log = "\n !!! Error found in dumping_call_node() ." 
+ error_log += "\nError: " + str(dumping_call_err) + log_file.write( str(error_log)) + print error_log + pass + +def update_globals(variables_dict): + try: + global GLOBAL_DICT + global GROUP_CONTRIBUTORS + global DUMP_PATH + global TOP_PATH + global GROUP_ID + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + global RESTORE_USER_DATA + global SCHEMA_MAP_PATH + global log_file_path + global DUMP_NODES_LIST + global DUMPED_NODE_IDS + global ROOT_DUMP_NODE_ID + global ROOT_DUMP_NODE_NAME + global MULTI_DUMP + global log_file + + GLOBAL_DICT = variables_dict + GROUP_CONTRIBUTORS = variables_dict.get("GROUP_CONTRIBUTORS", None) + DUMP_PATH = variables_dict.get("DUMP_PATH", None) + TOP_PATH = variables_dict.get("TOP_PATH", None) + GROUP_ID = variables_dict.get("GROUP_ID", None) + DATA_EXPORT_PATH = variables_dict.get("DATA_EXPORT_PATH", None) + MEDIA_EXPORT_PATH = variables_dict.get("MEDIA_EXPORT_PATH", None) + RESTORE_USER_DATA = variables_dict.get("RESTORE_USER_DATA", None) + SCHEMA_MAP_PATH = variables_dict.get("SCHEMA_MAP_PATH", None) + log_file_path = variables_dict.get("log_file_path", None) + if log_file_path: + log_file = open(log_file_path, 'a+') + log_file.write("\n######### Export-Logic entered at : " + str(datetime.datetime.now()) + " #########\n\n") + + DUMP_NODES_LIST = variables_dict.get("DUMP_NODES_LIST", None) + DUMPED_NODE_IDS = variables_dict.get("DUMPED_NODE_IDS", None) + ROOT_DUMP_NODE_ID = variables_dict.get("ROOT_DUMP_NODE_ID", None) + ROOT_DUMP_NODE_NAME = variables_dict.get("ROOT_DUMP_NODE_NAME", None) + MULTI_DUMP = variables_dict.get("MULTI_DUMP", None) + + except Exception as globals_err: + print "\n Error in update_globals() in export_logic.py: ", globals_err + pass + +def dump_node(collection_name=node_collection, node=None, node_id=None, node_id_list=None, variables_dict=None): + ''' + Receives all nodes pertaining to exporting group belonging to all existing collections. + Calls build_rcs. + ''' + try: + if variables_dict: + update_globals(variables_dict) + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + global log_file + log_file.write( "\n dump_node invoked for: " + str(collection_name)) + if node: + dumping_call(node,collection_name) + elif node_id: + log_file.write( "\tNode_id : " + str(node_id)) + node = collection_name.one({'_id': ObjectId(node_id), '_type': {'$nin': ['Group', 'Author']}}) + if node: + dumping_call(node,collection_name) + + elif node_id_list: + node_cur = collection_name.one({'_id': {'$in': node_id_list}, '_type': {'$nin': ['Group', 'Author']}}) + log_file.write( "\tNode_id_list : " + str(node_id_list)) + for each_node in nodes_cur: + if each_node: + dumping_call(node,collection_name) + + except Exception as dump_err: + error_log = "\n !!! Error found while taking dump in dump_node() ." 
+ error_log += "\nError: " + str(dump_err) + log_file.write( str(error_log)) + print error_log + pass + +def dump_media_data(media_path): + # Copy media file to /data/media location + # print MEDIA_EXPORT_PATH + global log_file + log_file.write( "\n--- Media Copying in process --- "+ str(media_path)) + try: + if media_path: + fp = os.path.join(MEDIA_ROOT,media_path) + if os.path.exists(fp): + cp = "cp -u " + fp + " " +" --parents " + MEDIA_EXPORT_PATH + "/" + subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) + log_file.write( "\n Media Copied: " + str(fp)) + + else: + log_file.write( "\n Media NOT Copied: " + str(fp)) + else: + log_file.write( "\n No MediaPath found: " + str(media_path)) + except Exception as dumpMediaError: + error_log = "\n !!! Error found while taking dump of Media.\n" + str(media_path) + error_log += "\nError: " + str(dumpMediaError) + log_file.write( str(error_log)) + print error_log + pass + +def get_triple_data(node_id): + ''' + Gets all data stored in triples for this node. + Fetches GAttrtibutes as wells as GRelations. + ''' + try: + global log_file + log_file.write( "\n get_triple_data invoked for: " + str(node_id)) + + triple_query = {"_type": {'$in': ["GAttribute", "GRelation"]}, "subject": ObjectId(node_id)} + + node_gattr_grel_cur = triple_collection.find(triple_query) + if node_gattr_grel_cur: + for each_triple_node in node_gattr_grel_cur: + fetch_value = None + dump_node(node=each_triple_node, + collection_name=triple_collection) + + # Get ObjectIds in object_value fields + + if each_triple_node._type == u"GAttribute": + fetch_value = "object_value" + elif each_triple_node._type == u"GRelation": + fetch_value = "right_subject" + log_file.write( "\n fetch_value: " + str(fetch_value)) + if fetch_value == "right_subject": + log_file.write( "\n Picking up right-subject nodes.\n\t " + str(each_triple_node[fetch_value])) + + if type(each_triple_node[fetch_value]) == list and all(isinstance(each_obj_value, ObjectId) for each_obj_value in each_triple_node[fetch_value]): + log_file.write( "\n List: " + str(True)) + dump_node(node_id_list=each_triple_node[fetch_value], + collection_name=node_collection) + + elif isinstance(each_triple_node[fetch_value], ObjectId): + log_file.write( "\n ObjectId: " + str(True)) + dump_node(node_id=each_triple_node[fetch_value], + collection_name=node_collection) + + log_file.write( "\n get_triple_data finished for: " + str(node_id)) + + except Exception as get_triple_data_err: + error_log = "\n !!! Error found while taking triple data in get_triple_data() ." 
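
Condensed, get_triple_data() above walks every GAttribute/GRelation whose subject is the given node, dumps the triple itself, and recursively dumps any ObjectId it finds in object_value (attributes) or right_subject (relations). A simplified equivalent of that loop (node_id is assumed to be the _id of a node already being exported):

    triples = triple_collection.find({"_type": {"$in": ["GAttribute", "GRelation"]},
                                      "subject": ObjectId(node_id)})
    for each_triple in triples:
        dump_node(node=each_triple, collection_name=triple_collection)
        value_field = "object_value" if each_triple._type == "GAttribute" else "right_subject"
        value = each_triple[value_field]
        if isinstance(value, ObjectId):
            dump_node(node_id=value, collection_name=node_collection)
        elif isinstance(value, list) and all(isinstance(v, ObjectId) for v in value):
            dump_node(node_id_list=value, collection_name=node_collection)
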
+ error_log += "\nError: " + str(get_triple_data_err) + print "\n Error: ", error_log + log_file.write( str(error_log)) + print error_log + pass + + + +def get_file_node_details(node, exclude_node=False): + ''' + Check if_file field and take its dump + 'if_file': { + 'mime_type': basestring, + 'original': {'id': ObjectId, 'relurl': basestring}, + 'mid': {'id': ObjectId, 'relurl': basestring}, + 'thumbnail': {'id': ObjectId, 'relurl': basestring} + }, + + ''' + try: + global log_file + log_file.write( "\n get_file_node_details invoked for: " + str(node)) + if not exclude_node: + dump_node(node=node, collection_name=node_collection) + dump_node(node_id=node.if_file['original']['id'], collection_name=filehive_collection) + dump_node(node_id=node.if_file['mid']['id'], collection_name=filehive_collection) + dump_node(node_id=node.if_file['thumbnail']['id'], collection_name=filehive_collection) + dump_media_data(node.if_file['original']['relurl']) + dump_media_data(node.if_file['mid']['relurl']) + dump_media_data(node.if_file['thumbnail']['relurl']) + # if each_field == 'group_set': + # for each_grp_id in node.group_set: + # group_node = node_collection.find_one({"_id":ObjectId(each_grp_id)}) + # if group_node and group_node._type != unicode('Author'): + # group_set.extend(group_node.group_set) + # if each_field == 'author_set': + # user_list.extend(node.author_set) + except Exception as file_dump_err: + error_log = "\n !!! Error found while taking dump in get_file_node_details() ." + error_log += "\nError: " + str(file_dump_err) + log_file.write( str(error_log)) + print error_log + pass + diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py index 7715c9b2eb..fe7cdb4269 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py @@ -13,11 +13,12 @@ from pymongo.objectid import ObjectId from django.template.defaultfilters import slugify from django.core.management.base import BaseCommand, CommandError +from schema_mapping import create_factory_schema_mapper +from users_dump_restore import create_users_dump +from export_logic import dump_node from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection from gnowsys_ndf.ndf.models import HistoryManager from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID -from schema_mapping import create_factory_schema_mapper -from users_dump_restore import create_users_dump from gnowsys_ndf.ndf.views.methods import get_group_name_id from gnowsys_ndf.ndf.templatetags.simple_filters import get_latest_git_hash, get_active_branch_name @@ -31,6 +32,7 @@ RESTORE_USER_DATA = False SCHEMA_MAP_PATH = None log_file = None +log_file_path = None historyMgr = HistoryManager() DUMP_NODES_LIST = [] DUMPED_NODE_IDS = set() @@ -46,7 +48,7 @@ def create_log_file(dump_node_id): log_file_name = 'group_dump_' + str(dump_node_id)+ '.log' if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): os.makedirs(GSTUDIO_LOGS_DIR_PATH) - + global log_file_path log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) # print log_file_path global log_file @@ -97,54 +99,6 @@ def write_md5_of_dump(group_dump_path, configs_file_path): with open(configs_file_path, 'a+') as configs_file_out: configs_file_out.write("\nMD5='" + str(md5hash) + "'") -def get_triple_data(node_id): - ''' - Gets all data stored in triples for this 
node. - Fetches GAttrtibutes as wells as GRelations. - ''' - try: - global log_file - log_file.write("\n get_triple_data invoked for: " + str(node_id)) - - triple_query = {"_type": {'$in': ["GAttribute", "GRelation"]}, "subject": ObjectId(node_id)} - - node_gattr_grel_cur = triple_collection.find(triple_query) - if node_gattr_grel_cur: - for each_triple_node in node_gattr_grel_cur: - fetch_value = None - dump_node(node=each_triple_node, - collection_name=triple_collection) - # Get ObjectIds in object_value fields - - if each_triple_node._type == u"GAttribute": - fetch_value = "object_value" - elif each_triple_node._type == u"GRelation": - fetch_value = "right_subject" - log_file.write("\n fetch_value: " + str(fetch_value)) - if fetch_value == "right_subject": - log_file.write("\n Picking up right-subject nodes.\n\t " + str(each_triple_node[fetch_value])) - - if type(each_triple_node[fetch_value]) == list and all(isinstance(each_obj_value, ObjectId) for each_obj_value in each_triple_node[fetch_value]): - log_file.write("\n List: " + str(True)) - dump_node(node_id_list=each_triple_node[fetch_value], - collection_name=node_collection) - - elif isinstance(each_triple_node[fetch_value], ObjectId): - log_file.write("\n ObjectId: " + str(True)) - dump_node(node_id=each_triple_node[fetch_value], - collection_name=node_collection) - - log_file.write("\n get_triple_data finished for: " + str(node_id)) - - except Exception as get_triple_data_err: - error_log = "\n !!! Error found while taking triple data in get_triple_data() ." - error_log += "\nError: " + str(get_triple_data_err) - print "\n Error: ", error_log - log_file.write(error_log) - print error_log - pass - - def core_export(group_node): if group_node: print "\tRequest received for Export of : ", group_node.name , ' | ObjectId: ', group_node._id @@ -192,10 +146,26 @@ def call_exit(): print "\n Exiting..." os._exit(0) +def get_nested_ids(node,field_name): + ''' + Recursive function to fetch Objectids from a + particular field of passed node. 
+ field_name can be : collection_set, post_node, prior_node + ''' + global GLOBAL_DICT + if node[field_name]: + for each_id in node[field_name]: + each_node = node_collection.one({"_id":ObjectId(each_id), '_type': {'$nin': ['Group', 'Author']}}) + if each_node and (node._id != each_node._id): + dump_node(node=each_node, collection_name=node_collection, variables_dict=GLOBAL_DICT) + if each_node and each_node[field_name]: + get_nested_ids(each_node, field_name) + def worker_export(nodes_cur): + global GLOBAL_DICT for each_node in nodes_cur: print ".", - dump_node(node=each_node,collection_name=node_collection) + dump_node(node=each_node,collection_name=node_collection,variables_dict=GLOBAL_DICT) # node_collection_ids.add(each_node._id) if each_node.collection_set: @@ -207,16 +177,54 @@ def worker_export(nodes_cur): if each_node.post_node: get_nested_ids(each_node,'post_node') +def update_globals(): + + global GLOBAL_DICT + global GROUP_CONTRIBUTORS + global DUMP_PATH + global TOP_PATH + global GROUP_ID + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + global RESTORE_USER_DATA + global SCHEMA_MAP_PATH + global log_file_path + global DUMP_NODES_LIST + global DUMPED_NODE_IDS + global ROOT_DUMP_NODE_ID + global ROOT_DUMP_NODE_NAME + global MULTI_DUMP + + GLOBAL_DICT = { + "GROUP_CONTRIBUTORS": GROUP_CONTRIBUTORS, + "DUMP_PATH": DUMP_PATH, + "TOP_PATH": TOP_PATH, + "GROUP_ID": GROUP_ID, + "DATA_EXPORT_PATH": DATA_EXPORT_PATH, + "MEDIA_EXPORT_PATH": MEDIA_EXPORT_PATH, + "RESTORE_USER_DATA": RESTORE_USER_DATA, + "SCHEMA_MAP_PATH": SCHEMA_MAP_PATH, + "log_file_path": log_file_path, + "DUMP_NODES_LIST": DUMP_NODES_LIST, + "DUMPED_NODE_IDS": DUMPED_NODE_IDS, + "ROOT_DUMP_NODE_ID": ROOT_DUMP_NODE_ID, + "ROOT_DUMP_NODE_NAME": ROOT_DUMP_NODE_NAME, + "MULTI_DUMP": MULTI_DUMP + } + def call_group_export(group_node, nodes_cur, num_of_processes=5): ''' Introducing multiprocessing to use cores available on the system to take dump of nodes of the entire group. ''' - dump_node(node=group_node,collection_name=node_collection) + global GLOBAL_DICT + global log_file + update_globals() + + dump_node(node=group_node,collection_name=node_collection, variables_dict=GLOBAL_DICT) if group_node.collection_set: get_nested_ids(group_node,'collection_set') - nodes_cur = list(nodes_cur) worker_export(nodes_cur) # print "\nlen(nodes_cur): ", len(nodes_cur) @@ -251,278 +259,6 @@ def call_group_export(group_node, nodes_cur, num_of_processes=5): # return resultlist -def build_rcs(node, collection_name): - ''' - Updates the rcs json with the current node's strcuture that - might have missed due to update queries. 
- Runs a save() method on the node and calls copy_rcs() - ''' - # import ipdb; ipdb.set_trace() - if node: - global log_file - global GROUP_CONTRIBUTORS - try: - node.save() - if collection_name is node_collection and node.content: - pick_media_from_content(BeautifulSoup(node.content, 'html.parser')) - - # if collection_name is triple_collection: - # # if 'attribute_type' in node: - # # triple_node_RT_AT = node_collection.one({'_id': node.attribute_type}) - # # elif 'relation_type' in node: - # # triple_node_RT_AT = node_collection.one({'_id': node.relation_type}) - # # node.save(triple_node=triple_node_RT_AT, triple_id=triple_node_RT_AT._id) - # node.save() - # elif collection_name is node_collection: - # node.save() - # if node.content: - # pick_media_from_content(BeautifulSoup(node.content, 'html.parser')) - # elif collection_name is filehive_collection: - # # dump_node(node_id=node['first_parent'], collection_name=node_collection) - # node.save() - # else: - # node.save() - try: - global RESTORE_USER_DATA - if RESTORE_USER_DATA: - if "contributors" in node: - GROUP_CONTRIBUTORS.extend(node.contributors) - except Exception as no_contributors_err: - log_file.write("\n Error while fetching contributors " + str(no_contributors_err) +\ - " for: " + str(node._id) + " with contributors: " + str(node.contributors)) - pass - log_file.write("\n RCS Built for " + str(node._id) ) - copy_rcs(node) - except Exception as buildRCSError: - error_log = "\n !!! Error found while Building RCS ." - error_log += "\nError: " + str(buildRCSError) - log_file.write(error_log) - print error_log - pass - -def find_file_from_media_url(source_attr): - try: - global log_file - log_file.write("\n find_file_from_media_url invoked for: " + str(source_attr)) - - if "media" in source_attr: - source_attr = source_attr.split("media/")[-1] - file_node = node_collection.find_one({"$or": [{'if_file.original.relurl': source_attr}, - {'if_file.mid.relurl': source_attr},{'if_file.thumbnail.relurl': source_attr}]}) - - elif "readDoc" in source_attr: - split_src = source_attr.split('/') - node_id = split_src[split_src.index('readDoc') + 1] - file_node = node_collection.one({'_id': ObjectId(node_id)}) - - if file_node: - log_file.write("\n media file_node gs found: " + str(file_node._id) ) - get_file_node_details(file_node) - - except Exception as find_file_from_media_url_err: - error_log = "\n !!! Error found while taking dump in find_file_from_media_url() ." 
- error_log += "\nError: " + str(find_file_from_media_url_err) - print "\n Error: ", error_log - log_file.write(error_log) - print error_log - pass - -def pick_media_from_content(content_soup): - ''' - Parses through the content of node and finds the media - files and dump it - ''' - try: - global log_file - log_file.write("\n pick_media_from_content invoked.") - - all_src = content_soup.find_all(src=re.compile('media|readDoc')) - # Fetching the files - for each_src in all_src: - src_attr = each_src["src"] - find_file_from_media_url(src_attr) - - all_transcript_data = content_soup.find_all(attrs={'class':'transcript'}) - for each_transcript in all_transcript_data: - data_ele = each_transcript.findNext('object',data=True) - if data_ele: - if 'media' in data_ele['data']: - find_file_from_media_url(data_ele['data']) - - all_transcript_data = content_soup.find_all(attrs={'class':'transcript-data'}) - for each_transcript in all_transcript_data: - data_ele = each_transcript.findNext('object',data=True) - if data_ele: - if 'media' in data_ele['data']: - find_file_from_media_url(data_ele['data']) - except Exception as pick_media_err: - error_log = "\n !!! Error found in pick_media_from_content()." - error_log += "\nError: " + str(pick_media_err) - print "\n Error: ", error_log - log_file.write(error_log) - print error_log - pass - -def copy_rcs(node): - ''' - Actual copying of RCS files from /data/rcs-repo/ to export_path/rcs-repo - of the nodes called from dump_node() and build_rcs() - ''' - if node: - global log_file - try: - # To update RCS - path = historyMgr.get_file_path(node) - path = path + ",v" - - if not os.path.exists(path): - path = historyMgr.get_file_path(node) - path = path + ",v" - - cp = "cp -vu " + path + " " +" --parents " + DATA_EXPORT_PATH + "/" - subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) - - log_file.write("\n RCS Copied " + str(path) ) - - except Exception as copyRCSError: - error_log = "\n !!! Error found while Copying RCS ." - error_log += "\nError: " + str(copyRCSError) - log_file.write(error_log) - print error_log - pass - -def dumping_call(node, collection_name): - try: - global log_file - global GROUP_ID - global DUMPED_NODE_IDS - log_file.write("\nDumping Call for : " + str(node)) - if (node._id == GROUP_ID or node._type != "Group") and node._id not in DUMPED_NODE_IDS: - build_rcs(node, collection_name) - - if collection_name == node_collection: - get_triple_data(node._id) - DUMPED_NODE_IDS.add(node._id) - if 'File' in node.member_of_names_list: - get_file_node_details(node, exclude_node=True) - else: - DUMPED_NODE_IDS.add(node._id) - log_file.write("\n Dump node finished for: " + str(node._id) ) - else: - log_file.write("\n Already dumped node: " + str(node._id) ) - - except Exception as dumping_call_err: - error_log = "\n !!! Error found in dumping_call_node() ." - error_log += "\nError: " + str(dumping_call_err) - log_file.write(error_log) - print error_log - pass - -def dump_node(collection_name=node_collection, node=None, node_id=None, node_id_list=None): - ''' - Receives all nodes pertaining to exporting group belonging to all existing collections. - Calls build_rcs. 
- ''' - try: - global log_file - log_file.write("\n dump_node invoked for: " + str(collection_name)) - if node: - dumping_call(node,collection_name) - elif node_id: - log_file.write("\tNode_id : " + str(node_id)) - node = collection_name.one({'_id': ObjectId(node_id), '_type': {'$nin': ['Group', 'Author']}}) - if node: - dumping_call(node,collection_name) - - elif node_id_list: - node_cur = collection_name.one({'_id': {'$in': node_id_list}, '_type': {'$nin': ['Group', 'Author']}}) - log_file.write("\tNode_id_list : " + str(node_id_list)) - for each_node in nodes_cur: - if each_node: - dumping_call(node,collection_name) - - except Exception as dump_err: - error_log = "\n !!! Error found while taking dump in dump_node() ." - error_log += "\nError: " + str(dump_err) - log_file.write(error_log) - print error_log - pass - -def dump_media_data(media_path): - # Copy media file to /data/media location - # print MEDIA_EXPORT_PATH - global log_file - log_file.write("\n--- Media Copying in process --- "+ str(media_path)) - try: - if media_path: - fp = os.path.join(MEDIA_ROOT,media_path) - if os.path.exists(fp): - cp = "cp -u " + fp + " " +" --parents " + MEDIA_EXPORT_PATH + "/" - subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) - log_file.write("\n Media Copied: " + str(fp) ) - - else: - log_file.write("\n Media NOT Copied: " + str(fp) ) - else: - log_file.write("\n No MediaPath found: " + str(media_path) ) - - except Exception as dumpMediaError: - error_log = "\n !!! Error found while taking dump of Media.\n" + str(media_path) - error_log += "\nError: " + str(dumpMediaError) - log_file.write(error_log) - print error_log - pass - -def get_file_node_details(node, exclude_node=False): - ''' - Check if_file field and take its dump - 'if_file': { - 'mime_type': basestring, - 'original': {'id': ObjectId, 'relurl': basestring}, - 'mid': {'id': ObjectId, 'relurl': basestring}, - 'thumbnail': {'id': ObjectId, 'relurl': basestring} - }, - - ''' - try: - global log_file - log_file.write("\n get_file_node_details invoked for: " + str(node)) - if not exclude_node: - dump_node(node=node, collection_name=node_collection) - dump_node(node_id=node.if_file['original']['id'], collection_name=filehive_collection) - dump_node(node_id=node.if_file['mid']['id'], collection_name=filehive_collection) - dump_node(node_id=node.if_file['thumbnail']['id'], collection_name=filehive_collection) - dump_media_data(node.if_file['original']['relurl']) - dump_media_data(node.if_file['mid']['relurl']) - dump_media_data(node.if_file['thumbnail']['relurl']) - # if each_field == 'group_set': - # for each_grp_id in node.group_set: - # group_node = node_collection.find_one({"_id":ObjectId(each_grp_id)}) - # if group_node and group_node._type != unicode('Author'): - # group_set.extend(group_node.group_set) - # if each_field == 'author_set': - # user_list.extend(node.author_set) - except Exception as file_dump_err: - error_log = "\n !!! Error found while taking dump in get_file_node_details() ." - error_log += "\nError: " + str(file_dump_err) - log_file.write(error_log) - print error_log - pass - -def get_nested_ids(node,field_name): - ''' - Recursive function to fetch Objectids from a - particular field of passed node. 
- field_name can be : collection_set, post_node, prior_node - ''' - if node[field_name]: - for each_id in node[field_name]: - each_node = node_collection.one({"_id":ObjectId(each_id), '_type': {'$nin': ['Group', 'Author']}}) - if each_node and (node._id != each_node._id): - dump_node(node=each_node, collection_name=node_collection) - if each_node and each_node[field_name]: - get_nested_ids(each_node, field_name) - def get_counter_ids(group_id): ''' Fetch all the Counter instances of the exporting Group @@ -530,7 +266,7 @@ def get_counter_ids(group_id): counter_collection_cur = counter_collection.find({'group_id':ObjectId(group_id)}) if counter_collection_cur : for each_obj in counter_collection_cur : - dump_node(node=each_obj,collection_name=counter_collection) + dump_node(node=each_obj,collection_name=counter_collection, variables_dict=GLOBAL_DICT) class Command(BaseCommand): def handle(self, *args, **options): @@ -539,12 +275,12 @@ def handle(self, *args, **options): global ROOT_DUMP_NODE_ID global ROOT_DUMP_NODE_NAME global MULTI_DUMP + global GLOBAL_DICT input_name_or_id = raw_input("\n\tPlease enter ObjectID of the Group: ") dump_node_obj = node_collection.one({'_id': ObjectId(input_name_or_id)}) group_node = None if dump_node_obj: - # import ipdb; ipdb.set_trace() log_file_path = create_log_file(dump_node_obj._id) ROOT_DUMP_NODE_ID = dump_node_obj._id ROOT_DUMP_NODE_NAME = dump_node_obj.name @@ -554,8 +290,8 @@ def handle(self, *args, **options): SCHEMA_MAP_PATH = DUMP_PATH create_factory_schema_mapper(SCHEMA_MAP_PATH) else: - global DUMP_NODE_objS_LIST global TOP_PATH + global DUMP_NODES_LIST datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', slugify(dump_node_obj.name) + "_"+ str(datetimestamp)) SCHEMA_MAP_PATH = TOP_PATH @@ -582,7 +318,7 @@ def handle(self, *args, **options): core_export(each_unit) else: call_exit() - dump_node(node=dump_node_obj,collection_name=node_collection) + dump_node(node=dump_node_obj,collection_name=node_collection, variables_dict=GLOBAL_DICT) create_factory_schema_mapper(SCHEMA_MAP_PATH) print "*"*70 print "\n This will take few minutes. 
Please be patient.\n" From 1d230a435f6786db9d576d64f7598d4bd753663e Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Mon, 1 Jan 2018 16:39:55 +0530 Subject: [PATCH 02/13] node-level dump-restore --- .../ndf/management/commands/node_export.py | 194 ++++++ .../ndf/management/commands/node_import.py | 604 ++++++++++++++++++ 2 files changed, 798 insertions(+) create mode 100644 gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py create mode 100644 gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py new file mode 100644 index 0000000000..ce46604136 --- /dev/null +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py @@ -0,0 +1,194 @@ +import os +import datetime +import subprocess +import re +# from threading import Thread +# import multiprocessing +# import math +from bs4 import BeautifulSoup + +try: + from bson import ObjectId +except ImportError: # old pymongo + from pymongo.objectid import ObjectId +from django.template.defaultfilters import slugify +from django.core.management.base import BaseCommand, CommandError +from schema_mapping import create_factory_schema_mapper +from users_dump_restore import create_users_dump +from export_logic import dump_node +from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection +from gnowsys_ndf.ndf.models import HistoryManager +from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID +from gnowsys_ndf.ndf.views.methods import get_group_name_id +from gnowsys_ndf.ndf.templatetags.simple_filters import get_latest_git_hash, get_active_branch_name + +# global variables declaration +GROUP_CONTRIBUTORS = [] +DUMP_PATH = None +TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export') +node_id = None +DATA_EXPORT_PATH = None +MEDIA_EXPORT_PATH = None +RESTORE_USER_DATA = False +SCHEMA_MAP_PATH = None +log_file = None +log_file_path = None +historyMgr = HistoryManager() +DUMP_NODES_LIST = [] +DUMPED_NODE_IDS = set() +ROOT_DUMP_NODE_ID = None +ROOT_DUMP_NODE_NAME = None +MULTI_DUMP = False + +def create_log_file(dump_node_id): + ''' + Creates log file in gstudio-logs/ with + the name of the dump folder + ''' + log_file_name = 'node_export_' + str(dump_node_id)+ '.log' + if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): + os.makedirs(GSTUDIO_LOGS_DIR_PATH) + global log_file_path + log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) + # print log_file_path + global log_file + log_file = open(log_file_path, 'w+') + log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") + return log_file_path + +def setup_dump_path(node_id): + ''' + Creates factory_schema.json which will hold basic info + like ObjectId, name, type of TYPES_LIST and GSTUDIO_DEFAULT_GROUPS + ''' + global DUMP_PATH + global TOP_PATH + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + # datetimestamp = datetime.datetime.now().isoformat() + datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") + DUMP_PATH = TOP_PATH + DATA_EXPORT_PATH = os.path.join(DUMP_PATH, 'dump') + MEDIA_EXPORT_PATH = os.path.join(DATA_EXPORT_PATH, 'media_files') + if not os.path.exists(DATA_EXPORT_PATH): + os.makedirs(DATA_EXPORT_PATH) + if not os.path.exists(MEDIA_EXPORT_PATH): + os.makedirs(MEDIA_EXPORT_PATH) + return DATA_EXPORT_PATH + +def create_configs_file(node_id): + global 
RESTORE_USER_DATA + global DUMP_PATH + configs_file_path = os.path.join(DUMP_PATH, "migration_configs.py") + with open(configs_file_path, 'w+') as configs_file_out: + configs_file_out.write("\nRESTORE_USER_DATA=" + str(RESTORE_USER_DATA)) + configs_file_out.write("\nGSTUDIO_INSTITUTE_ID='" + str(GSTUDIO_INSTITUTE_ID) + "'") + configs_file_out.write("\nNODE_ID='" + str(node_id) + "'") + configs_file_out.write("\nROOT_DUMP_NODE_NAME='" + str(ROOT_DUMP_NODE_NAME) + "'") + configs_file_out.write("\nGIT_COMMIT_HASH='" + str(get_latest_git_hash()) + "'") + configs_file_out.write("\nGIT_BRANCH_NAME='" + str(get_active_branch_name()) + "'") + configs_file_out.write('\nSYSTEM_DETAILS="' + str(os.uname()) + '"') + return configs_file_path + +def write_md5_of_dump(group_dump_path, configs_file_path): + global DUMP_PATH + from checksumdir import dirhash + md5hash = dirhash(group_dump_path, 'md5') + with open(configs_file_path, 'a+') as configs_file_out: + configs_file_out.write("\nMD5='" + str(md5hash) + "'") + + + +def call_exit(): + print "\n Exiting..." + os._exit(0) + +def update_globals(): + global GLOBAL_DICT + global GROUP_CONTRIBUTORS + global DUMP_PATH + global TOP_PATH + global node_id + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + global RESTORE_USER_DATA + global SCHEMA_MAP_PATH + global log_file_path + global DUMP_NODES_LIST + global DUMPED_NODE_IDS + global ROOT_DUMP_NODE_ID + global ROOT_DUMP_NODE_NAME + global MULTI_DUMP + + GLOBAL_DICT = { + "GROUP_CONTRIBUTORS": GROUP_CONTRIBUTORS, + "DUMP_PATH": DUMP_PATH, + "TOP_PATH": TOP_PATH, + "node_id": node_id, + "DATA_EXPORT_PATH": DATA_EXPORT_PATH, + "MEDIA_EXPORT_PATH": MEDIA_EXPORT_PATH, + "RESTORE_USER_DATA": RESTORE_USER_DATA, + "SCHEMA_MAP_PATH": SCHEMA_MAP_PATH, + "log_file_path": log_file_path, + "DUMP_NODES_LIST": DUMP_NODES_LIST, + "DUMPED_NODE_IDS": DUMPED_NODE_IDS, + "ROOT_DUMP_NODE_ID": ROOT_DUMP_NODE_ID, + "ROOT_DUMP_NODE_NAME": ROOT_DUMP_NODE_NAME, + "MULTI_DUMP": MULTI_DUMP + } + + +class Command(BaseCommand): + def handle(self, *args, **options): + global SCHEMA_MAP_PATH + global DUMP_PATH + global ROOT_DUMP_NODE_ID + global ROOT_DUMP_NODE_NAME + global MULTI_DUMP + global GLOBAL_DICT + input_name_or_id = raw_input("\n\tPlease enter ObjectID of the Node: ") + dump_node_obj = node_collection.one({'_id': ObjectId(input_name_or_id), '_type': 'GSystem'}) + + if dump_node_obj: + log_file_path = create_log_file(dump_node_obj._id) + ROOT_DUMP_NODE_ID = dump_node_obj._id + ROOT_DUMP_NODE_NAME = dump_node_obj.name + + global TOP_PATH + global DUMP_NODES_LIST + datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") + TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', slugify(dump_node_obj.name) + "_"+ str(datetimestamp)) + SCHEMA_MAP_PATH = TOP_PATH + print "\tRequest received for Export of : ", dump_node_obj.name , ' | ObjectId: ', dump_node_obj._id + global RESTORE_USER_DATA + user_data_dump = raw_input("\n\tDo you want to include Users in this export ? 
Enter y/Y to continue:\t ") + if user_data_dump in ['y', 'Y']: + RESTORE_USER_DATA = True + else: + RESTORE_USER_DATA = False + + print "START : ", str(datetime.datetime.now()) + group_dump_path = setup_dump_path(slugify(dump_node_obj._id)) + + global node_id + node_id = dump_node_obj._id + if RESTORE_USER_DATA: + create_users_dump(group_dump_path, dump_node_obj.contributors) + + configs_file_path = create_configs_file(dump_node_obj._id) + write_md5_of_dump(group_dump_path, configs_file_path) + global log_file + + log_file.write("\n*************************************************************") + log_file.write("\n######### Script Completed at : " + str(datetime.datetime.now()) + " #########\n\n") + print "END : ", str(datetime.datetime.now()) + update_globals() + dump_node(node=dump_node_obj,collection_name=node_collection, variables_dict=GLOBAL_DICT) + create_factory_schema_mapper(SCHEMA_MAP_PATH) + + print "*"*70 + print "\n This will take few minutes. Please be patient.\n" + print "\n Log will be found at: ", log_file_path + print "*"*70 + log_file.close() + call_exit() \ No newline at end of file diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py new file mode 100644 index 0000000000..7476f3d491 --- /dev/null +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py @@ -0,0 +1,604 @@ +''' +Import can also be called using command line args as following: + python manage.py node_import + for e.g: + python manage.py node_import y y y +''' +import os +import json +import imp +import subprocess +from bson import json_util +import pathlib2 +try: + from bson import ObjectId +except ImportError: # old pymongo + from pymongo.objectid import ObjectId + +import time +import datetime + +# from bson.json_util import dumps,loads,object_hook +from django.core.management.base import BaseCommand, CommandError +from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection +from gnowsys_ndf.ndf.models import HistoryManager, RCS +from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, RCS_REPO_DIR +from users_dump_restore import load_users_dump +from gnowsys_ndf.settings import RCS_REPO_DIR_HASH_LEVEL +from schema_mapping import update_factory_schema_mapper +from gnowsys_ndf.ndf.views.utils import replace_in_list, merge_lists_and_maintain_unique_ele + +# global variables declaration +DATA_RESTORE_PATH = None +DATA_DUMP_PATH = None +DEFAULT_USER_ID = 1 +DEFAULT_USER_SET = False +USER_ID_MAP = {} +SCHEMA_ID_MAP = {} +log_file = None +CONFIG_VARIABLES = None +DATE_AT_IDS = [] +GROUP_CONTAINERS = ['Module'] +date_related_at_cur = node_collection.find({'_type': 'AttributeType', + 'name': {'$in': ["start_time", "end_time", "start_enroll", "end_enroll"]}}) +for each_date_related_at in date_related_at_cur: + DATE_AT_IDS.append(each_date_related_at._id) +history_manager = HistoryManager() +rcs = RCS() + + +''' +Following will be available: + CONFIG_VARIABLES.FORK=True + CONFIG_VARIABLES.CLONE=False + CONFIG_VARIABLES.RESTORE_USER_DATA=True + CONFIG_VARIABLES.GSTUDIO_INSTITUTE_ID='MZ-10' + CONFIG_VARIABLES.NODE_ID='58dded48cc566201992f6e79' + CONFIG_VARIABLES.MD5='aeba0e3629fb0443861c699ae327d962' +''' + +def create_log_file(restore_path): + ''' + Creates log file in gstudio-logs/ with + the name of the dump folder + ''' + restore_path = restore_path.split("/")[-1] + log_file_name = 'node_import_' + 
str(CONFIG_VARIABLES.NODE_ID)+ '.log' + if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): + os.makedirs(GSTUDIO_LOGS_DIR_PATH) + + log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) + global log_file + log_file = open(log_file_path, 'w+') + log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") + return log_file_path + + +def call_exit(): + print "\n Exiting..." + os._exit(0) + +def read_config_file(): + """ + Read migration_configs.py file generated during + the export of group and load the variables in + CONFIG_VARIABLES to be accessible in entire program + """ + global CONFIG_VARIABLES + CONFIG_VARIABLES = imp.load_source('config_variables', + os.path.join(DATA_RESTORE_PATH,'migration_configs.py')) + +def validate_data_dump(*args): + """ + For validation of the exported dump and the + importing data-dump, calculate MD5 and + check with CONFIG_VARIABLES.MD5 + This will ensure the exported data is NOT altered + before importing + """ + global log_file + from checksumdir import dirhash + md5hash = dirhash(DATA_DUMP_PATH, 'md5') + if CONFIG_VARIABLES.MD5 != md5hash: + print "\n MD5 NOT matching." + print "\nargs: ", args + if args and len(args) == 4: + proceed_without_validation = args[1] + else: + proceed_without_validation = raw_input("MD5 not matching. Restoration not recommended.\n \ + Enter (y/Y) to continue ?") + if proceed_without_validation not in ['y', 'Y']: + log_file.write("\n Checksum validation Failed on dump data") + call_exit() + else: + print "\nValidation Success..!" + proceed_with_validation = '' + if args and len(args) == 4: + proceed_without_validation = args[1] + else: + proceed_with_validation = raw_input("MD5 Matching.\n \ + Enter (y/Y) to proceed to restoration") + if proceed_with_validation in ['y', 'Y']: + log_file.write("\n Checksum validation Success on dump data") + +def get_file_path_with_id(node_id): + file_name = (node_id + '.json') + + collection_dir = os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo', 'Nodes') + + # Example: + # if -- file_name := "523f59685a409213818e3ec6.json" + # then -- collection_hash_dirs := "6/c/3/8/ + # -- from last (2^0)pos/(2^1)pos/(2^2)pos/(2^3)pos/../(2^n)pos" + # here n := hash_level_num + collection_hash_dirs = "" + for pos in range(0, RCS_REPO_DIR_HASH_LEVEL): + collection_hash_dirs += \ + (node_id[-2**pos] + "/") + file_path = \ + os.path.join(collection_dir, \ + (collection_hash_dirs + file_name)) + # print "\n\nfilepath: ", file_path + return file_path + +def user_objs_restoration(*args): + global USER_ID_MAP + global DEFAULT_USER_ID + global DEFAULT_USER_SET + global log_file + user_json_data = None + if CONFIG_VARIABLES.RESTORE_USER_DATA: + user_dump_restore = raw_input("\n\tUser dump is available. \ + Would you like to restore it (y/n) ?: ") + if user_dump_restore in ['y', 'Y']: + log_file.write("\n Request for users restoration : Yes.") + + user_json_file_path = os.path.join(DATA_DUMP_PATH, 'users_dump.json') + with open(user_json_file_path, 'rb+') as user_json_fin: + user_json_data = json.loads(user_json_fin.read()) + print "\n Restoring Users. Please wait.." + USER_ID_MAP = load_users_dump(DATA_DUMP_PATH, user_json_data) + log_file.write("\n USER_ID_MAP: "+ str(USER_ID_MAP)) + print "\n Completed Restoring Users." + else: + log_file.write("\n Request for users restoration : No.") + DEFAULT_USER_SET = True + default_user_confirmation = raw_input("\n\tRestoration will use default user-id=1. 
\ + \n\tEnter y to continue, or n if you want to use some other id?: ") + if default_user_confirmation in ['y', 'Y']: + log_file.write("\n Request for Default user with id=1 : Yes.") + DEFAULT_USER_ID = 1 + else: + log_file.write("\n Request for Default user with id=1 : No.") + DEFAULT_USER_ID = int(raw_input("Enter user-id: ")) + log_file.write("\n Request for Setting Default user with id :" + str(DEFAULT_USER_SET)) + else: + + print "*"*80 + user_dump_restore_default = '' + if args and len(args) == 4: + user_dump_restore_default = args[3] + else: + user_dump_restore_default = raw_input("\n\tUser dump is NOT available. \ + Would you like to use USER_ID=1 for restoration(y/n) ?: ") + if user_dump_restore_default in ['y', 'Y']: + DEFAULT_USER_SET = True + DEFAULT_USER_ID = 1 + print "\n No RESTORE_USER_DATA available. Setting Default user with id: 1" + log_file.write("\n No RESTORE_USER_DATA available. Setting Default user with id :" + str(DEFAULT_USER_SET)) + +def update_schema_id_for_triple(document_json): + if SCHEMA_ID_MAP: + global log_file + log_file.write("\nUpdating schema_id for triple.") + if u'relation_type' in document_json and document_json[u'relation_type'] in SCHEMA_ID_MAP: + log_file.write("\nOLD relation_type id " + str(document_json[u'relation_type'])) + document_json[u'relation_type'] = SCHEMA_ID_MAP[document_json[u'relation_type']] + log_file.write("\nNEW relation_type id " + str(document_json[u'relation_type'])) + if u'attribute_type' in document_json and document_json[u'attribute_type'] in SCHEMA_ID_MAP: + log_file.write("\nOLD attribute_type id " + str(document_json[u'attribute_type'])) + document_json[u'attribute_type'] = SCHEMA_ID_MAP[document_json[u'attribute_type']] + log_file.write("\nNEW attribute_type id " + str(document_json[u'attribute_type'])) + return document_json + +def _mapper(json_obj, key, MAP_obj, is_list=False): + log_file.write("\n Calling _mapper:\n\t " + str(json_obj)+ str(key)+ str(MAP_obj)+ str(is_list)) + + if key in json_obj: + if is_list: + for eu in json_obj[key]: + if eu in MAP_obj: + replace_in_list(json_obj[key],eu, MAP_obj[eu]) + else: + json_obj[key] = MAP_obj[json_obj[key]] + +def update_schema_and_user_ids(document_json): + log_file.write("\n Invoked update_schema_and_user_ids:\n\t " + str(document_json)) + global DEFAULT_USER_SET + global DEFAULT_USER_ID + if SCHEMA_ID_MAP: + _mapper(document_json, 'member_of', SCHEMA_ID_MAP, is_list=True) + _mapper(document_json, 'type_of', SCHEMA_ID_MAP, is_list=True) + + if DEFAULT_USER_SET: + document_json['contributors'] = [DEFAULT_USER_ID] + document_json['created_by'] = DEFAULT_USER_ID + document_json['modified_by'] = DEFAULT_USER_ID + if 'group_admin' in document_json: + document_json['group_admin'] = [DEFAULT_USER_ID] + if 'author_set' in document_json: + document_json['author_set'] = [DEFAULT_USER_ID] + + elif CONFIG_VARIABLES.RESTORE_USER_DATA and USER_ID_MAP: + _mapper(document_json, 'contributors', USER_ID_MAP, is_list=True) + _mapper(document_json, 'group_admin', USER_ID_MAP, is_list=True) + _mapper(document_json, 'author_set', USER_ID_MAP, is_list=True) + _mapper(document_json, 'created_by', USER_ID_MAP) + _mapper(document_json, 'modified_by', USER_ID_MAP) + + log_file.write("\n Finished update_schema_and_user_ids:\n\t " + str(document_json)) + return document_json + + ''' + else: + Schema is same. No updation required. 
+ ''' + +def copy_version_file(filepath): + if os.path.exists(filepath): + cwd_path = os.getcwd() + posix_filepath = pathlib2.Path(filepath) + rcs_data_path = str(pathlib2.Path(*posix_filepath.parts[:7])) + rcs_file_path = str(pathlib2.Path(*posix_filepath.parts[7:])) + os.chdir(rcs_data_path) + cp = "cp -v " + rcs_file_path + " " +" --parents " + RCS_REPO_DIR + "/" + subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) + os.chdir(cwd_path) + + +def restore_filehive_objects(rcs_filehives_path): + print "\nRestoring Filehives.." + global log_file + log_file.write("\nRestoring Filehives. ") + for dir_, _, files in os.walk(rcs_filehives_path): + for filename in files: + filepath = os.path.join(dir_, filename) + fh_json= get_json_file(filepath) + fh_obj = filehive_collection.one({'_id': ObjectId(fh_json['_id'])}) + + if not fh_obj: + copy_version_file(filepath) + log_file.write("\nRCS file copied : \n\t" + str(filepath) ) + try: + log_file.write("\nInserting new Filehive Object : \n\tNew-obj: " + \ + str(fh_json)) + node_id = filehive_collection.collection.insert(fh_json) + # print "\n fh_json: ", fh_json + fh_obj = filehive_collection.one({'_id': node_id}) + + fh_obj.save() + log_file.write("\nUpdate RCS using save()") + except Exception as fh_insert_err: + log_file.write("\nError while inserting FH obj" + str(fh_insert_err)) + pass + else: + log_file.write("\nFound Existing Filehive Object : \n\tFound-obj: " + \ + str(fh_obj) + "\n\tExiting-obj: "+str(fh_json)) + +def restore_node_objects(rcs_nodes_path): + print "\nRestoring Nodes.." + global log_file + log_file.write("\nRestoring Nodes. ") + for dir_, _, files in os.walk(rcs_nodes_path): + for filename in files: + filepath = os.path.join(dir_, filename) + restore_node(filepath) + +def restore_triple_objects(rcs_triples_path): + print "\nRestoring Triples.." + global log_file + log_file.write("\nRestoring Triples. 
") + for dir_, _, files in os.walk(rcs_triples_path): + for filename in files: + filepath = os.path.join(dir_, filename) + triple_json = get_json_file(filepath) + triple_obj = None + if triple_json and ('_id' in triple_json): + triple_obj = triple_collection.one({'_id': ObjectId(triple_json['_id'])}) + + if triple_obj: + log_file.write("\n Found Existing Triple : \n\t " + str(triple_obj)) + + + + + + triple_obj = update_schema_id_for_triple(triple_obj) + log_file.write("\n Updated Triple : \n\t " + str(triple_obj)) + triple_obj.save() + if triple_obj._type == "GRelation": + if triple_obj.right_subject != triple_json['right_subject']: + if type(triple_obj.right_subject) == list: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$addToSet': {'right_subject': triple_json['right_subject']}}, + multi=False, upsert=False) + else: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$set': {'right_subject': triple_json['right_subject']}}, + multi=False, upsert=False) + log_file.write("\n GRelation Updated : \n\t OLD: " + str(triple_obj), + "\n\tNew: "+str(triple_json)) + elif triple_obj.status == u"DELETED" and triple_json['status'] == u"PUBLISHED": + triple_obj.status = triple_json['status'] + triple_collection.collection.update( + {'subject': triple_obj.subject, 'relation_type': triple_json['relation_type'], '_id': {'$ne': triple_obj._id}}, + {'$set': {'status': u'DELETED'}}, + multi=True, upsert=False) + + + if triple_obj._type == "GAttribute": + if triple_obj.object_value != triple_json['object_value']: + if type(triple_obj.object_value) == list: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$addToSet': {'object_value': triple_json['object_value']}}, + multi=False, upsert=False) + else: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$set': {'object_value': triple_json['object_value']}}, + multi=False, upsert=False) + log_file.write("\n GAttribute Updated: \n\t OLD: " + str(triple_obj) + "\n\tNew: "+str(triple_json)) + triple_obj.save() + else: + copy_version_file(filepath) + log_file.write("\n RCS file copied : \n\t" + str(filepath)) + + try: + log_file.write("\n Inserting Triple doc : " + str(triple_json)) + triple_json = update_schema_id_for_triple(triple_json) + + node_id = triple_collection.collection.insert(triple_json) + triple_obj = triple_collection.one({'_id': node_id}) + triple_node_RT_AT_id = None + # if 'attribute_type' in triple_json: + # triple_node_RT_AT_id = triple_json['attribute_type'] + # else: + # triple_node_RT_AT_id = triple_json['relation_type'] + # triple_node_RT_AT = node_collection.one({'_id': ObjectId(triple_node_RT_AT_id)}) + # triple_obj.save(triple_node=triple_node_RT_AT, triple_id=triple_node_RT_AT._id) + triple_obj.save() + log_file.write("\nUpdate RCS using save()") + except Exception as tr_insert_err: + log_file.write("\nError while inserting Triple obj" + str(tr_insert_err)) + pass + + +def call_group_import(rcs_repo_path): + + rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") + rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") + rcs_triples_path = os.path.join(rcs_repo_path, "Triples") + rcs_counters_path = os.path.join(rcs_repo_path, "Counters") + + # Following sequence is IMPORTANT + # restore_filehive_objects(rcs_filehives_path) + restore_node_objects(rcs_nodes_path) + restore_triple_objects(rcs_triples_path) + + # skip foll. 
command katkamrachana 21Apr2017 + # Instead run python manage.py fillCounter + # restore_counter_objects(rcs_counters_path) + + +def copy_media_data(media_path): + # MEDIA_ROOT is destination usually: /data/media/ + # media_path is "dump-data/data/media" + if os.path.exists(media_path): + media_copy_cmd = "rsync -avzhP " + media_path + "/* " + MEDIA_ROOT + "/" + subprocess.Popen(media_copy_cmd,stderr=subprocess.STDOUT,shell=True) + log_file.write("\n Media Copied: " + str(media_path) ) + +def core_import(*args): + global log_file + log_file_path = create_log_file(DATA_RESTORE_PATH) + print "\n Log will be found at: ", log_file_path + log_file.write("\nUpdated CONFIG_VARIABLES: "+ str(CONFIG_VARIABLES)) + print "\n Validating the data-dump" + validate_data_dump(*args) + print "\n User Restoration." + user_objs_restoration(*args) + print "\n Factory Schema Restoration. Please wait.." + # print "\n SCHEMA: ", SCHEMA_ID_MAP + call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo')) + copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) + +class Command(BaseCommand): + def handle(self, *args, **options): + + global DATA_RESTORE_PATH + global DATA_DUMP_PATH + global SCHEMA_ID_MAP + if args and len(args) == 4: + DATA_RESTORE_PATH = args[0] + else: + DATA_RESTORE_PATH = raw_input("\n\tEnter absolute path of data-dump folder to restore:") + print "\nDATA_RESTORE_PATH: ", DATA_RESTORE_PATH + if os.path.exists(DATA_RESTORE_PATH): + # Check if DATA_DUMP_PATH has dump, if not then its dump of Node holding Groups. + if os.path.exists(os.path.join(DATA_RESTORE_PATH, 'dump')): + # Single Group Dump + DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump') + SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) + read_config_file() + core_import(*args) + print "*"*70 + # print "\n Export will be found at: ", DATA_EXPORT_PATH + print "\n This will take few minutes. Please be patient.\n" + print "*"*70 + + else: + print "\n No dump found at entered path." 
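
The MD5 recorded at export time and the check in validate_data_dump() above both come from checksumdir's dirhash; a minimal standalone sketch of that handshake (the restore path is hypothetical):

    import os, imp
    from checksumdir import dirhash
    restore_path = "/data/data_export/my-node_2018-01-01_16-39"  # hypothetical
    config = imp.load_source('config_variables',
                             os.path.join(restore_path, 'migration_configs.py'))
    recomputed = dirhash(os.path.join(restore_path, 'dump'), 'md5')
    if recomputed != config.MD5:
        print "MD5 NOT matching. Restoration not recommended."
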
+ call_exit() + +def restore_node(filepath): + global log_file + log_file.write("\nRestoring Node: " + str(filepath)) + + node_json = get_json_file(filepath) + print node_json + try: + node_obj = node_collection.one({'_id': ObjectId(node_json['_id'])}) + if node_obj: + node_obj = update_schema_and_user_ids(node_obj) + if SCHEMA_ID_MAP: + _mapper(node_obj, 'member_of', SCHEMA_ID_MAP, is_list=True) + _mapper(node_obj, 'type_of', SCHEMA_ID_MAP, is_list=True) + + log_file.write("\nFound Existing Node : " + str(node_obj._id)) + node_changed = False + if node_obj.author_set != node_json['author_set'] and node_json['author_set']: + log_file.write("\n Old author_set :\n\t " + str(node_obj.author_set)) + node_obj.author_set = merge_lists_and_maintain_unique_ele(node_obj.author_set, + node_json['author_set']) + log_file.write("\n New author_set :\n\t "+ str(node_obj.author_set)) + node_changed = True + + if node_obj.relation_set != node_json['relation_set'] and node_json['relation_set']: + log_file.write("\n Old relation_set :\n\t "+ str(node_obj.relation_set)) + node_obj.relation_set = merge_lists_and_maintain_unique_ele(node_obj.relation_set, + node_json['relation_set'], advanced_merge=True) + log_file.write("\n New relation_set :\n\t "+ str(node_obj.relation_set)) + node_changed = True + + if node_obj.attribute_set != node_json['attribute_set'] and node_json['attribute_set']: + log_file.write("\n Old attribute_set :\n\t "+ str(node_obj.attribute_set)) + node_obj.attribute_set = merge_lists_and_maintain_unique_ele(node_obj.attribute_set, + node_json['attribute_set'], advanced_merge=True) + log_file.write("\n New attribute_set :\n\t "+ str(node_obj.attribute_set)) + node_changed = True + + if node_obj.post_node != node_json['post_node'] and node_json['post_node']: + log_file.write("\n Old post_node :\n\t "+ str(node_obj.post_node)) + node_obj.post_node = merge_lists_and_maintain_unique_ele(node_obj.post_node, + node_json['post_node']) + log_file.write("\n New post_node :\n\t "+ str(node_obj.post_node)) + node_changed = True + + # if node_obj.group_set != node_json['group_set'] and node_json['group_set']: + # log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) + # node_obj.group_set = merge_lists_and_maintain_unique_ele(node_obj.group_set, + # node_json['group_set']) + # log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) + # node_changed = True + + if node_obj.prior_node != node_json['prior_node'] and node_json['prior_node']: + log_file.write("\n Old prior_node :\n\t "+ str(node_obj.prior_node)) + node_obj.prior_node = merge_lists_and_maintain_unique_ele(node_obj.prior_node, + node_json['prior_node']) + log_file.write("\n New prior_node :\n\t "+ str(node_obj.prior_node)) + node_changed = True + + if node_obj.origin != node_json['origin'] and node_json['origin']: + log_file.write("\n Old origin :\n\t "+ str(node_obj.origin)) + node_obj.origin = merge_lists_and_maintain_unique_ele(node_obj.origin, + node_json['origin']) + log_file.write("\n New origin :\n\t "+ str(node_obj.origin)) + node_changed = True + + # if node_obj.collection_set != node_json['collection_set'] and node_json['collection_set']: + # log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) + # log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set'])) + + # # node_obj.collection_set = merge_lists_and_maintain_unique_ele(node_obj.collection_set, + # # node_json['collection_set']) + # node_obj.collection_set = node_json['collection_set'] + # 
log_file.write("\n New collection_set :\n\t "+ str(node_obj.collection_set)) + # node_changed = True + + if node_obj.content != node_json['content'] and node_json['content']: + log_file.write("\n Old content :\n\t "+ str(node_obj.content)) + node_obj.content = node_json['content'] + node_changed = True + log_file.write("\n New content :\n\t "+ str(node_obj.content)) + + log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) + log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set'])) + + # node_obj.collection_set = merge_lists_and_maintain_unique_ele(node_obj.collection_set, + # node_json['collection_set']) + node_obj.collection_set = node_json['collection_set'] + log_file.write("\n New collection_set :\n\t "+ str(node_obj.collection_set)) + node_changed = True + + log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) + + log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) + node_obj.access_policy = u'PUBLIC' + log_file.write("\n Setting access_policy: u'PUBLIC'") + node_changed = True + + if node_changed: + log_file.write("\n Node Updated: \n\t OLD: " + str(node_obj) + "\n\tNew: "+str(node_json)) + node_obj.save() + else: + copy_version_file(filepath) + log_file.write("\n RCS file copied : \n\t" + str(filepath)) + node_json = update_schema_and_user_ids(node_json) + node_json = update_group_set(node_json) + try: + log_file.write("\n Inserting Node doc : \n\t" + str(node_json)) + node_id = node_collection.collection.insert(node_json) + node_obj = node_collection.one({'_id': node_id}) + node_obj.save(groupid=ObjectId(node_obj.group_set[0])) + log_file.write("\nUpdate RCS using save()") + except Exception as node_insert_err: + log_file.write("\nError while inserting Node obj" + str(node_insert_err)) + pass + except Exception as restore_node_obj_err: + print "\n Error in restore_node_obj_err: ", restore_node_obj_err + log_file.write("\nOuter Error while inserting Node obj" + str(restore_node_obj_err)) + pass + +def parse_json_values(d): + # This decoder will be moved to models next to class NodeJSONEncoder + if u'uploaded_at' in d: + d[u'uploaded_at'] = datetime.datetime.fromtimestamp(d[u'uploaded_at']/1e3) + if u'last_update' in d: + d[u'last_update'] = datetime.datetime.fromtimestamp(d[u'last_update']/1e3) + if u'created_at' in d: + d[u'created_at'] = datetime.datetime.fromtimestamp(d[u'created_at']/1e3) + if u'attribute_type' in d or u'relation_type' in d: + d = update_schema_id_for_triple(d) + if u'attribute_type' in d: + if d[u'attribute_type'] in DATE_AT_IDS: + d[u'object_value'] = datetime.datetime.fromtimestamp(d[u'object_value']/1e3) + if u'attribute_set' in d: + for each_attr_dict in d[u'attribute_set']: + for each_key, each_val in each_attr_dict.iteritems(): + if each_key in [u"start_time", u"end_time", u"start_enroll", u"end_enroll"]: + each_attr_dict[each_key] = datetime.datetime.fromtimestamp(each_val/1e3) + return d + + +def get_json_file(filepath): + + # this will create a .json file of the document(node) + # at manage.py level + # Returns json and rcs filepath + try: + rcs.checkout(filepath) + fp = filepath.split('/')[-1] + # fp = filepath + if fp.endswith(',v'): + fp = fp.split(',')[0] + with open(fp, 'r') as version_file: + obj_as_json = json.loads(version_file.read(), object_hook=json_util.object_hook) + parse_json_values(obj_as_json) + rcs.checkin(fp) + # os.remove(fp) + return obj_as_json + except Exception as get_json_err: + print "Exception while getting JSON: ", get_json_err + pass From 
18dd72ec5c202e77a6ac819654edd524fcc85248 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 17:11:19 +0530 Subject: [PATCH 03/13] core-export updated --- .../ndf/management/commands/export_logic.py | 69 +++++++++++++------ 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py index bb260b2683..a2e85edad8 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py @@ -34,6 +34,18 @@ DUMP_NODES_LIST = [] DUMPED_NODE_IDS = set() + +def create_log_file(req_log_file_name): + ''' + Creates log file in gstudio-logs/ with + the name of the dump folder + ''' + log_file_name = req_log_file_name + '.log' + if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): + os.makedirs(GSTUDIO_LOGS_DIR_PATH) + log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) + return log_file_path + def build_rcs(node, collection_name): ''' Updates the rcs json with the current node's strcuture that @@ -162,11 +174,9 @@ def copy_rcs(node): path = historyMgr.get_file_path(node) path = path + ",v" # log_file.write( "\n RCS Copied " + str(path) - print "\nDATA_EXPORT_PATH: ", DATA_EXPORT_PATH - cp = "cp -vu " + path + " " +" --parents " + DATA_EXPORT_PATH + "/" + cp = "cp -u " + path + " " +" --parents " + DATA_EXPORT_PATH + "/" subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) - except Exception as copyRCSError: error_log = "\n !!! Error found while Copying RCS ." error_log += "\nError: " + str(copyRCSError) @@ -174,25 +184,43 @@ def copy_rcs(node): print error_log pass +def get_counter_ids(group_id=None, group_node=None, user_ids=None): + ''' + Fetch all the Counter instances of the exporting Group + ''' + if group_id: + counter_collection_cur = counter_collection.find({'group_id':ObjectId(group_id)}) + elif group_node: + counter_collection_cur = counter_collection.find({'group_id':ObjectId(group_node._id)}) + elif user_ids: + counter_collection_cur = counter_collection.find({'user_id': {'$in': user_ids}}) + + if counter_collection_cur : + for each_obj in counter_collection_cur : + dump_node(node=each_obj,collection_name=counter_collection, variables_dict=GLOBAL_DICT) + + +def write_md5_of_dump(group_dump_path, configs_file_path): + from checksumdir import dirhash + md5hash = dirhash(group_dump_path, 'md5') + with open(configs_file_path, 'a+') as configs_file_out: + configs_file_out.write("\nMD5='" + str(md5hash) + "'") + + def dumping_call(node, collection_name): try: global log_file global GROUP_ID global DUMPED_NODE_IDS log_file.write( "\nDumping Call for : " + str(node)) - if (node._id == GROUP_ID or node._type != "Group") and node._id not in DUMPED_NODE_IDS: - build_rcs(node, collection_name) - - if collection_name == node_collection: - get_triple_data(node._id) - DUMPED_NODE_IDS.add(node._id) - if 'File' in node.member_of_names_list: - get_file_node_details(node, exclude_node=True) - else: - DUMPED_NODE_IDS.add(node._id) - log_file.write( "\n Dump node finished for: " + str(node._id)) - else: - log_file.write( "\n Already dumped node: " + str(node._id)) + print ".", + build_rcs(node, collection_name) + + if collection_name == node_collection: + get_triple_data(node._id) + if 'File' in node.member_of_names_list: + get_file_node_details(node, exclude_node=True) + log_file.write( "\n Dump node finished for: " + str(node._id)) except Exception as dumping_call_err: error_log 
= "\n !!! Error found in dumping_call_node() ." @@ -257,24 +285,23 @@ def dump_node(collection_name=node_collection, node=None, node_id=None, node_id_ global log_file log_file.write( "\n dump_node invoked for: " + str(collection_name)) if node: - dumping_call(node,collection_name) + dumping_call(node,collection_name) elif node_id: log_file.write( "\tNode_id : " + str(node_id)) node = collection_name.one({'_id': ObjectId(node_id), '_type': {'$nin': ['Group', 'Author']}}) if node: - dumping_call(node,collection_name) + dumping_call(node, collection_name) elif node_id_list: node_cur = collection_name.one({'_id': {'$in': node_id_list}, '_type': {'$nin': ['Group', 'Author']}}) log_file.write( "\tNode_id_list : " + str(node_id_list)) for each_node in nodes_cur: if each_node: - dumping_call(node,collection_name) - + dumping_call(each_node,collection_name) except Exception as dump_err: error_log = "\n !!! Error found while taking dump in dump_node() ." error_log += "\nError: " + str(dump_err) - log_file.write( str(error_log)) + log_file.write(str(error_log)) print error_log pass From 0ac27c1921f563aa91b6a467323109978e8808dc Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 17:11:47 +0530 Subject: [PATCH 04/13] group-export to invoke base-export script/functions --- .../ndf/management/commands/group_export.py | 55 ++++++------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py index fe7cdb4269..bab60e95f1 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py @@ -15,7 +15,7 @@ from django.core.management.base import BaseCommand, CommandError from schema_mapping import create_factory_schema_mapper from users_dump_restore import create_users_dump -from export_logic import dump_node +from export_logic import dump_node, create_log_file, get_counter_ids, write_md5_of_dump from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection from gnowsys_ndf.ndf.models import HistoryManager from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID @@ -40,22 +40,6 @@ ROOT_DUMP_NODE_NAME = None MULTI_DUMP = False -def create_log_file(dump_node_id): - ''' - Creates log file in gstudio-logs/ with - the name of the dump folder - ''' - log_file_name = 'group_dump_' + str(dump_node_id)+ '.log' - if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): - os.makedirs(GSTUDIO_LOGS_DIR_PATH) - global log_file_path - log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) - # print log_file_path - global log_file - log_file = open(log_file_path, 'w+') - log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") - return log_file_path - def setup_dump_path(node_name): ''' Creates factory_schema.json which will hold basic info @@ -92,13 +76,6 @@ def create_configs_file(group_id): configs_file_out.write('\nSYSTEM_DETAILS="' + str(os.uname()) + '"') return configs_file_path -def write_md5_of_dump(group_dump_path, configs_file_path): - global DUMP_PATH - from checksumdir import dirhash - md5hash = dirhash(group_dump_path, 'md5') - with open(configs_file_path, 'a+') as configs_file_out: - configs_file_out.write("\nMD5='" + str(md5hash) + "'") - def core_export(group_node): if group_node: print "\tRequest received for Export of : ", 
group_node.name , ' | ObjectId: ', group_node._id @@ -115,13 +92,13 @@ def core_export(group_node): print "\n\tTotal objects found: ", nodes_falling_under_grp.count() confirm_export = raw_input("\n\tEnter y/Y to Continue or any other key to Abort:\t ") if confirm_export in ['y', 'Y']: - print "START : ", str(datetime.datetime.now()) + group_dump_path = setup_dump_path(slugify(group_node.name)) global GROUP_ID GROUP_ID = group_node._id call_group_export(group_node, nodes_falling_under_grp) - get_counter_ids(group_node._id) + get_counter_ids(group_id=group_node._id) # import ipdb; ipdb.set_trace() global GROUP_CONTRIBUTORS if RESTORE_USER_DATA: @@ -134,7 +111,6 @@ def core_export(group_node): log_file.write("\n*************************************************************") log_file.write("\n######### Script Completed at : " + str(datetime.datetime.now()) + " #########\n\n") - print "END : ", str(datetime.datetime.now()) else: call_exit() else: @@ -259,14 +235,6 @@ def call_group_export(group_node, nodes_cur, num_of_processes=5): # return resultlist -def get_counter_ids(group_id): - ''' - Fetch all the Counter instances of the exporting Group - ''' - counter_collection_cur = counter_collection.find({'group_id':ObjectId(group_id)}) - if counter_collection_cur : - for each_obj in counter_collection_cur : - dump_node(node=each_obj,collection_name=counter_collection, variables_dict=GLOBAL_DICT) class Command(BaseCommand): def handle(self, *args, **options): @@ -276,12 +244,21 @@ def handle(self, *args, **options): global ROOT_DUMP_NODE_NAME global MULTI_DUMP global GLOBAL_DICT + global log_file + global log_file_path + global TOP_PATH + global DUMP_NODES_LIST input_name_or_id = raw_input("\n\tPlease enter ObjectID of the Group: ") dump_node_obj = node_collection.one({'_id': ObjectId(input_name_or_id)}) group_node = None if dump_node_obj: - log_file_path = create_log_file(dump_node_obj._id) + datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") + log_file_name = 'group_dump_' + slugify(dump_node_obj.name) + "_" + str(datetimestamp) + log_file_path = create_log_file(log_file_name) + log_file = open(log_file_path, 'w+') + log_file.write("\n######### Script ran on : " + str(datetimestamp) + " #########\n\n") + ROOT_DUMP_NODE_ID = dump_node_obj._id ROOT_DUMP_NODE_NAME = dump_node_obj.name @@ -290,8 +267,6 @@ def handle(self, *args, **options): SCHEMA_MAP_PATH = DUMP_PATH create_factory_schema_mapper(SCHEMA_MAP_PATH) else: - global TOP_PATH - global DUMP_NODES_LIST datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', slugify(dump_node_obj.name) + "_"+ str(datetimestamp)) SCHEMA_MAP_PATH = TOP_PATH @@ -321,8 +296,10 @@ def handle(self, *args, **options): dump_node(node=dump_node_obj,collection_name=node_collection, variables_dict=GLOBAL_DICT) create_factory_schema_mapper(SCHEMA_MAP_PATH) print "*"*70 - print "\n This will take few minutes. 
Please be patient.\n" + print "\n START : ", str(datetimestamp) print "\n Log will be found at: ", log_file_path + print "\n Dump will be found at: ", SCHEMA_MAP_PATH + print "\n END : ", str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) print "*"*70 log_file.close() call_exit() \ No newline at end of file From 3e51779b4dd7641c59ea36b31c7a6bd4e5c374a2 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 17:12:27 +0530 Subject: [PATCH 05/13] group_import core logic moved to import_logic file --- .../ndf/management/commands/group_import.py | 73 ++++--------------- 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py index c7ae9dd392..5f2f2dc78c 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py @@ -27,6 +27,7 @@ from gnowsys_ndf.ndf.models import HistoryManager, RCS from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, RCS_REPO_DIR from users_dump_restore import load_users_dump +from import_logic import * from gnowsys_ndf.settings import RCS_REPO_DIR_HASH_LEVEL from schema_mapping import update_factory_schema_mapper from gnowsys_ndf.ndf.views.utils import replace_in_list, merge_lists_and_maintain_unique_ele @@ -60,22 +61,6 @@ CONFIG_VARIABLES.MD5='aeba0e3629fb0443861c699ae327d962' ''' -def create_log_file(restore_path): - ''' - Creates log file in gstudio-logs/ with - the name of the dump folder - ''' - restore_path = restore_path.split("/")[-1] - log_file_name = 'group_restore_of_dump_' + str(CONFIG_VARIABLES.GROUP_ID)+ '.log' - if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): - os.makedirs(GSTUDIO_LOGS_DIR_PATH) - - log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) - global log_file - log_file = open(log_file_path, 'w+') - log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") - return log_file_path - def call_exit(): print "\n Exiting..." @@ -91,39 +76,6 @@ def read_config_file(): CONFIG_VARIABLES = imp.load_source('config_variables', os.path.join(DATA_RESTORE_PATH,'migration_configs.py')) -def validate_data_dump(*args): - """ - For validation of the exported dump and the - importing data-dump, calculate MD5 and - check with CONFIG_VARIABLES.MD5 - This will ensure the exported data is NOT altered - before importing - """ - global log_file - from checksumdir import dirhash - md5hash = dirhash(DATA_DUMP_PATH, 'md5') - if CONFIG_VARIABLES.MD5 != md5hash: - print "\n MD5 NOT matching." - print "\nargs: ", args - if args and len(args) == 4: - proceed_without_validation = args[1] - else: - proceed_without_validation = raw_input("MD5 not matching. Restoration not recommended.\n \ - Enter (y/Y) to continue ?") - if proceed_without_validation not in ['y', 'Y']: - log_file.write("\n Checksum validation Failed on dump data") - call_exit() - else: - print "\nValidation Success..!" 
- proceed_with_validation = '' - if args and len(args) == 4: - proceed_without_validation = args[1] - else: - proceed_with_validation = raw_input("MD5 Matching.\n \ - Enter (y/Y) to proceed to restoration") - if proceed_with_validation in ['y', 'Y']: - log_file.write("\n Checksum validation Success on dump data") - def get_file_path_with_id(node_id): file_name = (node_id + '.json') @@ -197,13 +149,14 @@ def check_group_availability(*args): log_file.write("\n Group Merge confirmed.") print " Proceeding to restore." - +''' def user_objs_restoration(*args): global USER_ID_MAP global DEFAULT_USER_ID global DEFAULT_USER_SET global log_file user_json_data = None + if CONFIG_VARIABLES.RESTORE_USER_DATA: user_dump_restore = raw_input("\n\tUser dump is available. \ Would you like to restore it (y/n) ?: ") @@ -243,7 +196,7 @@ def user_objs_restoration(*args): DEFAULT_USER_ID = 1 print "\n No RESTORE_USER_DATA available. Setting Default user with id: 1" log_file.write("\n No RESTORE_USER_DATA available. Setting Default user with id :" + str(DEFAULT_USER_SET)) - +''' def update_schema_id_for_triple(document_json): if SCHEMA_ID_MAP: global log_file @@ -580,15 +533,21 @@ def copy_media_data(media_path): def core_import(non_grp_root_node=None, *args): global log_file - log_file_path = create_log_file(DATA_RESTORE_PATH) - print "\n Log will be found at: ", log_file_path + global log_file_path + log_file_name = 'group_restore_' + str(CONFIG_VARIABLES.GROUP_ID)+ '.log' + log_file_path = create_log_file(log_file_name) + log_file = open(log_file_path, 'w+') + log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") log_file.write("\nUpdated CONFIG_VARIABLES: "+ str(CONFIG_VARIABLES)) print "\n Validating the data-dump" - validate_data_dump(*args) + print "\nDATA_DUMP_PATH: ", DATA_DUMP_PATH + validate_data_dump(DATA_DUMP_PATH,CONFIG_VARIABLES.MD5, *args) print "\n Checking the dump Group-id availability." check_group_availability(*args) print "\n User Restoration." - user_objs_restoration(*args) + user_json_file_path = os.path.join(DATA_DUMP_PATH, 'users_dump.json') + log_stmt = user_objs_restoration(CONFIG_VARIABLES.RESTORE_USER_DATA , user_json_file_path, DATA_DUMP_PATH, *args) + log_file.write(log_stmt) print "\n Factory Schema Restoration. Please wait.." # print "\n SCHEMA: ", SCHEMA_ID_MAP call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'),non_grp_root_node) @@ -600,6 +559,7 @@ def handle(self, *args, **options): global DATA_RESTORE_PATH global DATA_DUMP_PATH global SCHEMA_ID_MAP + global log_file_path if args and len(args) == 4: DATA_RESTORE_PATH = args[0] else: @@ -653,8 +613,7 @@ def handle(self, *args, **options): # print "\n each_gd_abs_path: ", os.path.join(DATA_RESTORE_PATH,each_gd_abs_path) print "*"*70 - # print "\n Export will be found at: ", DATA_EXPORT_PATH - print "\n This will take few minutes. 
Please be patient.\n" + print "\n Log will be found at: ", log_file_path print "*"*70 else: From a6a2945aca667d924478444f77b07750121c45aa Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 17:12:51 +0530 Subject: [PATCH 06/13] user data export script successfully completed --- .../ndf/management/commands/import_logic.py | 880 ++++++++++++++++++ .../management/commands/user_data_export.py | 230 +++++ 2 files changed, 1110 insertions(+) create mode 100644 gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py create mode 100644 gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py new file mode 100644 index 0000000000..5c38ed2d67 --- /dev/null +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py @@ -0,0 +1,880 @@ +''' +Import can also be called using command line args as following: + python manage.py group_import + like: + python manage.py group_import y y y +''' + + + +import os +import json +import imp +import subprocess +from bson import json_util +import pathlib2 +try: + from bson import ObjectId +except ImportError: # old pymongo + from pymongo.objectid import ObjectId + +import time +import datetime + +# from bson.json_util import dumps,loads,object_hook +from django.core.management.base import BaseCommand, CommandError +from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection +from gnowsys_ndf.ndf.models import HistoryManager, RCS +from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, RCS_REPO_DIR +from users_dump_restore import load_users_dump +from gnowsys_ndf.settings import RCS_REPO_DIR_HASH_LEVEL +from schema_mapping import update_factory_schema_mapper +from gnowsys_ndf.ndf.views.utils import replace_in_list, merge_lists_and_maintain_unique_ele + +# global variables declaration +DATA_RESTORE_PATH = None +DATA_DUMP_PATH = None +DEFAULT_USER_ID = 1 +DEFAULT_USER_SET = False + +USER_ID_MAP = {} +SCHEMA_ID_MAP = {} +log_file = None +CONFIG_VARIABLES = None +DATE_AT_IDS = [] +GROUP_CONTAINERS = ['Module'] +date_related_at_cur = node_collection.find({'_type': 'AttributeType', + 'name': {'$in': ["start_time", "end_time", "start_enroll", "end_enroll"]}}) +for each_date_related_at in date_related_at_cur: + DATE_AT_IDS.append(each_date_related_at._id) +history_manager = HistoryManager() +rcs = RCS() + + +''' +Following will be available: + CONFIG_VARIABLES.FORK=True + CONFIG_VARIABLES.CLONE=False + CONFIG_VARIABLES.RESTORE_USER_DATA=True + CONFIG_VARIABLES.GSTUDIO_INSTITUTE_ID='MZ-10' + CONFIG_VARIABLES.GROUP_ID='58dded48cc566201992f6e79' + CONFIG_VARIABLES.MD5='aeba0e3629fb0443861c699ae327d962' +''' + +def create_log_file(req_log_file_name): + ''' + Creates log file in gstudio-logs/ with + the name of the dump folder + ''' + log_file_name = req_log_file_name + '.log' + if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): + os.makedirs(GSTUDIO_LOGS_DIR_PATH) + log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) + return log_file_path + + +def call_exit(): + print "\n Exiting..." 
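# Usage sketch (annotation, not part of this patch): create_log_file()
# above only creates the logs directory if needed and returns the path
# under GSTUDIO_LOGS_DIR_PATH; callers open the file themselves, e.g.:
#
#   log_file_path = create_log_file('group_restore_example')  # name is illustrative
#   log_file = open(log_file_path, 'w+')
#   log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n")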
+ os._exit(0) + +def read_config_file(): + """ + Read migration_configs.py file generated during + the export of group and load the variables in + CONFIG_VARIABLES to be accessible in entire program + """ + global CONFIG_VARIABLES + CONFIG_VARIABLES = imp.load_source('config_variables', + os.path.join(DATA_RESTORE_PATH,'migration_configs.py')) + +def validate_data_dump(dump, md5, *args): + """ + For validation of the exported dump and the + importing data-dump, calculate MD5 and + check with CONFIG_VARIABLES.MD5 + This will ensure the exported data is NOT altered + before importing + """ + global log_file + from checksumdir import dirhash + md5hash = dirhash(dump, 'md5') + if md5 != md5hash: + print "\n MD5 NOT matching." + print "\nargs: ", args + if args and len(args) == 4: + proceed_without_validation = args[1] + else: + proceed_without_validation = raw_input("MD5 not matching. Restoration not recommended.\n \ + Enter (y/Y) to continue ?") + if proceed_without_validation not in ['y', 'Y']: + log_file.write("\n Checksum validation Failed on dump data") + call_exit() + else: + print "\nValidation Success..!" + proceed_with_validation = '' + if args and len(args) == 4: + proceed_without_validation = args[1] + else: + proceed_with_validation = raw_input("MD5 Matching.\n \ + Enter (y/Y) to proceed to restoration") + if proceed_with_validation in ['y', 'Y']: + log_file.write("\n Checksum validation Success on dump data") + +def get_file_path_with_id(node_id): + file_name = (node_id + '.json') + + collection_dir = os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo', 'Nodes') + + # Example: + # if -- file_name := "523f59685a409213818e3ec6.json" + # then -- collection_hash_dirs := "6/c/3/8/ + # -- from last (2^0)pos/(2^1)pos/(2^2)pos/(2^3)pos/../(2^n)pos" + # here n := hash_level_num + collection_hash_dirs = "" + for pos in range(0, RCS_REPO_DIR_HASH_LEVEL): + collection_hash_dirs += \ + (node_id[-2**pos] + "/") + file_path = \ + os.path.join(collection_dir, \ + (collection_hash_dirs + file_name)) + # print "\n\nfilepath: ", file_path + return file_path + +''' +def check_group_availability(*args): + group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) + global log_file + global DEFAULT_USER_ID + + print '\n\n Restoring Group' + log_file.write("\n Restoring Group") + if group_node: + print "\n Group with restoration ID already exists." + confirm_grp_data_merge = '' + if args and len(args) == 4: + confirm_grp_data_merge = args[2] + else: + confirm_grp_data_merge = raw_input("Dump Group already exists here. Would you like to merge the data ?") + if confirm_grp_data_merge not in ['y', 'Y']: + log_file.write("\n Group with Restore Group ID is FOUND on Target system.") + call_exit() + else: + fp = get_file_path_with_id(CONFIG_VARIABLES.GROUP_ID) + if fp: + if not fp.endswith(',v'): + fp = fp + ',v' + log_file.write("\n Restoring Group: " + str(fp)) + restore_node(fp) + group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) + group_node.group_admin = [DEFAULT_USER_ID] + group_node.save() + log_file.write("\n Group Merge confirmed.") + print " Proceeding to restore." + else: + print "\n Group with restoration ID DOES NOT exists." + confirm_grp_data_restore = '' + if args and len(args) == 4: + confirm_grp_data_restore = args[2] + else: + confirm_grp_data_restore = raw_input("Proceed to restore ?") + if confirm_grp_data_restore not in ['y', 'Y']: + log_file.write("\n Group with Restore Group ID is NOT FOUND on Target system.") + print " Cancelling to restore." 
+ call_exit() + else: + fp = get_file_path_with_id(CONFIG_VARIABLES.GROUP_ID) + if fp: + if not fp.endswith(',v'): + fp = fp + ',v' + log_file.write("\n Restoring Group: " + str(fp)) + restore_node(fp) + group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) + group_node.group_admin = [DEFAULT_USER_ID] + group_node.save() + log_file.write("\n Group Merge confirmed.") + print " Proceeding to restore." +''' + +def user_objs_restoration(restore_users_flag, user_json_file_path, DATA_DUMP_PATH, *args): + global USER_ID_MAP + global DEFAULT_USER_ID + global DEFAULT_USER_SET + log_stmt = "" + user_json_data = None + if restore_users_flag: + user_dump_restore = raw_input("\n\tUser dump is available. \ + Would you like to restore it (y/n) ?: ") + if user_dump_restore in ['y', 'Y']: + log_stmt += "\n Request for users restoration : Yes." + + with open(user_json_file_path, 'rb+') as user_json_fin: + user_json_data = json.loads(user_json_fin.read()) + print "\n Restoring Users. Please wait.." + USER_ID_MAP = load_users_dump(DATA_DUMP_PATH, user_json_data) + log_stmt += "\n USER_ID_MAP: "+ str(USER_ID_MAP) + print "\n Completed Restoring Users." + else: + log_stmt += "\n Request for users restoration : No." + DEFAULT_USER_SET = True + default_user_confirmation = raw_input("\n\tRestoration will use default user-id=1. \ + \n\tEnter y to continue, or n if you want to use some other id?: ") + if default_user_confirmation in ['y', 'Y']: + log_stmt += "\n Request for Default user with id=1 : Yes." + DEFAULT_USER_ID = 1 + else: + log_stmt += "\n Request for Default user with id=1 : No." + DEFAULT_USER_ID = int(raw_input("Enter user-id: ")) + log_stmt += "\n Request for Setting Default user with id :" + str(DEFAULT_USER_SET) + else: + + print "*"*80 + user_dump_restore_default = '' + if args and len(args) == 4: + user_dump_restore_default = args[3] + else: + user_dump_restore_default = raw_input("\n\tUser dump is NOT available. \ + Would you like to use USER_ID=1 for restoration(y/n) ?: ") + if user_dump_restore_default in ['y', 'Y']: + DEFAULT_USER_SET = True + DEFAULT_USER_ID = 1 + print "\n No RESTORE_USER_DATA available. Setting Default user with id: 1" + log_stmt += "\n No RESTORE_USER_DATA available. 
Setting Default user with id :" + str(DEFAULT_USER_SET) + return log_stmt + +def update_schema_id_for_triple(document_json): + if SCHEMA_ID_MAP: + global log_file + log_file.write("\nUpdating schema_id for triple.") + if u'relation_type' in document_json and document_json[u'relation_type'] in SCHEMA_ID_MAP: + log_file.write("\nOLD relation_type id " + str(document_json[u'relation_type'])) + document_json[u'relation_type'] = SCHEMA_ID_MAP[document_json[u'relation_type']] + log_file.write("\nNEW relation_type id " + str(document_json[u'relation_type'])) + if u'attribute_type' in document_json and document_json[u'attribute_type'] in SCHEMA_ID_MAP: + log_file.write("\nOLD attribute_type id " + str(document_json[u'attribute_type'])) + document_json[u'attribute_type'] = SCHEMA_ID_MAP[document_json[u'attribute_type']] + log_file.write("\nNEW attribute_type id " + str(document_json[u'attribute_type'])) + return document_json + +def update_group_set(document_json): + if 'group_set' in document_json: + if ObjectId(CONFIG_VARIABLES.GROUP_ID) not in document_json['group_set']: + document_json['group_set'].append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) + return document_json + +def _mapper(json_obj, key, MAP_obj, is_list=False): + log_file.write("\n Calling _mapper:\n\t " + str(json_obj)+ str(key)+ str(MAP_obj)+ str(is_list)) + + if key in json_obj: + if is_list: + for eu in json_obj[key]: + if eu in MAP_obj: + replace_in_list(json_obj[key],eu, MAP_obj[eu]) + else: + json_obj[key] = MAP_obj[json_obj[key]] + +def update_schema_and_user_ids(document_json): + log_file.write("\n Invoked update_schema_and_user_ids:\n\t " + str(document_json)) + global DEFAULT_USER_SET + global DEFAULT_USER_ID + if SCHEMA_ID_MAP: + _mapper(document_json, 'member_of', SCHEMA_ID_MAP, is_list=True) + _mapper(document_json, 'type_of', SCHEMA_ID_MAP, is_list=True) + + if DEFAULT_USER_SET: + document_json['contributors'] = [DEFAULT_USER_ID] + document_json['created_by'] = DEFAULT_USER_ID + document_json['modified_by'] = DEFAULT_USER_ID + if 'group_admin' in document_json: + document_json['group_admin'] = [DEFAULT_USER_ID] + if 'author_set' in document_json: + document_json['author_set'] = [DEFAULT_USER_ID] + + elif CONFIG_VARIABLES.RESTORE_USER_DATA and USER_ID_MAP: + _mapper(document_json, 'contributors', USER_ID_MAP, is_list=True) + _mapper(document_json, 'group_admin', USER_ID_MAP, is_list=True) + _mapper(document_json, 'author_set', USER_ID_MAP, is_list=True) + _mapper(document_json, 'created_by', USER_ID_MAP) + _mapper(document_json, 'modified_by', USER_ID_MAP) + + log_file.write("\n Finished update_schema_and_user_ids:\n\t " + str(document_json)) + return document_json + + ''' + else: + Schema is same. No updation required. + ''' + +def copy_version_file(filepath): + if os.path.exists(filepath): + cwd_path = os.getcwd() + posix_filepath = pathlib2.Path(filepath) + rcs_data_path = str(pathlib2.Path(*posix_filepath.parts[:7])) + rcs_file_path = str(pathlib2.Path(*posix_filepath.parts[7:])) + os.chdir(rcs_data_path) + cp = "cp -v " + rcs_file_path + " " +" --parents " + RCS_REPO_DIR + "/" + subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) + os.chdir(cwd_path) + + +def restore_filehive_objects(rcs_filehives_path): + print "\nRestoring Filehives.." + global log_file + log_file.write("\nRestoring Filehives. 
") + for dir_, _, files in os.walk(rcs_filehives_path): + for filename in files: + filepath = os.path.join(dir_, filename) + fh_json= get_json_file(filepath) + fh_obj = filehive_collection.one({'_id': ObjectId(fh_json['_id'])}) + + if not fh_obj: + copy_version_file(filepath) + log_file.write("\nRCS file copied : \n\t" + str(filepath) ) + try: + log_file.write("\nInserting new Filehive Object : \n\tNew-obj: " + \ + str(fh_json)) + node_id = filehive_collection.collection.insert(fh_json) + # print "\n fh_json: ", fh_json + fh_obj = filehive_collection.one({'_id': node_id}) + + fh_obj.save() + log_file.write("\nUpdate RCS using save()") + except Exception as fh_insert_err: + log_file.write("\nError while inserting FH obj" + str(fh_insert_err)) + pass + else: + log_file.write("\nFound Existing Filehive Object : \n\tFound-obj: " + \ + str(fh_obj) + "\n\tExiting-obj: "+str(fh_json)) + +def restore_node_objects(rcs_nodes_path, non_grp_root_node=None): + print "\nRestoring Nodes.." + global log_file + log_file.write("\nRestoring Nodes. ") + for dir_, _, files in os.walk(rcs_nodes_path): + for filename in files: + filepath = os.path.join(dir_, filename) + restore_node(filepath,non_grp_root_node) + +def restore_triple_objects(rcs_triples_path): + print "\nRestoring Triples.." + global log_file + log_file.write("\nRestoring Triples. ") + for dir_, _, files in os.walk(rcs_triples_path): + for filename in files: + filepath = os.path.join(dir_, filename) + triple_json = get_json_file(filepath) + if triple_json and ('_id' in triple_json): + triple_obj = triple_collection.one({'_id': ObjectId(triple_json['_id'])}) + else: + triple_obj = None + + if triple_obj: + log_file.write("\n Found Existing Triple : \n\t " + str(triple_obj)) + triple_obj = update_schema_id_for_triple(triple_obj) + log_file.write("\n Updated Triple : \n\t " + str(triple_obj)) + triple_obj.save() + if triple_obj._type == "GRelation": + if triple_obj.right_subject != triple_json['right_subject']: + if type(triple_obj.right_subject) == list: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$addToSet': {'right_subject': triple_json['right_subject']}}, + multi=False, upsert=False) + else: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$set': {'right_subject': triple_json['right_subject']}}, + multi=False, upsert=False) + log_file.write("\n GRelation Updated : \n\t OLD: " + str(triple_obj), + "\n\tNew: "+str(triple_json)) + + if triple_obj._type == "GAttribute": + if triple_obj.object_value != triple_json['object_value']: + if type(triple_obj.object_value) == list: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$addToSet': {'object_value': triple_json['object_value']}}, + multi=False, upsert=False) + else: + triple_collection.collection.update( + {'_id': triple_obj._id}, + {'$set': {'object_value': triple_json['object_value']}}, + multi=False, upsert=False) + log_file.write("\n GAttribute Updated: \n\t OLD: " + str(triple_obj) + "\n\tNew: "+str(triple_json)) + else: + copy_version_file(filepath) + log_file.write("\n RCS file copied : \n\t" + str(filepath)) + + try: + log_file.write("\n Inserting Triple doc : " + str(triple_json)) + triple_json = update_schema_id_for_triple(triple_json) + + node_id = triple_collection.collection.insert(triple_json) + triple_obj = triple_collection.one({'_id': node_id}) + triple_node_RT_AT_id = None + # if 'attribute_type' in triple_json: + # triple_node_RT_AT_id = triple_json['attribute_type'] + # else: + # triple_node_RT_AT_id = 
triple_json['relation_type'] + # triple_node_RT_AT = node_collection.one({'_id': ObjectId(triple_node_RT_AT_id)}) + # triple_obj.save(triple_node=triple_node_RT_AT, triple_id=triple_node_RT_AT._id) + triple_obj.save() + log_file.write("\nUpdate RCS using save()") + except Exception as tr_insert_err: + log_file.write("\nError while inserting Triple obj" + str(tr_insert_err)) + pass + +def restore_counter_objects(rcs_counters_path): + print "\nRestoring Counters.." + global log_file + log_file.write("\nRestoring Counters. ") + for dir_, _, files in os.walk(rcs_counters_path): + for filename in files: + filepath = os.path.join(dir_, filename) + counter_json = get_json_file(filepath) + counter_obj = counter_collection.one({'_id': ObjectId(counter_json['_id'])}) + if counter_obj: + counter_changed = False + log_file.write("\nFound Existing Counter Object : " + str(counter_obj._id)) + + # if counter_obj.last_update != counter_json['last_update'] : + # counter_obj.last_update = counter_json['last_update'] + # counter_changed = True + + if counter_obj.is_group_member != counter_json['is_group_member'] : + counter_obj.is_group_member = counter_json['is_group_member'] + counter_changed = True + + if counter_obj.modules_completed != counter_json['modules_completed'] : + counter_obj.modules_completed = counter_json['modules_completed'] + counter_changed = True + + if counter_obj.course_score != counter_json['course_score'] : + counter_obj.course_score = counter_json['course_score'] + counter_changed = True + + if counter_obj.units_completed != counter_json['units_completed'] : + counter_obj.units_completed = counter_json['units_completed'] + counter_changed = True + + if counter_obj.no_comments_by_user != counter_json['no_comments_by_user'] : + counter_obj.no_comments_by_user = counter_json['no_comments_by_user'] + counter_changed = True + + if counter_obj.no_comments_for_user != counter_json['no_comments_for_user'] : + counter_obj.no_comments_for_user = counter_json['no_comments_for_user'] + counter_changed = True + + if counter_obj.no_files_created != counter_json['no_files_created'] : + counter_obj.no_files_created = counter_json['no_files_created'] + counter_changed = True + + if counter_obj.no_visits_gained_on_files != counter_json['no_visits_gained_on_files'] : + counter_obj.no_visits_gained_on_files = counter_json['no_visits_gained_on_files'] + counter_changed = True + + if counter_obj.no_comments_received_on_files != counter_json['no_comments_received_on_files'] : + counter_obj.no_comments_received_on_files = counter_json['no_comments_received_on_files'] + counter_changed = True + + if counter_obj.no_others_files_visited != counter_json['no_others_files_visited'] : + counter_obj.no_others_files_visited = counter_json['no_others_files_visited'] + counter_changed = True + + if counter_obj.no_comments_on_others_files != counter_json['no_comments_on_others_files'] : + counter_obj.no_comments_on_others_files = counter_json['no_comments_on_others_files'] + counter_changed = True + + if counter_obj.rating_count_received_on_files != counter_json['rating_count_received_on_files'] : + counter_obj.rating_count_received_on_files = counter_json['rating_count_received_on_files'] + counter_changed = True + + if counter_obj.avg_rating_received_on_files != counter_json['avg_rating_received_on_files'] : + counter_obj.avg_rating_received_on_files = counter_json['avg_rating_received_on_files'] + counter_changed = True + + if counter_obj.no_questions_attempted != counter_json['no_questions_attempted'] : + 
counter_obj.no_questions_attempted = counter_json['no_questions_attempted'] + counter_changed = True + + if counter_obj.no_correct_answers != counter_json['no_correct_answers'] : + counter_obj.no_correct_answers = counter_json['no_correct_answers'] + counter_changed = True + + if counter_obj.no_incorrect_answers != counter_json['no_incorrect_answers'] : + counter_obj.no_incorrect_answers = counter_json['no_incorrect_answers'] + counter_changed = True + + if counter_obj.no_notes_written != counter_json['no_notes_written'] : + counter_obj.no_notes_written = counter_json['no_notes_written'] + counter_changed = True + + if counter_obj.no_views_gained_on_notes != counter_json['no_views_gained_on_notes'] : + counter_obj.no_views_gained_on_notes = counter_json['no_views_gained_on_notes'] + counter_changed = True + + if counter_obj.no_others_notes_visited != counter_json['no_others_notes_visited'] : + counter_obj.no_others_notes_visited = counter_json['no_others_notes_visited'] + counter_changed = True + + if counter_obj.no_comments_received_on_notes != counter_json['no_comments_received_on_notes'] : + counter_obj.no_comments_received_on_notes = counter_json['no_comments_received_on_notes'] + counter_changed = True + + if counter_obj.no_comments_on_others_notes != counter_json['no_comments_on_others_notes'] : + counter_obj.no_comments_on_others_notes = counter_json['no_comments_on_others_notes'] + counter_changed = True + + if counter_obj.rating_count_received_on_notes != counter_json['rating_count_received_on_notes'] : + counter_obj.rating_count_received_on_notes = counter_json['rating_count_received_on_notes'] + counter_changed = True + + if counter_obj.avg_rating_received_on_notes != counter_json['avg_rating_received_on_notes'] : + counter_obj.avg_rating_received_on_notes = counter_json['avg_rating_received_on_notes'] + counter_changed = True + + if counter_obj.comments_by_others_on_files != counter_json['comments_by_others_on_files'] and counter_json['comments_by_others_on_files']: + n.comments_by_others_on_files.extend(counter_json['comments_by_others_on_files']) + counter_changed = True + + if counter_obj.comments_by_others_on_notes != counter_json['comments_by_others_on_notes'] and counter_json['comments_by_others_on_notes']: + n.comments_by_others_on_notes.extend(counter_json['comments_by_others_on_notes']) + counter_changed = True + + if counter_changed: + log_file.write("\n Counter Updated: \n\t OLD: " + str(counter_obj), + "\n\tNew: "+str(counter_json)) + counter_obj.save() + else: + try: + log_file.write("\n Inserting Counter doc : " + str(counter_json)) + node_id = counter_collection.collection.insert(counter_json) + except Exception as counter_insert_err: + log_file.write("\nError while inserting Counter obj" + str(counter_insert_err)) + pass + +def call_group_import(rcs_repo_path,non_grp_root_node=None): + + rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") + rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") + rcs_triples_path = os.path.join(rcs_repo_path, "Triples") + rcs_counters_path = os.path.join(rcs_repo_path, "Counters") + + # Following sequence is IMPORTANT + # restore_filehive_objects(rcs_filehives_path) + restore_node_objects(rcs_nodes_path, non_grp_root_node) + restore_triple_objects(rcs_triples_path) + + # skip foll. 
command katkamrachana 21Apr2017 + # Instead run python manage.py fillCounter + # restore_counter_objects(rcs_counters_path) + + +def copy_media_data(media_path): + # MEDIA_ROOT is destination usually: /data/media/ + # media_path is "dump-data/data/media" + if os.path.exists(media_path): + media_copy_cmd = "rsync -avzhP " + media_path + "/* " + MEDIA_ROOT + "/" + subprocess.Popen(media_copy_cmd,stderr=subprocess.STDOUT,shell=True) + log_file.write("\n Media Copied: " + str(media_path) ) + +# def core_import(non_grp_root_node=None, *args): +# global log_file +# log_file_path = create_log_file(DATA_RESTORE_PATH) +# print "\n Log will be found at: ", log_file_path +# log_file.write("\nUpdated CONFIG_VARIABLES: "+ str(CONFIG_VARIABLES)) +# print "\n Validating the data-dump" +# validate_data_dump(*args) +# print "\n Checking the dump Group-id availability." +# check_group_availability(*args) +# print "\n User Restoration." +# user_objs_restoration(*args) +# print "\n Factory Schema Restoration. Please wait.." +# # print "\n SCHEMA: ", SCHEMA_ID_MAP +# call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'),non_grp_root_node) +# copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) + +class Command(BaseCommand): + def handle(self, *args, **options): + + global DATA_RESTORE_PATH + global DATA_DUMP_PATH + global SCHEMA_ID_MAP + if args and len(args) == 4: + DATA_RESTORE_PATH = args[0] + else: + DATA_RESTORE_PATH = raw_input("\n\tEnter absolute path of data-dump folder to restore:") + print "\nDATA_RESTORE_PATH: ", DATA_RESTORE_PATH + if os.path.exists(DATA_RESTORE_PATH): + # Check if DATA_DUMP_PATH has dump, if not then its dump of Node holding Groups. + if os.path.exists(os.path.join(DATA_RESTORE_PATH, 'dump')): + # Single Group Dump + DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump') + SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) + read_config_file() + core_import(None,*args) + else: + # Multi Group Dump + # Get the dumps of Groups and loop over each dump to import + # gd == group-dump + print "\n***** NON Group Dump found. 
*****\n" + global GROUP_CONTAINERS + GRP_CONTAINERS_CUR = node_collection.find({'name': {'$in': GROUP_CONTAINERS}, + '_type': 'GSystemType'}) + GRP_CONTAINERS_IDS = [cont._id for cont in GRP_CONTAINERS_CUR] + SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) + dump_dir = [os.path.join(DATA_RESTORE_PATH,gd) for gd in os.listdir(DATA_RESTORE_PATH) if os.path.isdir(os.path.join(DATA_RESTORE_PATH,gd))] + print "\n Total Groups to be Restored: ", len(dump_dir) + for each_gd_abs_path in dump_dir: + # Call this tmw + # SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_DUMP_PATH) + DATA_DUMP_PATH = os.path.join(each_gd_abs_path, 'dump') + DATA_RESTORE_PATH = each_gd_abs_path + read_config_file() + + non_grp_root_node_obj = node_collection.one({ + '_id': ObjectId(CONFIG_VARIABLES.ROOT_DUMP_NODE_ID) + }) + if non_grp_root_node_obj: + core_import((CONFIG_VARIABLES.ROOT_DUMP_NODE_ID,CONFIG_VARIABLES.ROOT_DUMP_NODE_NAME),*args) + else: + non_grp_root_node_obj = node_collection.one({ + 'name': CONFIG_VARIABLES.ROOT_DUMP_NODE_NAME, + 'member_of': {'$in': GRP_CONTAINERS_IDS}}) + + if non_grp_root_node_obj: + # if non_grp_root_node_obj._id != ObjectId(CONFIG_VARIABLES.ROOT_DUMP_NODE_ID): + # # Module exists, but ID is different + # core_import(None,*args) + # else: + core_import((CONFIG_VARIABLES.ROOT_DUMP_NODE_ID,CONFIG_VARIABLES.ROOT_DUMP_NODE_NAME),*args) + else: + core_import(None,*args) + + # print "\n each_gd_abs_path: ", os.path.join(DATA_RESTORE_PATH,each_gd_abs_path) + print "*"*70 + # print "\n Export will be found at: ", DATA_EXPORT_PATH + print "\n This will take few minutes. Please be patient.\n" + print "*"*70 + + else: + print "\n No dump found at entered path." + call_exit() + +def restore_node(filepath, non_grp_root_node=None): + ''' + non_grp_root_node tuple (ObjectId, name) is used if the GSystem existing on target + and we intend to skip the dumped-node-id having the name + and member_of same but that differ in ObjectId. 
+ (dumped_node_id, exisiting_node_id) + ''' + global log_file + log_file.write("\nRestoring Node: " + str(filepath)) + + node_json = get_json_file(filepath) + print node_json + proceed_flag = True + try: + if non_grp_root_node: + log_file.write("\n non_grp_root_node: " + str(non_grp_root_node)) + if non_grp_root_node[0] == node_json['_id']: + log_file.write("\n Found by ID non_grp_root_node: ") + root_node_obj = node_collection.one({'_type': 'GSystem', + '_id': ObjectId(node_json['_id']), + }) + merged_collection_set_ids = map(ObjectId,list(set(root_node_obj.collection_set + node_json['collection_set']))) + merged_collection_set_cur = node_collection.find({'_id': {'$in': merged_collection_set_ids}}) + valid_collection_set_id = [coll_set_node._id for coll_set_node in merged_collection_set_cur] + root_node_obj.collection_set = valid_collection_set_id + root_node_obj.save() + elif non_grp_root_node[1] == node_json['name']: + global GROUP_CONTAINERS + GRP_CONTAINERS_CUR = node_collection.find({'name': {'$in': GROUP_CONTAINERS}, + '_type': 'GSystemType'}) + GRP_CONTAINERS_IDS = [cont._id for cont in GRP_CONTAINERS_CUR] + log_file.write("\n Found by Name non_grp_root_node: ") + root_node_obj = node_collection.one({'_type': 'GSystem', + 'name': non_grp_root_node[1], + 'member_of': {'$in': GRP_CONTAINERS_IDS} + }) + merged_collection_set_ids = map(ObjectId,list(set(root_node_obj.collection_set + node_json['collection_set']))) + merged_collection_set_cur = node_collection.find({'_id': {'$in': merged_collection_set_ids}}) + valid_collection_set_id = [coll_set_node._id for coll_set_node in merged_collection_set_cur] + root_node_obj.collection_set = valid_collection_set_id + root_node_obj.save() + proceed_flag = False + if proceed_flag: + node_obj = node_collection.one({'_id': ObjectId(node_json['_id'])}) + if node_obj: + node_obj = update_schema_and_user_ids(node_obj) + if SCHEMA_ID_MAP: + _mapper(node_obj, 'member_of', SCHEMA_ID_MAP, is_list=True) + _mapper(node_obj, 'type_of', SCHEMA_ID_MAP, is_list=True) + + log_file.write("\nFound Existing Node : " + str(node_obj._id)) + node_changed = False + if node_obj.author_set != node_json['author_set'] and node_json['author_set']: + log_file.write("\n Old author_set :\n\t " + str(node_obj.author_set)) + node_obj.author_set = merge_lists_and_maintain_unique_ele(node_obj.author_set, + node_json['author_set']) + log_file.write("\n New author_set :\n\t "+ str(node_obj.author_set)) + node_changed = True + + if node_obj.relation_set != node_json['relation_set'] and node_json['relation_set']: + log_file.write("\n Old relation_set :\n\t "+ str(node_obj.relation_set)) + node_obj.relation_set = merge_lists_and_maintain_unique_ele(node_obj.relation_set, + node_json['relation_set'], advanced_merge=True) + log_file.write("\n New relation_set :\n\t "+ str(node_obj.relation_set)) + node_changed = True + + if node_obj.attribute_set != node_json['attribute_set'] and node_json['attribute_set']: + log_file.write("\n Old attribute_set :\n\t "+ str(node_obj.attribute_set)) + node_obj.attribute_set = merge_lists_and_maintain_unique_ele(node_obj.attribute_set, + node_json['attribute_set'], advanced_merge=True) + log_file.write("\n New attribute_set :\n\t "+ str(node_obj.attribute_set)) + node_changed = True + + if node_obj.post_node != node_json['post_node'] and node_json['post_node']: + log_file.write("\n Old post_node :\n\t "+ str(node_obj.post_node)) + node_obj.post_node = merge_lists_and_maintain_unique_ele(node_obj.post_node, + node_json['post_node']) + log_file.write("\n 
New post_node :\n\t "+ str(node_obj.post_node)) + node_changed = True + + # if node_obj.group_set != node_json['group_set'] and node_json['group_set']: + # log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) + # node_obj.group_set = merge_lists_and_maintain_unique_ele(node_obj.group_set, + # node_json['group_set']) + # log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) + # node_changed = True + + if node_obj.prior_node != node_json['prior_node'] and node_json['prior_node']: + log_file.write("\n Old prior_node :\n\t "+ str(node_obj.prior_node)) + node_obj.prior_node = merge_lists_and_maintain_unique_ele(node_obj.prior_node, + node_json['prior_node']) + log_file.write("\n New prior_node :\n\t "+ str(node_obj.prior_node)) + node_changed = True + + if node_obj.origin != node_json['origin'] and node_json['origin']: + log_file.write("\n Old origin :\n\t "+ str(node_obj.origin)) + node_obj.origin = merge_lists_and_maintain_unique_ele(node_obj.origin, + node_json['origin']) + log_file.write("\n New origin :\n\t "+ str(node_obj.origin)) + node_changed = True + + # if node_obj.collection_set != node_json['collection_set'] and node_json['collection_set']: + # log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) + # log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set'])) + + # # node_obj.collection_set = merge_lists_and_maintain_unique_ele(node_obj.collection_set, + # # node_json['collection_set']) + # node_obj.collection_set = node_json['collection_set'] + # log_file.write("\n New collection_set :\n\t "+ str(node_obj.collection_set)) + # node_changed = True + + if node_obj.content != node_json['content'] and node_json['content']: + log_file.write("\n Old content :\n\t "+ str(node_obj.content)) + node_obj.content = node_json['content'] + node_changed = True + log_file.write("\n New content :\n\t "+ str(node_obj.content)) + + log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) + log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set'])) + + # node_obj.collection_set = merge_lists_and_maintain_unique_ele(node_obj.collection_set, + # node_json['collection_set']) + node_obj.collection_set = node_json['collection_set'] + log_file.write("\n New collection_set :\n\t "+ str(node_obj.collection_set)) + node_changed = True + + log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) + + if ObjectId(CONFIG_VARIABLES.GROUP_ID) not in node_obj.group_set: + node_obj.group_set.append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) + + # node_obj.group_set = [ObjectId(CONFIG_VARIABLES.GROUP_ID)] + log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) + node_obj.access_policy = u'PUBLIC' + log_file.write("\n Setting access_policy: u'PUBLIC'") + node_changed = True + + if node_changed: + log_file.write("\n Node Updated: \n\t OLD: " + str(node_obj) + "\n\tNew: "+str(node_json)) + node_obj.save() + else: + copy_version_file(filepath) + log_file.write("\n RCS file copied : \n\t" + str(filepath)) + node_json = update_schema_and_user_ids(node_json) + node_json = update_group_set(node_json) + try: + log_file.write("\n Inserting Node doc : \n\t" + str(node_json)) + node_id = node_collection.collection.insert(node_json) + node_obj = node_collection.one({'_id': node_id}) + node_obj.save(groupid=ObjectId(CONFIG_VARIABLES.GROUP_ID)) + log_file.write("\nUpdate RCS using save()") + except Exception as node_insert_err: + log_file.write("\nError while inserting Node obj" + 
str(node_insert_err)) + pass + except Exception as restore_node_obj_err: + print "\n Error in restore_node_obj_err: ", restore_node_obj_err + log_file.write("\nOuter Error while inserting Node obj" + str(restore_node_obj_err)) + pass + +# def parse_datetime_values(d): +# # This decoder will be moved to models next to class NodeJSONEncoder +# if u'uploaded_at' in d: +# d['uploaded_at'] = datetime.datetime.fromtimestamp(d['uploaded_at']/1e3) +# if u'last_update' in d: +# d['last_update'] = datetime.datetime.fromtimestamp(d['last_update']/1e3) +# if u'created_at' in d: +# d['created_at'] = datetime.datetime.fromtimestamp(d['created_at']/1e3) +# if u'attribute_type' in d or u'relation_type' in d: +# d = update_schema_id_for_triple(d) +# if u'attribute_type' in d: +# if d['attribute_type'] in DATE_AT_IDS: +# d['object_value'] = datetime.datetime.fromtimestamp(d['object_value']/1e3) +# if u'attribute_set' in d: +# for each_attr_dict in d['attribute_set']: +# for each_key, each_val in each_attr_dict.iteritems(): +# if each_key in ["start_time", "end_time", "start_enroll", "end_enroll"]: +# each_attr_dict[each_key] = datetime.datetime.fromtimestamp(each_val/1e3) +# return d + +def parse_json_values(d): + # This decoder will be moved to models next to class NodeJSONEncoder + if u'uploaded_at' in d: + d[u'uploaded_at'] = datetime.datetime.fromtimestamp(d[u'uploaded_at']/1e3) + if u'last_update' in d: + d[u'last_update'] = datetime.datetime.fromtimestamp(d[u'last_update']/1e3) + if u'created_at' in d: + d[u'created_at'] = datetime.datetime.fromtimestamp(d[u'created_at']/1e3) + if u'attribute_type' in d or u'relation_type' in d: + d = update_schema_id_for_triple(d) + if u'attribute_type' in d: + if d[u'attribute_type'] in DATE_AT_IDS: + d[u'object_value'] = datetime.datetime.fromtimestamp(d[u'object_value']/1e3) + if u'attribute_set' in d: + for each_attr_dict in d[u'attribute_set']: + for each_key, each_val in each_attr_dict.iteritems(): + if each_key in [u"start_time", u"end_time", u"start_enroll", u"end_enroll"]: + each_attr_dict[each_key] = datetime.datetime.fromtimestamp(each_val/1e3) + return d + + +def get_json_file(filepath): + + # this will create a .json file of the document(node) + # at manage.py level + # Returns json and rcs filepath + try: + rcs.checkout(filepath) + fp = filepath.split('/')[-1] + # fp = filepath + if fp.endswith(',v'): + fp = fp.split(',')[0] + with open(fp, 'r') as version_file: + obj_as_json = json.loads(version_file.read(), object_hook=json_util.object_hook) + parse_json_values(obj_as_json) + rcs.checkin(fp) + # os.remove(fp) + return obj_as_json + except Exception as get_json_err: + print "Exception while getting JSON: ", get_json_err + pass diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py new file mode 100644 index 0000000000..a75a3acab4 --- /dev/null +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py @@ -0,0 +1,230 @@ +import os +import datetime +try: + from bson import ObjectId +except ImportError: # old pymongo + from pymongo.objectid import ObjectId +from functools import reduce +import operator +from django.contrib.auth.models import User +from django.template.defaultfilters import slugify +from django.core.management.base import BaseCommand, CommandError +from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection, GSystemType +from gnowsys_ndf.ndf.models import HistoryManager +from 
gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, GSTUDIO_INSTITUTE_ID +from schema_mapping import create_factory_schema_mapper +from users_dump_restore import create_users_dump +from export_logic import create_log_file, write_md5_of_dump, get_counter_ids, dump_node +from gnowsys_ndf.ndf.views.methods import get_group_name_id +from gnowsys_ndf.ndf.templatetags.simple_filters import get_latest_git_hash, get_active_branch_name + +UNIT_IDS = [] +UNIT_NAMES = [] +GLOBAL_DICT = None +DUMP_PATH = None +TOP_PATH = None +GROUP_ID = None +DATA_EXPORT_PATH = None +MEDIA_EXPORT_PATH = None +SCHEMA_MAP_PATH = None +log_file_path = None + +def call_exit(): + print "\nExiting..." + os._exit(0) + + +def setup_dump_path(): + ''' + Creates factory_schema.json which will hold basic info + like ObjectId, name, type of TYPES_LIST and GSTUDIO_DEFAULT_GROUPS + ''' + global DUMP_PATH + global TOP_PATH + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + DUMP_PATH = TOP_PATH + DATA_EXPORT_PATH = os.path.join(DUMP_PATH, 'dump') + MEDIA_EXPORT_PATH = os.path.join(DATA_EXPORT_PATH, 'media_files') + if not os.path.exists(DATA_EXPORT_PATH): + os.makedirs(DATA_EXPORT_PATH) + if not os.path.exists(MEDIA_EXPORT_PATH): + os.makedirs(MEDIA_EXPORT_PATH) + return DATA_EXPORT_PATH + + +def update_globals(): + + global GLOBAL_DICT + global DUMP_PATH + global TOP_PATH + global DATA_EXPORT_PATH + global MEDIA_EXPORT_PATH + global RESTORE_USER_DATA + global SCHEMA_MAP_PATH + global log_file_path + + GLOBAL_DICT = { + "DUMP_PATH": DUMP_PATH, + "TOP_PATH": TOP_PATH, + "DATA_EXPORT_PATH": DATA_EXPORT_PATH, + "MEDIA_EXPORT_PATH": MEDIA_EXPORT_PATH, + "SCHEMA_MAP_PATH": SCHEMA_MAP_PATH, + "log_file_path": log_file_path, + } + +def create_configs_file(): + global DUMP_PATH + global UNIT_NAMES + global UNIT_IDS + configs_file_path = os.path.join(DUMP_PATH, "migration_configs.py") + with open(configs_file_path, 'w+') as configs_file_out: + configs_file_out.write("\nGSTUDIO_INSTITUTE_ID='" + str(GSTUDIO_INSTITUTE_ID) + "'") + configs_file_out.write("\nUNIT_NAMES='" + str(UNIT_NAMES) + "'") + configs_file_out.write("\nUNIT_IDS='" + str(UNIT_IDS) + "'") + configs_file_out.write("\nGIT_COMMIT_HASH='" + str(get_latest_git_hash()) + "'") + configs_file_out.write("\nGIT_BRANCH_NAME='" + str(get_active_branch_name()) + "'") + configs_file_out.write('\nSYSTEM_DETAILS="' + str(os.uname()) + '"') + return configs_file_path + +def pull_nodes(user_ids_list): + user_ids_list = map(int, user_ids_list) + all_nodes = node_collection.find({'_type': 'GSystem', 'created_by': {'$in': user_ids_list}, 'group_set': {'$in': UNIT_IDS}}) + print "\nArtifacts: ", all_nodes.count() + update_globals() + for each_node in all_nodes: + print ".", + dump_node(node=each_node,collection_name=node_collection,variables_dict=GLOBAL_DICT) + +class Command(BaseCommand): + def handle(self, *args, **options): + global UNIT_IDS + global UNIT_NAMES + global log_file + global log_file_path + global DATA_EXPORT_PATH + global SCHEMA_MAP_PATH + global TOP_PATH + print "\nUSER DATA EXPORT FOR : ", GSTUDIO_INSTITUTE_ID + ann_unit_gst_name, ann_unit_gst_id = GSystemType.get_gst_name_id(u"announced_unit") + if args: + try: + args_ids = map(ObjectId,args) + except Exception as e: + print "\n\nPlease enter Valid ObjectId." 
+ call_exit() + all_ann_units_cur = node_collection.find({'_id': {'$in': args_ids}}) + for each_un in all_ann_units_cur: + UNIT_IDS.append(each_un._id) + UNIT_NAMES.append(each_un.name) + else: + all_ann_units_cur = node_collection.find({'member_of': ann_unit_gst_id}) + print "\nTotal Units : ", all_ann_units_cur.count() + for ind, each_ann_unit in enumerate(all_ann_units_cur, start=1): + unit_selection = raw_input("\n\t{0}. Unit: {1} \n\tEnter y/Y to select: ".format(ind, each_ann_unit.name)) + if unit_selection in ['y', 'Y']: + print "\t Yes" + UNIT_IDS.append(each_ann_unit._id) + UNIT_NAMES.append(each_ann_unit.name) + else: + print "\t No" + + print "\nUser Artifacts Data Export of following Units:" + print ("\n\t".join(["{0}. {1}".format(i,unit_name) for i, unit_name in enumerate(UNIT_NAMES, 1)])) + + proceed_flag = raw_input("\nEnter y/Y to Confirm: ") + if proceed_flag: + try: + + datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_file_name = 'artifacts_dump_' + str(GSTUDIO_INSTITUTE_ID) + "_"+ str(datetimestamp) + + TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', log_file_name) + SCHEMA_MAP_PATH = TOP_PATH + + log_file_path = create_log_file(log_file_name) + setup_dump_path() + + + log_file = open(log_file_path, 'w+') + log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") + log_file.write("User Artifacts Data Export for Units: " + str(UNIT_IDS)) + + query = {'member_of': ann_unit_gst_id} + rec = node_collection.collection.aggregate([ + { "$match": query }, + { "$group": { + '_id': 0, + 'count': { '$sum': 1 } , + "author_set": { + "$addToSet": "$author_set" + }, + "group_admin": { + "$addToSet": "$group_admin" + } + },}, + + { "$project": { + '_id': 0, + 'total': '$count', + "user_ids": { + "$cond": [ + { + "$eq": [ + "$author_set", + [] + ] + }, + [], + "$author_set" + ] + }, + "admin_ids": { + "$cond": [ + { + "$eq": [ + "$group_admin", + [] + ] + }, + [], + "$group_admin" + ] + } + + } + } + ]) + + for e in rec['result']: + user_ids_lists = e['user_ids'] + admin_ids_lists = e['admin_ids'] + user_id_list = reduce(operator.concat, user_ids_lists) + admin_id_list = reduce(operator.concat, admin_ids_lists) + non_admin_user_id_list = list(set(user_id_list) - set(admin_id_list)) + + if non_admin_user_id_list: + log_file.write("Users ids: " + str(non_admin_user_id_list)) + pull_nodes(non_admin_user_id_list) + create_users_dump(DATA_EXPORT_PATH, user_id_list) + get_counter_ids(user_ids=user_id_list) + create_factory_schema_mapper(SCHEMA_MAP_PATH) + configs_file_path = create_configs_file() + write_md5_of_dump(DATA_EXPORT_PATH, configs_file_path) + else: + log_file.write("No users with non-admin rights found.") + except Exception as user_data_export_err: + log_file.write("Error occurred: " + str(user_data_export_err)) + pass + finally: + log_file.write("\n*************************************************************") + log_file.write("\n######### Script Completed at : " + str(datetime.datetime.now()) + " #########\n\n") + print "\nSTART : ", str(datetimestamp) + print "\nEND : ", str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + print "*"*70 + print "\n Log will be found at: ", log_file_path + print "*"*70 + log_file.close() + call_exit() + else: + call_exit() \ No newline at end of file From d59ff71033a3542f6373bcc1f180ca71e3248fd4 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 17:49:43 +0530 Subject: [PATCH 07/13] import script modularization. 
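As an aside on the export command shown above: the aggregation pipeline collects per-unit author_set and group_admin arrays with $addToSet, and the handler then flattens them with reduce(operator.concat, ...) and takes the set difference so that only non-admin contributors are passed to pull_nodes(). A minimal stand-alone sketch of that flattening step follows; the sample id lists are hypothetical, and the empty-list initializer is a defensive addition that the patch itself does not use.

import operator
from functools import reduce

author_set_lists = [[3, 5, 7], [5, 9], []]    # hypothetical per-unit author_set values
group_admin_lists = [[3], [3, 9]]             # hypothetical per-unit group_admin values

# Flatten the list-of-lists produced by $addToSet into flat id lists.
user_id_list = reduce(operator.concat, author_set_lists, [])
admin_id_list = reduce(operator.concat, group_admin_lists, [])

# Only artifacts created by non-admin users are exported via pull_nodes().
non_admin_user_id_list = list(set(user_id_list) - set(admin_id_list))
print(non_admin_user_id_list)    # e.g. [5, 7]; set ordering is not guaranteed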
--- .../ndf/management/commands/group_import.py | 84 +++++++------- .../ndf/management/commands/import_logic.py | 103 +++++------------- 2 files changed, 66 insertions(+), 121 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py index 5f2f2dc78c..39be17a14b 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py @@ -66,35 +66,26 @@ def call_exit(): print "\n Exiting..." os._exit(0) -def read_config_file(): - """ - Read migration_configs.py file generated during - the export of group and load the variables in - CONFIG_VARIABLES to be accessible in entire program - """ - global CONFIG_VARIABLES - CONFIG_VARIABLES = imp.load_source('config_variables', - os.path.join(DATA_RESTORE_PATH,'migration_configs.py')) - -def get_file_path_with_id(node_id): - file_name = (node_id + '.json') + +# def get_file_path_with_id(node_id): +# file_name = (node_id + '.json') - collection_dir = os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo', 'Nodes') - - # Example: - # if -- file_name := "523f59685a409213818e3ec6.json" - # then -- collection_hash_dirs := "6/c/3/8/ - # -- from last (2^0)pos/(2^1)pos/(2^2)pos/(2^3)pos/../(2^n)pos" - # here n := hash_level_num - collection_hash_dirs = "" - for pos in range(0, RCS_REPO_DIR_HASH_LEVEL): - collection_hash_dirs += \ - (node_id[-2**pos] + "/") - file_path = \ - os.path.join(collection_dir, \ - (collection_hash_dirs + file_name)) - # print "\n\nfilepath: ", file_path - return file_path +# collection_dir = os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo', 'Nodes') + +# # Example: +# # if -- file_name := "523f59685a409213818e3ec6.json" +# # then -- collection_hash_dirs := "6/c/3/8/ +# # -- from last (2^0)pos/(2^1)pos/(2^2)pos/(2^3)pos/../(2^n)pos" +# # here n := hash_level_num +# collection_hash_dirs = "" +# for pos in range(0, RCS_REPO_DIR_HASH_LEVEL): +# collection_hash_dirs += \ +# (node_id[-2**pos] + "/") +# file_path = \ +# os.path.join(collection_dir, \ +# (collection_hash_dirs + file_name)) +# # print "\n\nfilepath: ", file_path +# return file_path def check_group_availability(*args): group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) @@ -114,7 +105,7 @@ def check_group_availability(*args): log_file.write("\n Group with Restore Group ID is FOUND on Target system.") call_exit() else: - fp = get_file_path_with_id(CONFIG_VARIABLES.GROUP_ID) + fp = get_file_path_with_id(CONFIG_VARIABLES.GROUP_ID, DATA_DUMP_PATH) if fp: if not fp.endswith(',v'): fp = fp + ',v' @@ -137,7 +128,7 @@ def check_group_availability(*args): print " Cancelling to restore." call_exit() else: - fp = get_file_path_with_id(CONFIG_VARIABLES.GROUP_ID) + fp = get_file_path_with_id(CONFIG_VARIABLES.GROUP_ID, DATA_DUMP_PATH) if fp: if not fp.endswith(',v'): fp = fp + ',v' @@ -196,7 +187,8 @@ def user_objs_restoration(*args): DEFAULT_USER_ID = 1 print "\n No RESTORE_USER_DATA available. Setting Default user with id: 1" log_file.write("\n No RESTORE_USER_DATA available. 
Setting Default user with id :" + str(DEFAULT_USER_SET)) -''' + + def update_schema_id_for_triple(document_json): if SCHEMA_ID_MAP: global log_file @@ -210,6 +202,7 @@ def update_schema_id_for_triple(document_json): document_json[u'attribute_type'] = SCHEMA_ID_MAP[document_json[u'attribute_type']] log_file.write("\nNEW attribute_type id " + str(document_json[u'attribute_type'])) return document_json +''' def update_group_set(document_json): if 'group_set' in document_json: @@ -217,6 +210,7 @@ def update_group_set(document_json): document_json['group_set'].append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) return document_json +''' def _mapper(json_obj, key, MAP_obj, is_list=False): log_file.write("\n Calling _mapper:\n\t " + str(json_obj)+ str(key)+ str(MAP_obj)+ str(is_list)) @@ -227,7 +221,9 @@ def _mapper(json_obj, key, MAP_obj, is_list=False): replace_in_list(json_obj[key],eu, MAP_obj[eu]) else: json_obj[key] = MAP_obj[json_obj[key]] +''' +''' def update_schema_and_user_ids(document_json): log_file.write("\n Invoked update_schema_and_user_ids:\n\t " + str(document_json)) global DEFAULT_USER_SET @@ -255,10 +251,8 @@ def update_schema_and_user_ids(document_json): log_file.write("\n Finished update_schema_and_user_ids:\n\t " + str(document_json)) return document_json - ''' - else: - Schema is same. No updation required. - ''' + # else: + # Schema is same. No updation required. def copy_version_file(filepath): if os.path.exists(filepath): @@ -271,7 +265,6 @@ def copy_version_file(filepath): subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) os.chdir(cwd_path) - def restore_filehive_objects(rcs_filehives_path): print "\nRestoring Filehives.." global log_file @@ -505,9 +498,11 @@ def restore_counter_objects(rcs_counters_path): except Exception as counter_insert_err: log_file.write("\nError while inserting Counter obj" + str(counter_insert_err)) pass +''' def call_group_import(rcs_repo_path,non_grp_root_node=None): + global DATA_RESTORE_PATH rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") rcs_triples_path = os.path.join(rcs_repo_path, "Triples") @@ -515,14 +510,14 @@ def call_group_import(rcs_repo_path,non_grp_root_node=None): # Following sequence is IMPORTANT # restore_filehive_objects(rcs_filehives_path) - restore_node_objects(rcs_nodes_path, non_grp_root_node) + restore_node_objects(rcs_nodes_path, log_file_path, DATA_RESTORE_PATH, non_grp_root_node) restore_triple_objects(rcs_triples_path) # skip foll. 
command katkamrachana 21Apr2017 # Instead run python manage.py fillCounter # restore_counter_objects(rcs_counters_path) - +''' def copy_media_data(media_path): # MEDIA_ROOT is destination usually: /data/media/ # media_path is "dump-data/data/media" @@ -530,11 +525,13 @@ def copy_media_data(media_path): media_copy_cmd = "rsync -avzhP " + media_path + "/* " + MEDIA_ROOT + "/" subprocess.Popen(media_copy_cmd,stderr=subprocess.STDOUT,shell=True) log_file.write("\n Media Copied: " + str(media_path) ) +''' def core_import(non_grp_root_node=None, *args): global log_file global log_file_path - log_file_name = 'group_restore_' + str(CONFIG_VARIABLES.GROUP_ID)+ '.log' + datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_file_name = 'artifacts_restore_' + str(GSTUDIO_INSTITUTE_ID) + "_"+ str(datetimestamp) log_file_path = create_log_file(log_file_name) log_file = open(log_file_path, 'w+') log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") @@ -560,6 +557,7 @@ def handle(self, *args, **options): global DATA_DUMP_PATH global SCHEMA_ID_MAP global log_file_path + global CONFIG_VARIABLES if args and len(args) == 4: DATA_RESTORE_PATH = args[0] else: @@ -571,7 +569,7 @@ def handle(self, *args, **options): # Single Group Dump DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump') SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) - read_config_file() + CONFIG_VARIABLES = read_config_file(DATA_RESTORE_PATH) core_import(None,*args) else: # Multi Group Dump @@ -590,7 +588,7 @@ def handle(self, *args, **options): # SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_DUMP_PATH) DATA_DUMP_PATH = os.path.join(each_gd_abs_path, 'dump') DATA_RESTORE_PATH = each_gd_abs_path - read_config_file() + CONFIG_VARIABLES = read_config_file(DATA_RESTORE_PATH) non_grp_root_node_obj = node_collection.one({ '_id': ObjectId(CONFIG_VARIABLES.ROOT_DUMP_NODE_ID) @@ -798,7 +796,7 @@ def restore_node(filepath, non_grp_root_node=None): # if each_key in ["start_time", "end_time", "start_enroll", "end_enroll"]: # each_attr_dict[each_key] = datetime.datetime.fromtimestamp(each_val/1e3) # return d - +''' def parse_json_values(d): # This decoder will be moved to models next to class NodeJSONEncoder if u'uploaded_at' in d: @@ -819,7 +817,6 @@ def parse_json_values(d): each_attr_dict[each_key] = datetime.datetime.fromtimestamp(each_val/1e3) return d - def get_json_file(filepath): # this will create a .json file of the document(node) @@ -840,3 +837,4 @@ def get_json_file(filepath): except Exception as get_json_err: print "Exception while getting JSON: ", get_json_err pass +''' diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py index 5c38ed2d67..f2148ed150 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py @@ -77,7 +77,7 @@ def call_exit(): print "\n Exiting..." 
os._exit(0) -def read_config_file(): +def read_config_file(DATA_RESTORE_PATH): """ Read migration_configs.py file generated during the export of group and load the variables in @@ -86,6 +86,8 @@ def read_config_file(): global CONFIG_VARIABLES CONFIG_VARIABLES = imp.load_source('config_variables', os.path.join(DATA_RESTORE_PATH,'migration_configs.py')) + print "\nCONFIG_VARIABLES: ", CONFIG_VARIABLES + return CONFIG_VARIABLES def validate_data_dump(dump, md5, *args): """ @@ -120,7 +122,7 @@ def validate_data_dump(dump, md5, *args): if proceed_with_validation in ['y', 'Y']: log_file.write("\n Checksum validation Success on dump data") -def get_file_path_with_id(node_id): +def get_file_path_with_id(node_id, DATA_DUMP_PATH): file_name = (node_id + '.json') collection_dir = os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo', 'Nodes') @@ -255,11 +257,11 @@ def update_schema_id_for_triple(document_json): log_file.write("\nNEW attribute_type id " + str(document_json[u'attribute_type'])) return document_json -def update_group_set(document_json): - if 'group_set' in document_json: - if ObjectId(CONFIG_VARIABLES.GROUP_ID) not in document_json['group_set']: - document_json['group_set'].append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) - return document_json +# def update_group_set(document_json): +# if 'group_set' in document_json: +# if ObjectId(CONFIG_VARIABLES.GROUP_ID) not in document_json['group_set']: +# document_json['group_set'].append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) +# return document_json def _mapper(json_obj, key, MAP_obj, is_list=False): log_file.write("\n Calling _mapper:\n\t " + str(json_obj)+ str(key)+ str(MAP_obj)+ str(is_list)) @@ -345,14 +347,20 @@ def restore_filehive_objects(rcs_filehives_path): log_file.write("\nFound Existing Filehive Object : \n\tFound-obj: " + \ str(fh_obj) + "\n\tExiting-obj: "+str(fh_json)) -def restore_node_objects(rcs_nodes_path, non_grp_root_node=None): +def restore_node_objects(rcs_nodes_path, req_log_file_path, data_restore_path, non_grp_root_node=None): print "\nRestoring Nodes.." global log_file + global DATA_RESTORE_PATH + DATA_RESTORE_PATH = data_restore_path + global log_file_path + log_file_path = req_log_file_path + log_file = open(log_file_path, 'w+') + log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") log_file.write("\nRestoring Nodes. ") for dir_, _, files in os.walk(rcs_nodes_path): for filename in files: filepath = os.path.join(dir_, filename) - restore_node(filepath,non_grp_root_node) + restore_node(filepath, non_grp_root_node) def restore_triple_objects(rcs_triples_path): print "\nRestoring Triples.." @@ -564,7 +572,7 @@ def call_group_import(rcs_repo_path,non_grp_root_node=None): # skip foll. 
command katkamrachana 21Apr2017 # Instead run python manage.py fillCounter - # restore_counter_objects(rcs_counters_path) + restore_counter_objects(rcs_counters_path) def copy_media_data(media_path): @@ -591,72 +599,6 @@ def copy_media_data(media_path): # call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'),non_grp_root_node) # copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) -class Command(BaseCommand): - def handle(self, *args, **options): - - global DATA_RESTORE_PATH - global DATA_DUMP_PATH - global SCHEMA_ID_MAP - if args and len(args) == 4: - DATA_RESTORE_PATH = args[0] - else: - DATA_RESTORE_PATH = raw_input("\n\tEnter absolute path of data-dump folder to restore:") - print "\nDATA_RESTORE_PATH: ", DATA_RESTORE_PATH - if os.path.exists(DATA_RESTORE_PATH): - # Check if DATA_DUMP_PATH has dump, if not then its dump of Node holding Groups. - if os.path.exists(os.path.join(DATA_RESTORE_PATH, 'dump')): - # Single Group Dump - DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump') - SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) - read_config_file() - core_import(None,*args) - else: - # Multi Group Dump - # Get the dumps of Groups and loop over each dump to import - # gd == group-dump - print "\n***** NON Group Dump found. *****\n" - global GROUP_CONTAINERS - GRP_CONTAINERS_CUR = node_collection.find({'name': {'$in': GROUP_CONTAINERS}, - '_type': 'GSystemType'}) - GRP_CONTAINERS_IDS = [cont._id for cont in GRP_CONTAINERS_CUR] - SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) - dump_dir = [os.path.join(DATA_RESTORE_PATH,gd) for gd in os.listdir(DATA_RESTORE_PATH) if os.path.isdir(os.path.join(DATA_RESTORE_PATH,gd))] - print "\n Total Groups to be Restored: ", len(dump_dir) - for each_gd_abs_path in dump_dir: - # Call this tmw - # SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_DUMP_PATH) - DATA_DUMP_PATH = os.path.join(each_gd_abs_path, 'dump') - DATA_RESTORE_PATH = each_gd_abs_path - read_config_file() - - non_grp_root_node_obj = node_collection.one({ - '_id': ObjectId(CONFIG_VARIABLES.ROOT_DUMP_NODE_ID) - }) - if non_grp_root_node_obj: - core_import((CONFIG_VARIABLES.ROOT_DUMP_NODE_ID,CONFIG_VARIABLES.ROOT_DUMP_NODE_NAME),*args) - else: - non_grp_root_node_obj = node_collection.one({ - 'name': CONFIG_VARIABLES.ROOT_DUMP_NODE_NAME, - 'member_of': {'$in': GRP_CONTAINERS_IDS}}) - - if non_grp_root_node_obj: - # if non_grp_root_node_obj._id != ObjectId(CONFIG_VARIABLES.ROOT_DUMP_NODE_ID): - # # Module exists, but ID is different - # core_import(None,*args) - # else: - core_import((CONFIG_VARIABLES.ROOT_DUMP_NODE_ID,CONFIG_VARIABLES.ROOT_DUMP_NODE_NAME),*args) - else: - core_import(None,*args) - - # print "\n each_gd_abs_path: ", os.path.join(DATA_RESTORE_PATH,each_gd_abs_path) - print "*"*70 - # print "\n Export will be found at: ", DATA_EXPORT_PATH - print "\n This will take few minutes. Please be patient.\n" - print "*"*70 - - else: - print "\n No dump found at entered path." 
- call_exit() def restore_node(filepath, non_grp_root_node=None): ''' @@ -666,10 +608,15 @@ def restore_node(filepath, non_grp_root_node=None): (dumped_node_id, exisiting_node_id) ''' global log_file + global SCHEMA_ID_MAP + global DATA_RESTORE_PATH + if not SCHEMA_ID_MAP: + SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) + log_file.write("\nRestoring Node: " + str(filepath)) node_json = get_json_file(filepath) - print node_json + # print node_json proceed_flag = True try: if non_grp_root_node: @@ -802,7 +749,7 @@ def restore_node(filepath, non_grp_root_node=None): copy_version_file(filepath) log_file.write("\n RCS file copied : \n\t" + str(filepath)) node_json = update_schema_and_user_ids(node_json) - node_json = update_group_set(node_json) + # node_json = update_group_set(node_json) try: log_file.write("\n Inserting Node doc : \n\t" + str(node_json)) node_id = node_collection.collection.insert(node_json) From c7a03c676592250844938744f79f829d07d62ac0 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 18:27:28 +0530 Subject: [PATCH 08/13] import call moved to core-import --- .../ndf/management/commands/group_import.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py index 39be17a14b..eb0095c764 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py @@ -91,6 +91,7 @@ def check_group_availability(*args): group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) global log_file global DEFAULT_USER_ID + global DATA_RESTORE_PATH print '\n\n Restoring Group' log_file.write("\n Restoring Group") @@ -110,7 +111,7 @@ def check_group_availability(*args): if not fp.endswith(',v'): fp = fp + ',v' log_file.write("\n Restoring Group: " + str(fp)) - restore_node(fp) + restore_node(fp, None, DATA_RESTORE_PATH, log_file_path) group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) group_node.group_admin = [DEFAULT_USER_ID] group_node.save() @@ -133,7 +134,7 @@ def check_group_availability(*args): if not fp.endswith(',v'): fp = fp + ',v' log_file.write("\n Restoring Group: " + str(fp)) - restore_node(fp) + restore_node(fp, None, DATA_RESTORE_PATH, log_file_path) group_node = node_collection.one({'_id': ObjectId(CONFIG_VARIABLES.GROUP_ID)}) group_node.group_admin = [DEFAULT_USER_ID] group_node.save() @@ -500,22 +501,22 @@ def restore_counter_objects(rcs_counters_path): pass ''' -def call_group_import(rcs_repo_path,non_grp_root_node=None): +# def call_group_import(rcs_repo_path,non_grp_root_node=None): - global DATA_RESTORE_PATH - rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") - rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") - rcs_triples_path = os.path.join(rcs_repo_path, "Triples") - rcs_counters_path = os.path.join(rcs_repo_path, "Counters") +# global DATA_RESTORE_PATH +# rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") +# rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") +# rcs_triples_path = os.path.join(rcs_repo_path, "Triples") +# rcs_counters_path = os.path.join(rcs_repo_path, "Counters") - # Following sequence is IMPORTANT - # restore_filehive_objects(rcs_filehives_path) - restore_node_objects(rcs_nodes_path, log_file_path, DATA_RESTORE_PATH, non_grp_root_node) - restore_triple_objects(rcs_triples_path) +# # Following sequence is 
IMPORTANT +# # restore_filehive_objects(rcs_filehives_path) +# restore_node_objects(rcs_nodes_path, log_file_path, DATA_RESTORE_PATH, non_grp_root_node) +# restore_triple_objects(rcs_triples_path) - # skip foll. command katkamrachana 21Apr2017 - # Instead run python manage.py fillCounter - # restore_counter_objects(rcs_counters_path) +# # skip foll. command katkamrachana 21Apr2017 +# # Instead run python manage.py fillCounter +# # restore_counter_objects(rcs_counters_path) ''' def copy_media_data(media_path): @@ -547,7 +548,9 @@ def core_import(non_grp_root_node=None, *args): log_file.write(log_stmt) print "\n Factory Schema Restoration. Please wait.." # print "\n SCHEMA: ", SCHEMA_ID_MAP - call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'),non_grp_root_node) + call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'), + log_file_path, DATA_RESTORE_PATH, non_grp_root_node) + # call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'),non_grp_root_node) copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) class Command(BaseCommand): @@ -617,14 +620,12 @@ def handle(self, *args, **options): else: print "\n No dump found at entered path." call_exit() - +''' def restore_node(filepath, non_grp_root_node=None): - ''' - non_grp_root_node tuple (ObjectId, name) is used if the GSystem existing on target - and we intend to skip the dumped-node-id having the name - and member_of same but that differ in ObjectId. - (dumped_node_id, exisiting_node_id) - ''' + # non_grp_root_node tuple (ObjectId, name) is used if the GSystem existing on target + # and we intend to skip the dumped-node-id having the name + # and member_of same but that differ in ObjectId. + # (dumped_node_id, exisiting_node_id) global log_file log_file.write("\nRestoring Node: " + str(filepath)) @@ -776,7 +777,7 @@ def restore_node(filepath, non_grp_root_node=None): print "\n Error in restore_node_obj_err: ", restore_node_obj_err log_file.write("\nOuter Error while inserting Node obj" + str(restore_node_obj_err)) pass - +''' # def parse_datetime_values(d): # # This decoder will be moved to models next to class NodeJSONEncoder # if u'uploaded_at' in d: From 5cf45d295392eee9f1baded996bf516a3e4c5d67 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Thu, 4 Jan 2018 18:27:45 +0530 Subject: [PATCH 09/13] counter obj import by parsing every field value --- .../ndf/management/commands/import_logic.py | 191 ++++++++++++------ 1 file changed, 129 insertions(+), 62 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py index f2148ed150..ed07beba0d 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py @@ -347,15 +347,9 @@ def restore_filehive_objects(rcs_filehives_path): log_file.write("\nFound Existing Filehive Object : \n\tFound-obj: " + \ str(fh_obj) + "\n\tExiting-obj: "+str(fh_json)) -def restore_node_objects(rcs_nodes_path, req_log_file_path, data_restore_path, non_grp_root_node=None): +def restore_node_objects(rcs_nodes_path, non_grp_root_node=None): print "\nRestoring Nodes.." global log_file - global DATA_RESTORE_PATH - DATA_RESTORE_PATH = data_restore_path - global log_file_path - log_file_path = req_log_file_path - log_file = open(log_file_path, 'w+') - log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") log_file.write("\nRestoring Nodes. 
") for dir_, _, files in os.walk(rcs_nodes_path): for filename in files: @@ -447,105 +441,163 @@ def restore_counter_objects(rcs_counters_path): # counter_obj.last_update = counter_json['last_update'] # counter_changed = True - if counter_obj.is_group_member != counter_json['is_group_member'] : - counter_obj.is_group_member = counter_json['is_group_member'] + if counter_obj.assessment != counter_json['assessment'] : + counter_obj.assessment = counter_json['assessment'] counter_changed = True - if counter_obj.modules_completed != counter_json['modules_completed'] : - counter_obj.modules_completed = counter_json['modules_completed'] + if counter_obj.assessment != counter_json['assessment'] : + counter_obj.assessment = counter_json['assessment'] counter_changed = True - if counter_obj.course_score != counter_json['course_score'] : - counter_obj.course_score = counter_json['course_score'] + if counter_obj['course']['modules']['completed'] != counter_json['course']['modules']['completed'] : + counter_obj['course']['modules']['completed'] = counter_json['course']['modules']['completed'] counter_changed = True - - if counter_obj.units_completed != counter_json['units_completed'] : - counter_obj.units_completed = counter_json['units_completed'] + if counter_obj['course']['units']['completed'] != counter_json['course']['units']['completed'] : + counter_obj['course']['units']['completed'] = counter_json['course']['units']['completed'] counter_changed = True - if counter_obj.no_comments_by_user != counter_json['no_comments_by_user'] : - counter_obj.no_comments_by_user = counter_json['no_comments_by_user'] + if counter_obj.group_points != counter_json['group_points'] : + counter_obj.group_points = counter_json['group_points'] counter_changed = True - if counter_obj.no_comments_for_user != counter_json['no_comments_for_user'] : - counter_obj.no_comments_for_user = counter_json['no_comments_for_user'] + if counter_obj.total_comments_by_user != counter_json['total_comments_by_user'] : + counter_obj.total_comments_by_user = counter_json['total_comments_by_user'] counter_changed = True - if counter_obj.no_files_created != counter_json['no_files_created'] : - counter_obj.no_files_created = counter_json['no_files_created'] + if counter_obj.visited_nodes != counter_json['visited_nodes'] : + counter_obj.visited_nodes = counter_json['visited_nodes'] counter_changed = True - if counter_obj.no_visits_gained_on_files != counter_json['no_visits_gained_on_files'] : - counter_obj.no_visits_gained_on_files = counter_json['no_visits_gained_on_files'] + if counter_obj['file']['avg_rating_gained'] != counter_json['file']['avg_rating_gained']: + counter_obj['file']['avg_rating_gained'] = counter_json['file']['avg_rating_gained'] counter_changed = True - if counter_obj.no_comments_received_on_files != counter_json['no_comments_received_on_files'] : - counter_obj.no_comments_received_on_files = counter_json['no_comments_received_on_files'] + if counter_obj['file']['commented_on_others_res'] != counter_json['file']['commented_on_others_res']: + counter_obj['file']['commented_on_others_res'] = counter_json['file']['commented_on_others_res'] counter_changed = True - if counter_obj.no_others_files_visited != counter_json['no_others_files_visited'] : - counter_obj.no_others_files_visited = counter_json['no_others_files_visited'] + if counter_obj['file']['comments_by_others_on_res'] != counter_json['file']['comments_by_others_on_res']: + counter_obj['file']['comments_by_others_on_res'] = 
counter_json['file']['comments_by_others_on_res'] counter_changed = True - if counter_obj.no_comments_on_others_files != counter_json['no_comments_on_others_files'] : - counter_obj.no_comments_on_others_files = counter_json['no_comments_on_others_files'] + if counter_obj['file']['comments_gained'] != counter_json['file']['comments_gained']: + counter_obj['file']['comments_gained'] = counter_json['file']['comments_gained'] counter_changed = True - if counter_obj.rating_count_received_on_files != counter_json['rating_count_received_on_files'] : - counter_obj.rating_count_received_on_files = counter_json['rating_count_received_on_files'] + if counter_obj['file']['created'] != counter_json['file']['created']: + counter_obj['file']['created'] = counter_json['file']['created'] counter_changed = True - if counter_obj.avg_rating_received_on_files != counter_json['avg_rating_received_on_files'] : - counter_obj.avg_rating_received_on_files = counter_json['avg_rating_received_on_files'] + if counter_obj['file']['rating_count_received'] != counter_json['file']['rating_count_received']: + counter_obj['file']['rating_count_received'] = counter_json['file']['rating_count_received'] counter_changed = True - if counter_obj.no_questions_attempted != counter_json['no_questions_attempted'] : - counter_obj.no_questions_attempted = counter_json['no_questions_attempted'] + if counter_obj['file']['visits_gained'] != counter_json['file']['visits_gained']: + counter_obj['file']['visits_gained'] = counter_json['file']['visits_gained'] counter_changed = True - if counter_obj.no_correct_answers != counter_json['no_correct_answers'] : - counter_obj.no_correct_answers = counter_json['no_correct_answers'] + if counter_obj['file']['visits_on_others_res'] != counter_json['file']['visits_on_others_res']: + counter_obj['file']['visits_on_others_res'] = counter_json['file']['visits_on_others_res'] counter_changed = True - if counter_obj.no_incorrect_answers != counter_json['no_incorrect_answers'] : - counter_obj.no_incorrect_answers = counter_json['no_incorrect_answers'] + if counter_obj['page']['blog']['avg_rating_gained'] != counter_json['page']['blog']['avg_rating_gained']: + counter_obj['page']['blog']['avg_rating_gained'] = counter_json['page']['blog']['avg_rating_gained'] counter_changed = True - if counter_obj.no_notes_written != counter_json['no_notes_written'] : - counter_obj.no_notes_written = counter_json['no_notes_written'] + if counter_obj['page']['blog']['commented_on_others_res'] != counter_json['page']['blog']['commented_on_others_res']: + counter_obj['page']['blog']['commented_on_others_res'] = counter_json['page']['blog']['commented_on_others_res'] counter_changed = True - if counter_obj.no_views_gained_on_notes != counter_json['no_views_gained_on_notes'] : - counter_obj.no_views_gained_on_notes = counter_json['no_views_gained_on_notes'] + if counter_obj['page']['blog']['comments_by_others_on_res'] != counter_json['page']['blog']['comments_by_others_on_res']: + counter_obj['page']['blog']['comments_by_others_on_res'] = counter_json['page']['blog']['comments_by_others_on_res'] counter_changed = True - if counter_obj.no_others_notes_visited != counter_json['no_others_notes_visited'] : - counter_obj.no_others_notes_visited = counter_json['no_others_notes_visited'] + if counter_obj['page']['blog']['comments_gained'] != counter_json['page']['blog']['comments_gained']: + counter_obj['page']['blog']['comments_gained'] = counter_json['page']['blog']['comments_gained'] counter_changed = True - if 
counter_obj.no_comments_received_on_notes != counter_json['no_comments_received_on_notes'] : - counter_obj.no_comments_received_on_notes = counter_json['no_comments_received_on_notes'] + if counter_obj['page']['blog']['created'] != counter_json['page']['blog']['created']: + counter_obj['page']['blog']['created'] = counter_json['page']['blog']['created'] counter_changed = True - if counter_obj.no_comments_on_others_notes != counter_json['no_comments_on_others_notes'] : - counter_obj.no_comments_on_others_notes = counter_json['no_comments_on_others_notes'] + if counter_obj['page']['blog']['rating_count_received'] != counter_json['page']['blog']['rating_count_received']: + counter_obj['page']['blog']['rating_count_received'] = counter_json['page']['blog']['rating_count_received'] counter_changed = True - if counter_obj.rating_count_received_on_notes != counter_json['rating_count_received_on_notes'] : - counter_obj.rating_count_received_on_notes = counter_json['rating_count_received_on_notes'] + if counter_obj['page']['blog']['visits_gained'] != counter_json['page']['blog']['visits_gained']: + counter_obj['page']['blog']['visits_gained'] = counter_json['page']['blog']['visits_gained'] counter_changed = True - if counter_obj.avg_rating_received_on_notes != counter_json['avg_rating_received_on_notes'] : - counter_obj.avg_rating_received_on_notes = counter_json['avg_rating_received_on_notes'] + if counter_obj['page']['blog']['visits_on_others_res'] != counter_json['page']['blog']['visits_on_others_res']: + counter_obj['page']['blog']['visits_on_others_res'] = counter_json['page']['blog']['visits_on_others_res'] counter_changed = True - if counter_obj.comments_by_others_on_files != counter_json['comments_by_others_on_files'] and counter_json['comments_by_others_on_files']: - n.comments_by_others_on_files.extend(counter_json['comments_by_others_on_files']) - counter_changed = True + if counter_obj['page']['info']['avg_rating_gained'] != counter_json['page']['info']['avg_rating_gained']: + counter_obj['page']['info']['avg_rating_gained'] = counter_json['page']['info']['avg_rating_gained'] + counter_changed = True + + if counter_obj['page']['info']['commented_on_others_res'] != counter_json['page']['info']['commented_on_others_res']: + counter_obj['page']['info']['commented_on_others_res'] = counter_json['page']['info']['commented_on_others_res'] + counter_changed = True + + if counter_obj['page']['info']['comments_by_others_on_res'] != counter_json['page']['info']['comments_by_others_on_res']: + counter_obj['page']['info']['comments_by_others_on_res'] = counter_json['page']['info']['comments_by_others_on_res'] + counter_changed = True - if counter_obj.comments_by_others_on_notes != counter_json['comments_by_others_on_notes'] and counter_json['comments_by_others_on_notes']: - n.comments_by_others_on_notes.extend(counter_json['comments_by_others_on_notes']) + if counter_obj['page']['info']['comments_gained'] != counter_json['page']['info']['comments_gained']: + counter_obj['page']['info']['comments_gained'] = counter_json['page']['info']['comments_gained'] + counter_changed = True + + if counter_obj['page']['info']['created'] != counter_json['page']['info']['created']: + counter_obj['page']['info']['created'] = counter_json['page']['info']['created'] + counter_changed = True + + if counter_obj['page']['info']['rating_count_received'] != counter_json['page']['info']['rating_count_received']: + counter_obj['page']['info']['rating_count_received'] = counter_json['page']['info']['rating_count_received'] + 
counter_changed = True + + if counter_obj['page']['info']['visits_gained'] != counter_json['page']['info']['visits_gained']: + counter_obj['page']['info']['visits_gained'] = counter_json['page']['info']['visits_gained'] + counter_changed = True + + if counter_obj['page']['info']['visits_on_others_res'] != counter_json['page']['info']['visits_on_others_res']: + counter_obj['page']['info']['visits_on_others_res'] = counter_json['page']['info']['visits_on_others_res'] + counter_changed = True + + if counter_obj['page']['wiki']['avg_rating_gained'] != counter_json['page']['wiki']['avg_rating_gained']: + counter_obj['page']['wiki']['avg_rating_gained'] = counter_json['page']['wiki']['avg_rating_gained'] + counter_changed = True + if counter_obj['page']['wiki']['commented_on_others_res'] != counter_json['page']['wiki']['commented_on_others_res']: + counter_obj['page']['wiki']['commented_on_others_res'] = counter_json['page']['wiki']['commented_on_others_res'] + counter_changed = True + if counter_obj['page']['wiki']['comments_by_others_on_res'] != counter_json['page']['wiki']['comments_by_others_on_res']: + counter_obj['page']['wiki']['comments_by_others_on_res'] = counter_json['page']['wiki']['comments_by_others_on_res'] + counter_changed = True + if counter_obj['page']['wiki']['comments_gained'] != counter_json['page']['wiki']['comments_gained']: + counter_obj['page']['wiki']['comments_gained'] = counter_json['page']['wiki']['comments_gained'] counter_changed = True + if counter_obj['page']['wiki']['created'] != counter_json['page']['wiki']['created']: + counter_obj['page']['wiki']['created'] = counter_json['page']['wiki']['created'] + counter_changed = True + if counter_obj['page']['wiki']['rating_count_received'] != counter_json['page']['wiki']['rating_count_received']: + counter_obj['page']['wiki']['rating_count_received'] = counter_json['page']['wiki']['rating_count_received'] + counter_changed = True + if counter_obj['page']['wiki']['visits_gained'] != counter_json['page']['wiki']['visits_gained']: + counter_obj['page']['wiki']['visits_gained'] = counter_json['page']['wiki']['visits_gained'] + counter_changed = True + if counter_obj['page']['wiki']['visits_on_others_res'] != counter_json['page']['wiki']['visits_on_others_res']: + counter_obj['page']['wiki']['visits_on_others_res'] = counter_json['page']['wiki']['visits_on_others_res'] + counter_changed = True + if counter_obj['quiz']['attempted'] != counter_json['quiz']['attempted']: + counter_obj['quiz']['attempted'] = counter_json['quiz']['attempted'] + counter_changed = True + if counter_obj['quiz']['correct'] != counter_json['quiz']['correct']: + counter_obj['quiz']['correct'] = counter_json['quiz']['correct'] + counter_changed = True + if counter_obj['quiz']['incorrect'] != counter_json['quiz']['incorrect']: + counter_obj['quiz']['incorrect'] = counter_json['quiz']['incorrect'] + counter_changed = True + if counter_changed: log_file.write("\n Counter Updated: \n\t OLD: " + str(counter_obj), + "\n\tNew: "+str(counter_json)) @@ -558,7 +610,14 @@ def restore_counter_objects(rcs_counters_path): log_file.write("\nError while inserting Counter obj" + str(counter_insert_err)) pass -def call_group_import(rcs_repo_path,non_grp_root_node=None): +def call_group_import(rcs_repo_path, req_log_file_path, data_restore_path, non_grp_root_node=None): + + global log_file_path + global DATA_RESTORE_PATH + log_file_path = req_log_file_path + DATA_RESTORE_PATH = data_restore_path + log_file = open(log_file_path, 'a+') + log_file.write("\n######### Script 
ran on : " + str(datetime.datetime.now()) + " #########\n\n") rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") @@ -600,7 +659,7 @@ def copy_media_data(media_path): # copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) -def restore_node(filepath, non_grp_root_node=None): +def restore_node(filepath, non_grp_root_node=None, data_restore_path=None, req_log_file_path=None): ''' non_grp_root_node tuple (ObjectId, name) is used if the GSystem existing on target and we intend to skip the dumped-node-id having the name @@ -610,6 +669,13 @@ def restore_node(filepath, non_grp_root_node=None): global log_file global SCHEMA_ID_MAP global DATA_RESTORE_PATH + global log_file_path + if not DATA_RESTORE_PATH and data_restore_path: + DATA_RESTORE_PATH = data_restore_path + if not log_file or log_file_path and req_log_file_path: + log_file_path = req_log_file_path + log_file = open(log_file_path, 'a+') + if not SCHEMA_ID_MAP: SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) @@ -754,7 +820,8 @@ def restore_node(filepath, non_grp_root_node=None): log_file.write("\n Inserting Node doc : \n\t" + str(node_json)) node_id = node_collection.collection.insert(node_json) node_obj = node_collection.one({'_id': node_id}) - node_obj.save(groupid=ObjectId(CONFIG_VARIABLES.GROUP_ID)) + if 'GROUP_ID' in CONFIG_VARIABLES: + node_obj.save(groupid=ObjectId(CONFIG_VARIABLES.GROUP_ID)) log_file.write("\nUpdate RCS using save()") except Exception as node_insert_err: log_file.write("\nError while inserting Node obj" + str(node_insert_err)) From 0e0f30279dfe116ecd8282655b4edf7fe50fb23b Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Fri, 5 Jan 2018 12:09:12 +0530 Subject: [PATCH 10/13] import logic for counters impl --- .../ndf/management/commands/group_import.py | 2 +- .../ndf/management/commands/import_logic.py | 15 +++++++++------ .../ndf/management/commands/user_data_export.py | 8 +++++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py index eb0095c764..380cf77a4b 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py @@ -539,7 +539,7 @@ def core_import(non_grp_root_node=None, *args): log_file.write("\nUpdated CONFIG_VARIABLES: "+ str(CONFIG_VARIABLES)) print "\n Validating the data-dump" print "\nDATA_DUMP_PATH: ", DATA_DUMP_PATH - validate_data_dump(DATA_DUMP_PATH,CONFIG_VARIABLES.MD5, *args) + log_file.write(validate_data_dump(DATA_DUMP_PATH,CONFIG_VARIABLES.MD5, *args)) print "\n Checking the dump Group-id availability." check_group_availability(*args) print "\n User Restoration." diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py index ed07beba0d..daa953626b 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py @@ -97,7 +97,7 @@ def validate_data_dump(dump, md5, *args): This will ensure the exported data is NOT altered before importing """ - global log_file + log_stmt = "" from checksumdir import dirhash md5hash = dirhash(dump, 'md5') if md5 != md5hash: @@ -109,7 +109,7 @@ def validate_data_dump(dump, md5, *args): proceed_without_validation = raw_input("MD5 not matching. 
Restoration not recommended.\n \ Enter (y/Y) to continue ?") if proceed_without_validation not in ['y', 'Y']: - log_file.write("\n Checksum validation Failed on dump data") + log_stmt += "\n Checksum validation Failed on dump data" call_exit() else: print "\nValidation Success..!" @@ -120,7 +120,8 @@ def validate_data_dump(dump, md5, *args): proceed_with_validation = raw_input("MD5 Matching.\n \ Enter (y/Y) to proceed to restoration") if proceed_with_validation in ['y', 'Y']: - log_file.write("\n Checksum validation Success on dump data") + log_stmt += "\n Checksum validation Success on dump data" + return log_stmt def get_file_path_with_id(node_id, DATA_DUMP_PATH): file_name = (node_id + '.json') @@ -600,7 +601,7 @@ def restore_counter_objects(rcs_counters_path): if counter_changed: - log_file.write("\n Counter Updated: \n\t OLD: " + str(counter_obj), + "\n\tNew: "+str(counter_json)) + log_file.write("\n Counter Updated: \n\t OLD: " + str(counter_obj) + "\n\tNew: "+str(counter_json)) counter_obj.save() else: try: @@ -613,6 +614,7 @@ def restore_counter_objects(rcs_counters_path): def call_group_import(rcs_repo_path, req_log_file_path, data_restore_path, non_grp_root_node=None): global log_file_path + global log_file global DATA_RESTORE_PATH log_file_path = req_log_file_path DATA_RESTORE_PATH = data_restore_path @@ -799,8 +801,9 @@ def restore_node(filepath, non_grp_root_node=None, data_restore_path=None, req_l log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) - if ObjectId(CONFIG_VARIABLES.GROUP_ID) not in node_obj.group_set: - node_obj.group_set.append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) + # if 'GROUP_ID' in CONFIG_VARIABLES: + # if ObjectId(CONFIG_VARIABLES.GROUP_ID) not in node_obj.group_set: + # node_obj.group_set.append(ObjectId(CONFIG_VARIABLES.GROUP_ID)) # node_obj.group_set = [ObjectId(CONFIG_VARIABLES.GROUP_ID)] log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py index a75a3acab4..80105b48d6 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_export.py @@ -25,6 +25,7 @@ TOP_PATH = None GROUP_ID = None DATA_EXPORT_PATH = None +RESTORE_USER_DATA = True MEDIA_EXPORT_PATH = None SCHEMA_MAP_PATH = None log_file_path = None @@ -80,8 +81,9 @@ def create_configs_file(): configs_file_path = os.path.join(DUMP_PATH, "migration_configs.py") with open(configs_file_path, 'w+') as configs_file_out: configs_file_out.write("\nGSTUDIO_INSTITUTE_ID='" + str(GSTUDIO_INSTITUTE_ID) + "'") - configs_file_out.write("\nUNIT_NAMES='" + str(UNIT_NAMES) + "'") - configs_file_out.write("\nUNIT_IDS='" + str(UNIT_IDS) + "'") + configs_file_out.write("\nRESTORE_USER_DATA=" + str(RESTORE_USER_DATA)) + configs_file_out.write('\nUNIT_NAMES="' + str(UNIT_NAMES) + '"') + configs_file_out.write('\nUNIT_IDS="' + str(UNIT_IDS) + '"') configs_file_out.write("\nGIT_COMMIT_HASH='" + str(get_latest_git_hash()) + "'") configs_file_out.write("\nGIT_BRANCH_NAME='" + str(get_active_branch_name()) + "'") configs_file_out.write('\nSYSTEM_DETAILS="' + str(os.uname()) + '"') @@ -135,7 +137,7 @@ def handle(self, *args, **options): proceed_flag = raw_input("\nEnter y/Y to Confirm: ") if proceed_flag: try: - + datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") log_file_name = 'artifacts_dump_' + str(GSTUDIO_INSTITUTE_ID) + "_"+ 
str(datetimestamp) From 4ed6adb95cbc1dbca7a46e02aa897b02816c8e20 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Mon, 7 May 2018 16:02:41 +0530 Subject: [PATCH 11/13] user-data-import script --- .../management/commands/user_data_import.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_import.py diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_import.py new file mode 100644 index 0000000000..04b26e06b0 --- /dev/null +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_import.py @@ -0,0 +1,102 @@ +# uncompyle6 version 2.14.1 +# Python bytecode 2.7 (62211) +# Decompiled from: Python 2.7.6 (default, Oct 26 2016, 20:30:19) +# [GCC 4.8.4] +# Embedded file name: /home/docker/code/gstudio/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/user_data_import.py +# Compiled at: 2018-01-05 11:49:45 +""" +Import can also be called using command line args as following: + python manage.py group_import + like: + python manage.py group_import y y y +""" +import os, json, imp, subprocess +from bson import json_util +import pathlib2 +try: + from bson import ObjectId +except ImportError: + from pymongo.objectid import ObjectId + +import time, datetime +from django.core.management.base import BaseCommand, CommandError +from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection +from gnowsys_ndf.ndf.models import HistoryManager, RCS +from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, RCS_REPO_DIR +from users_dump_restore import load_users_dump +from import_logic import * +from gnowsys_ndf.settings import RCS_REPO_DIR_HASH_LEVEL +from schema_mapping import update_factory_schema_mapper +from gnowsys_ndf.ndf.views.utils import replace_in_list, merge_lists_and_maintain_unique_ele +DATA_RESTORE_PATH = None +DATA_DUMP_PATH = None +DEFAULT_USER_ID = 1 +DEFAULT_USER_SET = False +USER_ID_MAP = {} +SCHEMA_ID_MAP = {} +log_file = None +CONFIG_VARIABLES = None +DATE_AT_IDS = [] +GROUP_CONTAINERS = ['Module'] +date_related_at_cur = node_collection.find({'_type': 'AttributeType','name': {'$in': ['start_time', 'end_time', 'start_enroll', 'end_enroll']}}) +for each_date_related_at in date_related_at_cur: + DATE_AT_IDS.append(each_date_related_at._id) + +history_manager = HistoryManager() +rcs = RCS() + +def call_exit(): + print '\n Exiting...' + os._exit(0) + + +def core_import(*args): + global DATA_RESTORE_PATH + global log_file + global log_file_path + global DATA_DUMP_PATH + global CONFIG_VARIABLES + datetimestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + log_file_name = 'artifacts_restore_' + str(GSTUDIO_INSTITUTE_ID) + '_' + str(datetimestamp) + log_file_path = create_log_file(log_file_name) + log_file = open(log_file_path, 'w+') + log_file.write('\n######### Script ran on : ' + str(datetime.datetime.now()) + ' #########\n\n') + log_file.write('\nUpdated CONFIG_VARIABLES: ' + str(CONFIG_VARIABLES)) + print '\n Validating the data-dump' + print '\nDATA_DUMP_PATH: ', DATA_DUMP_PATH + log_file.write(validate_data_dump(DATA_DUMP_PATH, CONFIG_VARIABLES.MD5, *args)) + print '\n Checking the dump Group-id availability.' + print '\n User Restoration.' 
+ user_json_file_path = os.path.join(DATA_DUMP_PATH, 'users_dump.json') + log_stmt = user_objs_restoration(True, user_json_file_path, DATA_DUMP_PATH, *args) + log_file.write(log_stmt) + print '\n Factory Schema Restoration. Please wait..' + call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'), log_file_path, DATA_RESTORE_PATH, None) + copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) + return + + +class Command(BaseCommand): + + def handle(self, *args, **options): + global SCHEMA_ID_MAP + global DATA_RESTORE_PATH + global DATA_DUMP_PATH + global CONFIG_VARIABLES + if args and len(args) == 4: + DATA_RESTORE_PATH = args[0] + else: + DATA_RESTORE_PATH = raw_input('\n\tEnter absolute path of data-dump folder to restore:') + print '\nDATA_RESTORE_PATH: ', DATA_RESTORE_PATH + if os.path.exists(DATA_RESTORE_PATH): + if os.path.exists(os.path.join(DATA_RESTORE_PATH, 'dump')): + DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump') + SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) + CONFIG_VARIABLES = read_config_file(DATA_RESTORE_PATH) + core_import(*args) + print '*' * 70 + print '\n Log will be found at: ', log_file_path + print '*' * 70 + else: + print '\n No dump found at entered path.' + call_exit() \ No newline at end of file From 538aaee0f5ec11f45c7597f2c88552ed86970197 Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Mon, 7 May 2018 17:57:05 +0530 Subject: [PATCH 12/13] node import and export --- .../ndf/management/commands/node_export.py | 24 +- .../ndf/management/commands/node_import.py | 565 +----------------- 2 files changed, 49 insertions(+), 540 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py index ce46604136..66080cc425 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py @@ -146,7 +146,12 @@ def handle(self, *args, **options): global ROOT_DUMP_NODE_NAME global MULTI_DUMP global GLOBAL_DICT - input_name_or_id = raw_input("\n\tPlease enter ObjectID of the Node: ") + input_name_or_id = None + if args: + input_name_or_id = args[0] + else: + input_name_or_id = raw_input("\n\tPlease enter ObjectID of the Node: ") + dump_node_obj = node_collection.one({'_id': ObjectId(input_name_or_id), '_type': 'GSystem'}) if dump_node_obj: @@ -157,9 +162,13 @@ def handle(self, *args, **options): global TOP_PATH global DUMP_NODES_LIST datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") - TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', slugify(dump_node_obj.name) + "_"+ str(datetimestamp)) + TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', str(dump_node_obj._id) + "_"+ str(datetimestamp)) SCHEMA_MAP_PATH = TOP_PATH - print "\tRequest received for Export of : ", dump_node_obj.name , ' | ObjectId: ', dump_node_obj._id + print "\tRequest received for Export of : \n\t\tObjectId: ", dump_node_obj._id + try: + print "\t\tName : ", dump_node_obj.name + except Exception as e: + pass global RESTORE_USER_DATA user_data_dump = raw_input("\n\tDo you want to include Users in this export ? 
Enter y/Y to continue:\t ") if user_data_dump in ['y', 'Y']: @@ -176,19 +185,20 @@ def handle(self, *args, **options): create_users_dump(group_dump_path, dump_node_obj.contributors) configs_file_path = create_configs_file(dump_node_obj._id) - write_md5_of_dump(group_dump_path, configs_file_path) global log_file + update_globals() + dump_node(node=dump_node_obj,collection_name=node_collection, variables_dict=GLOBAL_DICT) + create_factory_schema_mapper(SCHEMA_MAP_PATH) log_file.write("\n*************************************************************") log_file.write("\n######### Script Completed at : " + str(datetime.datetime.now()) + " #########\n\n") print "END : ", str(datetime.datetime.now()) - update_globals() - dump_node(node=dump_node_obj,collection_name=node_collection, variables_dict=GLOBAL_DICT) - create_factory_schema_mapper(SCHEMA_MAP_PATH) + write_md5_of_dump(group_dump_path, configs_file_path) print "*"*70 print "\n This will take few minutes. Please be patient.\n" print "\n Log will be found at: ", log_file_path + print "\n Log will be found at: ", TOP_PATH print "*"*70 log_file.close() call_exit() \ No newline at end of file diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py index 7476f3d491..f198c5d7e4 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py @@ -20,6 +20,7 @@ # from bson.json_util import dumps,loads,object_hook from django.core.management.base import BaseCommand, CommandError +from gnowsys_ndf.ndf.management.commands.import_logic import * from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection from gnowsys_ndf.ndf.models import HistoryManager, RCS from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, RCS_REPO_DIR @@ -37,12 +38,6 @@ SCHEMA_ID_MAP = {} log_file = None CONFIG_VARIABLES = None -DATE_AT_IDS = [] -GROUP_CONTAINERS = ['Module'] -date_related_at_cur = node_collection.find({'_type': 'AttributeType', - 'name': {'$in': ["start_time", "end_time", "start_enroll", "end_enroll"]}}) -for each_date_related_at in date_related_at_cur: - DATE_AT_IDS.append(each_date_related_at._id) history_manager = HistoryManager() rcs = RCS() @@ -57,548 +52,52 @@ CONFIG_VARIABLES.MD5='aeba0e3629fb0443861c699ae327d962' ''' -def create_log_file(restore_path): - ''' - Creates log file in gstudio-logs/ with - the name of the dump folder - ''' - restore_path = restore_path.split("/")[-1] - log_file_name = 'node_import_' + str(CONFIG_VARIABLES.NODE_ID)+ '.log' - if not os.path.exists(GSTUDIO_LOGS_DIR_PATH): - os.makedirs(GSTUDIO_LOGS_DIR_PATH) - - log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name) - global log_file - log_file = open(log_file_path, 'w+') - log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n") - return log_file_path - - -def call_exit(): - print "\n Exiting..." 
- os._exit(0) - -def read_config_file(): - """ - Read migration_configs.py file generated during - the export of group and load the variables in - CONFIG_VARIABLES to be accessible in entire program - """ - global CONFIG_VARIABLES - CONFIG_VARIABLES = imp.load_source('config_variables', - os.path.join(DATA_RESTORE_PATH,'migration_configs.py')) - -def validate_data_dump(*args): - """ - For validation of the exported dump and the - importing data-dump, calculate MD5 and - check with CONFIG_VARIABLES.MD5 - This will ensure the exported data is NOT altered - before importing - """ - global log_file - from checksumdir import dirhash - md5hash = dirhash(DATA_DUMP_PATH, 'md5') - if CONFIG_VARIABLES.MD5 != md5hash: - print "\n MD5 NOT matching." - print "\nargs: ", args - if args and len(args) == 4: - proceed_without_validation = args[1] - else: - proceed_without_validation = raw_input("MD5 not matching. Restoration not recommended.\n \ - Enter (y/Y) to continue ?") - if proceed_without_validation not in ['y', 'Y']: - log_file.write("\n Checksum validation Failed on dump data") - call_exit() - else: - print "\nValidation Success..!" - proceed_with_validation = '' - if args and len(args) == 4: - proceed_without_validation = args[1] - else: - proceed_with_validation = raw_input("MD5 Matching.\n \ - Enter (y/Y) to proceed to restoration") - if proceed_with_validation in ['y', 'Y']: - log_file.write("\n Checksum validation Success on dump data") - -def get_file_path_with_id(node_id): - file_name = (node_id + '.json') - - collection_dir = os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo', 'Nodes') - - # Example: - # if -- file_name := "523f59685a409213818e3ec6.json" - # then -- collection_hash_dirs := "6/c/3/8/ - # -- from last (2^0)pos/(2^1)pos/(2^2)pos/(2^3)pos/../(2^n)pos" - # here n := hash_level_num - collection_hash_dirs = "" - for pos in range(0, RCS_REPO_DIR_HASH_LEVEL): - collection_hash_dirs += \ - (node_id[-2**pos] + "/") - file_path = \ - os.path.join(collection_dir, \ - (collection_hash_dirs + file_name)) - # print "\n\nfilepath: ", file_path - return file_path - -def user_objs_restoration(*args): - global USER_ID_MAP - global DEFAULT_USER_ID - global DEFAULT_USER_SET - global log_file - user_json_data = None - if CONFIG_VARIABLES.RESTORE_USER_DATA: - user_dump_restore = raw_input("\n\tUser dump is available. \ - Would you like to restore it (y/n) ?: ") - if user_dump_restore in ['y', 'Y']: - log_file.write("\n Request for users restoration : Yes.") - - user_json_file_path = os.path.join(DATA_DUMP_PATH, 'users_dump.json') - with open(user_json_file_path, 'rb+') as user_json_fin: - user_json_data = json.loads(user_json_fin.read()) - print "\n Restoring Users. Please wait.." - USER_ID_MAP = load_users_dump(DATA_DUMP_PATH, user_json_data) - log_file.write("\n USER_ID_MAP: "+ str(USER_ID_MAP)) - print "\n Completed Restoring Users." - else: - log_file.write("\n Request for users restoration : No.") - DEFAULT_USER_SET = True - default_user_confirmation = raw_input("\n\tRestoration will use default user-id=1. 
\ - \n\tEnter y to continue, or n if you want to use some other id?: ") - if default_user_confirmation in ['y', 'Y']: - log_file.write("\n Request for Default user with id=1 : Yes.") - DEFAULT_USER_ID = 1 - else: - log_file.write("\n Request for Default user with id=1 : No.") - DEFAULT_USER_ID = int(raw_input("Enter user-id: ")) - log_file.write("\n Request for Setting Default user with id :" + str(DEFAULT_USER_SET)) - else: - - print "*"*80 - user_dump_restore_default = '' - if args and len(args) == 4: - user_dump_restore_default = args[3] - else: - user_dump_restore_default = raw_input("\n\tUser dump is NOT available. \ - Would you like to use USER_ID=1 for restoration(y/n) ?: ") - if user_dump_restore_default in ['y', 'Y']: - DEFAULT_USER_SET = True - DEFAULT_USER_ID = 1 - print "\n No RESTORE_USER_DATA available. Setting Default user with id: 1" - log_file.write("\n No RESTORE_USER_DATA available. Setting Default user with id :" + str(DEFAULT_USER_SET)) - -def update_schema_id_for_triple(document_json): - if SCHEMA_ID_MAP: - global log_file - log_file.write("\nUpdating schema_id for triple.") - if u'relation_type' in document_json and document_json[u'relation_type'] in SCHEMA_ID_MAP: - log_file.write("\nOLD relation_type id " + str(document_json[u'relation_type'])) - document_json[u'relation_type'] = SCHEMA_ID_MAP[document_json[u'relation_type']] - log_file.write("\nNEW relation_type id " + str(document_json[u'relation_type'])) - if u'attribute_type' in document_json and document_json[u'attribute_type'] in SCHEMA_ID_MAP: - log_file.write("\nOLD attribute_type id " + str(document_json[u'attribute_type'])) - document_json[u'attribute_type'] = SCHEMA_ID_MAP[document_json[u'attribute_type']] - log_file.write("\nNEW attribute_type id " + str(document_json[u'attribute_type'])) - return document_json - -def _mapper(json_obj, key, MAP_obj, is_list=False): - log_file.write("\n Calling _mapper:\n\t " + str(json_obj)+ str(key)+ str(MAP_obj)+ str(is_list)) - - if key in json_obj: - if is_list: - for eu in json_obj[key]: - if eu in MAP_obj: - replace_in_list(json_obj[key],eu, MAP_obj[eu]) - else: - json_obj[key] = MAP_obj[json_obj[key]] - -def update_schema_and_user_ids(document_json): - log_file.write("\n Invoked update_schema_and_user_ids:\n\t " + str(document_json)) - global DEFAULT_USER_SET - global DEFAULT_USER_ID - if SCHEMA_ID_MAP: - _mapper(document_json, 'member_of', SCHEMA_ID_MAP, is_list=True) - _mapper(document_json, 'type_of', SCHEMA_ID_MAP, is_list=True) - - if DEFAULT_USER_SET: - document_json['contributors'] = [DEFAULT_USER_ID] - document_json['created_by'] = DEFAULT_USER_ID - document_json['modified_by'] = DEFAULT_USER_ID - if 'group_admin' in document_json: - document_json['group_admin'] = [DEFAULT_USER_ID] - if 'author_set' in document_json: - document_json['author_set'] = [DEFAULT_USER_ID] - - elif CONFIG_VARIABLES.RESTORE_USER_DATA and USER_ID_MAP: - _mapper(document_json, 'contributors', USER_ID_MAP, is_list=True) - _mapper(document_json, 'group_admin', USER_ID_MAP, is_list=True) - _mapper(document_json, 'author_set', USER_ID_MAP, is_list=True) - _mapper(document_json, 'created_by', USER_ID_MAP) - _mapper(document_json, 'modified_by', USER_ID_MAP) - - log_file.write("\n Finished update_schema_and_user_ids:\n\t " + str(document_json)) - return document_json - - ''' - else: - Schema is same. No updation required. 
- ''' - -def copy_version_file(filepath): - if os.path.exists(filepath): - cwd_path = os.getcwd() - posix_filepath = pathlib2.Path(filepath) - rcs_data_path = str(pathlib2.Path(*posix_filepath.parts[:7])) - rcs_file_path = str(pathlib2.Path(*posix_filepath.parts[7:])) - os.chdir(rcs_data_path) - cp = "cp -v " + rcs_file_path + " " +" --parents " + RCS_REPO_DIR + "/" - subprocess.Popen(cp,stderr=subprocess.STDOUT,shell=True) - os.chdir(cwd_path) - - -def restore_filehive_objects(rcs_filehives_path): - print "\nRestoring Filehives.." - global log_file - log_file.write("\nRestoring Filehives. ") - for dir_, _, files in os.walk(rcs_filehives_path): - for filename in files: - filepath = os.path.join(dir_, filename) - fh_json= get_json_file(filepath) - fh_obj = filehive_collection.one({'_id': ObjectId(fh_json['_id'])}) - - if not fh_obj: - copy_version_file(filepath) - log_file.write("\nRCS file copied : \n\t" + str(filepath) ) - try: - log_file.write("\nInserting new Filehive Object : \n\tNew-obj: " + \ - str(fh_json)) - node_id = filehive_collection.collection.insert(fh_json) - # print "\n fh_json: ", fh_json - fh_obj = filehive_collection.one({'_id': node_id}) - - fh_obj.save() - log_file.write("\nUpdate RCS using save()") - except Exception as fh_insert_err: - log_file.write("\nError while inserting FH obj" + str(fh_insert_err)) - pass - else: - log_file.write("\nFound Existing Filehive Object : \n\tFound-obj: " + \ - str(fh_obj) + "\n\tExiting-obj: "+str(fh_json)) - -def restore_node_objects(rcs_nodes_path): - print "\nRestoring Nodes.." - global log_file - log_file.write("\nRestoring Nodes. ") - for dir_, _, files in os.walk(rcs_nodes_path): - for filename in files: - filepath = os.path.join(dir_, filename) - restore_node(filepath) - -def restore_triple_objects(rcs_triples_path): - print "\nRestoring Triples.." - global log_file - log_file.write("\nRestoring Triples. 
") - for dir_, _, files in os.walk(rcs_triples_path): - for filename in files: - filepath = os.path.join(dir_, filename) - triple_json = get_json_file(filepath) - triple_obj = None - if triple_json and ('_id' in triple_json): - triple_obj = triple_collection.one({'_id': ObjectId(triple_json['_id'])}) - - if triple_obj: - log_file.write("\n Found Existing Triple : \n\t " + str(triple_obj)) - - - - - - triple_obj = update_schema_id_for_triple(triple_obj) - log_file.write("\n Updated Triple : \n\t " + str(triple_obj)) - triple_obj.save() - if triple_obj._type == "GRelation": - if triple_obj.right_subject != triple_json['right_subject']: - if type(triple_obj.right_subject) == list: - triple_collection.collection.update( - {'_id': triple_obj._id}, - {'$addToSet': {'right_subject': triple_json['right_subject']}}, - multi=False, upsert=False) - else: - triple_collection.collection.update( - {'_id': triple_obj._id}, - {'$set': {'right_subject': triple_json['right_subject']}}, - multi=False, upsert=False) - log_file.write("\n GRelation Updated : \n\t OLD: " + str(triple_obj), + "\n\tNew: "+str(triple_json)) - elif triple_obj.status == u"DELETED" and triple_json['status'] == u"PUBLISHED": - triple_obj.status = triple_json['status'] - triple_collection.collection.update( - {'subject': triple_obj.subject, 'relation_type': triple_json['relation_type'], '_id': {'$ne': triple_obj._id}}, - {'$set': {'status': u'DELETED'}}, - multi=True, upsert=False) - - - if triple_obj._type == "GAttribute": - if triple_obj.object_value != triple_json['object_value']: - if type(triple_obj.object_value) == list: - triple_collection.collection.update( - {'_id': triple_obj._id}, - {'$addToSet': {'object_value': triple_json['object_value']}}, - multi=False, upsert=False) - else: - triple_collection.collection.update( - {'_id': triple_obj._id}, - {'$set': {'object_value': triple_json['object_value']}}, - multi=False, upsert=False) - log_file.write("\n GAttribute Updated: \n\t OLD: " + str(triple_obj) + "\n\tNew: "+str(triple_json)) - triple_obj.save() - else: - copy_version_file(filepath) - log_file.write("\n RCS file copied : \n\t" + str(filepath)) - - try: - log_file.write("\n Inserting Triple doc : " + str(triple_json)) - triple_json = update_schema_id_for_triple(triple_json) - - node_id = triple_collection.collection.insert(triple_json) - triple_obj = triple_collection.one({'_id': node_id}) - triple_node_RT_AT_id = None - # if 'attribute_type' in triple_json: - # triple_node_RT_AT_id = triple_json['attribute_type'] - # else: - # triple_node_RT_AT_id = triple_json['relation_type'] - # triple_node_RT_AT = node_collection.one({'_id': ObjectId(triple_node_RT_AT_id)}) - # triple_obj.save(triple_node=triple_node_RT_AT, triple_id=triple_node_RT_AT._id) - triple_obj.save() - log_file.write("\nUpdate RCS using save()") - except Exception as tr_insert_err: - log_file.write("\nError while inserting Triple obj" + str(tr_insert_err)) - pass - - -def call_group_import(rcs_repo_path): - - rcs_filehives_path = os.path.join(rcs_repo_path, "Filehives") - rcs_nodes_path = os.path.join(rcs_repo_path, "Nodes") - rcs_triples_path = os.path.join(rcs_repo_path, "Triples") - rcs_counters_path = os.path.join(rcs_repo_path, "Counters") - - # Following sequence is IMPORTANT - # restore_filehive_objects(rcs_filehives_path) - restore_node_objects(rcs_nodes_path) - restore_triple_objects(rcs_triples_path) - - # skip foll. 
command katkamrachana 21Apr2017 - # Instead run python manage.py fillCounter - # restore_counter_objects(rcs_counters_path) - - -def copy_media_data(media_path): - # MEDIA_ROOT is destination usually: /data/media/ - # media_path is "dump-data/data/media" - if os.path.exists(media_path): - media_copy_cmd = "rsync -avzhP " + media_path + "/* " + MEDIA_ROOT + "/" - subprocess.Popen(media_copy_cmd,stderr=subprocess.STDOUT,shell=True) - log_file.write("\n Media Copied: " + str(media_path) ) - def core_import(*args): + global DATA_RESTORE_PATH global log_file - log_file_path = create_log_file(DATA_RESTORE_PATH) - print "\n Log will be found at: ", log_file_path - log_file.write("\nUpdated CONFIG_VARIABLES: "+ str(CONFIG_VARIABLES)) - print "\n Validating the data-dump" - validate_data_dump(*args) - print "\n User Restoration." - user_objs_restoration(*args) - print "\n Factory Schema Restoration. Please wait.." - # print "\n SCHEMA: ", SCHEMA_ID_MAP - call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo')) + global log_file_path + global DATA_DUMP_PATH + global CONFIG_VARIABLES + datetimestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + log_file_name = 'node_import_' + str(CONFIG_VARIABLES.NODE_ID) + '_' + str(datetimestamp) + log_file_path = create_log_file(log_file_name) + log_file = open(log_file_path, 'w+') + log_file.write('\n######### Script ran on : ' + str(datetime.datetime.now()) + ' #########\n\n') + log_file.write('\nUpdated CONFIG_VARIABLES: ' + str(CONFIG_VARIABLES)) + print '\n Validating the data-dump' + log_file.write(validate_data_dump(DATA_DUMP_PATH, CONFIG_VARIABLES.MD5, *args)) + print '\n Node Restoration.' + user_json_file_path = os.path.join(DATA_DUMP_PATH, 'users_dump.json') + log_stmt = user_objs_restoration(True, user_json_file_path, DATA_DUMP_PATH, *args) + log_file.write(log_stmt) + print '\n Factory Schema Restoration. Please wait..' + call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'), log_file_path, DATA_RESTORE_PATH, None) copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media')) + return + class Command(BaseCommand): - def handle(self, *args, **options): + def handle(self, *args, **options): + global SCHEMA_ID_MAP global DATA_RESTORE_PATH global DATA_DUMP_PATH - global SCHEMA_ID_MAP - if args and len(args) == 4: + global CONFIG_VARIABLES + if args: DATA_RESTORE_PATH = args[0] else: - DATA_RESTORE_PATH = raw_input("\n\tEnter absolute path of data-dump folder to restore:") - print "\nDATA_RESTORE_PATH: ", DATA_RESTORE_PATH + DATA_RESTORE_PATH = raw_input('\n\tEnter absolute path of data-dump folder to restore:') + print '\nDATA_RESTORE_PATH: ', DATA_RESTORE_PATH if os.path.exists(DATA_RESTORE_PATH): - # Check if DATA_DUMP_PATH has dump, if not then its dump of Node holding Groups. if os.path.exists(os.path.join(DATA_RESTORE_PATH, 'dump')): - # Single Group Dump DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump') SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH) - read_config_file() + CONFIG_VARIABLES = read_config_file(DATA_RESTORE_PATH) core_import(*args) - print "*"*70 - # print "\n Export will be found at: ", DATA_EXPORT_PATH - print "\n This will take few minutes. Please be patient.\n" - print "*"*70 - + print '*' * 70 + print '\n Log will be found at: ', log_file_path + print '*' * 70 else: - print "\n No dump found at entered path." + print '\n No dump found at entered path.' 
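# Usage sketch (illustration only, not part of this patch): the args handling
# above allows a non-interactive run, roughly
#   python manage.py node_export <node_object_id>
#   python manage.py node_import <dump_path> y y y
# where <dump_path> is the exported folder and the trailing answers stand in
# for the raw_input() confirmations (MD5-mismatch override, user-restore
# choices). The exact number and order of the extra arguments is inferred
# from the len(args) == 4 / args[1] / args[3] checks in this patch series and
# should be treated as an assumption.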
call_exit() -def restore_node(filepath): - global log_file - log_file.write("\nRestoring Node: " + str(filepath)) - - node_json = get_json_file(filepath) - print node_json - try: - node_obj = node_collection.one({'_id': ObjectId(node_json['_id'])}) - if node_obj: - node_obj = update_schema_and_user_ids(node_obj) - if SCHEMA_ID_MAP: - _mapper(node_obj, 'member_of', SCHEMA_ID_MAP, is_list=True) - _mapper(node_obj, 'type_of', SCHEMA_ID_MAP, is_list=True) - - log_file.write("\nFound Existing Node : " + str(node_obj._id)) - node_changed = False - if node_obj.author_set != node_json['author_set'] and node_json['author_set']: - log_file.write("\n Old author_set :\n\t " + str(node_obj.author_set)) - node_obj.author_set = merge_lists_and_maintain_unique_ele(node_obj.author_set, - node_json['author_set']) - log_file.write("\n New author_set :\n\t "+ str(node_obj.author_set)) - node_changed = True - - if node_obj.relation_set != node_json['relation_set'] and node_json['relation_set']: - log_file.write("\n Old relation_set :\n\t "+ str(node_obj.relation_set)) - node_obj.relation_set = merge_lists_and_maintain_unique_ele(node_obj.relation_set, - node_json['relation_set'], advanced_merge=True) - log_file.write("\n New relation_set :\n\t "+ str(node_obj.relation_set)) - node_changed = True - - if node_obj.attribute_set != node_json['attribute_set'] and node_json['attribute_set']: - log_file.write("\n Old attribute_set :\n\t "+ str(node_obj.attribute_set)) - node_obj.attribute_set = merge_lists_and_maintain_unique_ele(node_obj.attribute_set, - node_json['attribute_set'], advanced_merge=True) - log_file.write("\n New attribute_set :\n\t "+ str(node_obj.attribute_set)) - node_changed = True - - if node_obj.post_node != node_json['post_node'] and node_json['post_node']: - log_file.write("\n Old post_node :\n\t "+ str(node_obj.post_node)) - node_obj.post_node = merge_lists_and_maintain_unique_ele(node_obj.post_node, - node_json['post_node']) - log_file.write("\n New post_node :\n\t "+ str(node_obj.post_node)) - node_changed = True - - # if node_obj.group_set != node_json['group_set'] and node_json['group_set']: - # log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) - # node_obj.group_set = merge_lists_and_maintain_unique_ele(node_obj.group_set, - # node_json['group_set']) - # log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) - # node_changed = True - - if node_obj.prior_node != node_json['prior_node'] and node_json['prior_node']: - log_file.write("\n Old prior_node :\n\t "+ str(node_obj.prior_node)) - node_obj.prior_node = merge_lists_and_maintain_unique_ele(node_obj.prior_node, - node_json['prior_node']) - log_file.write("\n New prior_node :\n\t "+ str(node_obj.prior_node)) - node_changed = True - - if node_obj.origin != node_json['origin'] and node_json['origin']: - log_file.write("\n Old origin :\n\t "+ str(node_obj.origin)) - node_obj.origin = merge_lists_and_maintain_unique_ele(node_obj.origin, - node_json['origin']) - log_file.write("\n New origin :\n\t "+ str(node_obj.origin)) - node_changed = True - - # if node_obj.collection_set != node_json['collection_set'] and node_json['collection_set']: - # log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) - # log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set'])) - - # # node_obj.collection_set = merge_lists_and_maintain_unique_ele(node_obj.collection_set, - # # node_json['collection_set']) - # node_obj.collection_set = node_json['collection_set'] - # log_file.write("\n 
New collection_set :\n\t "+ str(node_obj.collection_set)) - # node_changed = True - - if node_obj.content != node_json['content'] and node_json['content']: - log_file.write("\n Old content :\n\t "+ str(node_obj.content)) - node_obj.content = node_json['content'] - node_changed = True - log_file.write("\n New content :\n\t "+ str(node_obj.content)) - - log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) - log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set'])) - - # node_obj.collection_set = merge_lists_and_maintain_unique_ele(node_obj.collection_set, - # node_json['collection_set']) - node_obj.collection_set = node_json['collection_set'] - log_file.write("\n New collection_set :\n\t "+ str(node_obj.collection_set)) - node_changed = True - - log_file.write("\n Old group_set :\n\t "+ str(node_obj.group_set)) - - log_file.write("\n New group_set :\n\t "+ str(node_obj.group_set)) - node_obj.access_policy = u'PUBLIC' - log_file.write("\n Setting access_policy: u'PUBLIC'") - node_changed = True - - if node_changed: - log_file.write("\n Node Updated: \n\t OLD: " + str(node_obj) + "\n\tNew: "+str(node_json)) - node_obj.save() - else: - copy_version_file(filepath) - log_file.write("\n RCS file copied : \n\t" + str(filepath)) - node_json = update_schema_and_user_ids(node_json) - node_json = update_group_set(node_json) - try: - log_file.write("\n Inserting Node doc : \n\t" + str(node_json)) - node_id = node_collection.collection.insert(node_json) - node_obj = node_collection.one({'_id': node_id}) - node_obj.save(groupid=ObjectId(node_obj.group_set[0])) - log_file.write("\nUpdate RCS using save()") - except Exception as node_insert_err: - log_file.write("\nError while inserting Node obj" + str(node_insert_err)) - pass - except Exception as restore_node_obj_err: - print "\n Error in restore_node_obj_err: ", restore_node_obj_err - log_file.write("\nOuter Error while inserting Node obj" + str(restore_node_obj_err)) - pass - -def parse_json_values(d): - # This decoder will be moved to models next to class NodeJSONEncoder - if u'uploaded_at' in d: - d[u'uploaded_at'] = datetime.datetime.fromtimestamp(d[u'uploaded_at']/1e3) - if u'last_update' in d: - d[u'last_update'] = datetime.datetime.fromtimestamp(d[u'last_update']/1e3) - if u'created_at' in d: - d[u'created_at'] = datetime.datetime.fromtimestamp(d[u'created_at']/1e3) - if u'attribute_type' in d or u'relation_type' in d: - d = update_schema_id_for_triple(d) - if u'attribute_type' in d: - if d[u'attribute_type'] in DATE_AT_IDS: - d[u'object_value'] = datetime.datetime.fromtimestamp(d[u'object_value']/1e3) - if u'attribute_set' in d: - for each_attr_dict in d[u'attribute_set']: - for each_key, each_val in each_attr_dict.iteritems(): - if each_key in [u"start_time", u"end_time", u"start_enroll", u"end_enroll"]: - each_attr_dict[each_key] = datetime.datetime.fromtimestamp(each_val/1e3) - return d - - -def get_json_file(filepath): - - # this will create a .json file of the document(node) - # at manage.py level - # Returns json and rcs filepath - try: - rcs.checkout(filepath) - fp = filepath.split('/')[-1] - # fp = filepath - if fp.endswith(',v'): - fp = fp.split(',')[0] - with open(fp, 'r') as version_file: - obj_as_json = json.loads(version_file.read(), object_hook=json_util.object_hook) - parse_json_values(obj_as_json) - rcs.checkin(fp) - # os.remove(fp) - return obj_as_json - except Exception as get_json_err: - print "Exception while getting JSON: ", get_json_err - pass From 
42fba0f1a760f5f690cd2ab0786a14b05d9a160d Mon Sep 17 00:00:00 2001 From: katkamrachana Date: Mon, 7 May 2018 17:57:49 +0530 Subject: [PATCH 13/13] consider name and altnames for node-import --- .../ndf/management/commands/import_logic.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py index daa953626b..b6d4530afb 100644 --- a/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py +++ b/gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py @@ -101,8 +101,7 @@ def validate_data_dump(dump, md5, *args): from checksumdir import dirhash md5hash = dirhash(dump, 'md5') if md5 != md5hash: - print "\n MD5 NOT matching." - print "\nargs: ", args + # print "\nargs: ", args if args and len(args) == 4: proceed_without_validation = args[1] else: @@ -627,7 +626,7 @@ def call_group_import(rcs_repo_path, req_log_file_path, data_restore_path, non_g rcs_counters_path = os.path.join(rcs_repo_path, "Counters") # Following sequence is IMPORTANT - # restore_filehive_objects(rcs_filehives_path) + restore_filehive_objects(rcs_filehives_path) restore_node_objects(rcs_nodes_path, non_grp_root_node) restore_triple_objects(rcs_triples_path) @@ -790,6 +789,18 @@ def restore_node(filepath, non_grp_root_node=None, data_restore_path=None, req_l node_changed = True log_file.write("\n New content :\n\t "+ str(node_obj.content)) + if node_obj.name != node_json['name'] and node_json['name']: + log_file.write("\n Old name :\n\t "+ str(node_obj.name)) + node_obj.name = unicode(node_json['name']) + node_changed = True + log_file.write("\n New name :\n\t "+ str(node_obj.name)) + + if node_obj.altnames != node_json['altnames'] and node_json['altnames']: + log_file.write("\n Old altnames :\n\t "+ str(node_obj.altnames)) + node_obj.altnames = unicode(node_json['altnames']) + node_changed = True + log_file.write("\n New altnames :\n\t "+ str(node_obj.altnames)) + log_file.write("\n Old collection_set :\n\t "+ str(node_obj.collection_set)) log_file.write("\n Requested collection_set :\n\t "+ str(node_json['collection_set']))
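A minimal sketch of the conditional-update rule that [PATCH 13/13] applies to name and altnames inside restore_node(): overwrite the stored field only when the dumped JSON carries a non-empty value that differs from it, log both values, and report the change so the caller saves the node once. The helper name merge_scalar_field is an assumption introduced for illustration; node_obj, node_json and log_file are the identifiers used in the patch.

# Illustrative sketch only -- not code from the patch.
def merge_scalar_field(node_obj, node_json, field, log_file):
    # Overwrite node_obj.<field> from the dumped JSON only when the incoming
    # value is non-empty and differs from what is already stored.
    incoming = node_json.get(field)
    current = getattr(node_obj, field, None)
    if incoming and current != incoming:
        log_file.write("\n Old %s :\n\t %s" % (field, current))
        setattr(node_obj, field, unicode(incoming))  # Python 2 text type, as in the patch
        log_file.write("\n New %s :\n\t %s" % (field, getattr(node_obj, field)))
        return True  # caller flips node_changed and calls node_obj.save() once
    return False

# e.g. inside restore_node():
#   node_changed = merge_scalar_field(node_obj, node_json, 'name', log_file) or node_changed
#   node_changed = merge_scalar_field(node_obj, node_json, 'altnames', log_file) or node_changed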