Skip to content

Commit

Permalink
Merge pull request #98 from grand-mother/dev_database
Browse files Browse the repository at this point in the history
Dev database
  • Loading branch information
ifleg authored Jun 26, 2024
2 parents 7c77b69 + 179207f commit a14d553
Show file tree
Hide file tree
Showing 18 changed files with 886 additions and 212 deletions.
Empty file modified data/download_data_grand.py
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion grand/dataio/version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.1
1.0.2
17 changes: 8 additions & 9 deletions granddb/config.ini → granddb/config.ini.example
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ socket_timeout = 5
; At least one localdir (incoming) is needed.
; incoming directory must be an absolute path
[directories]
localdir = ["/home/fleg/DEV/myincoming","/home/fleg/DEV/GRAND/incoming","/home/fleg/"]
localdir = ["/home/fleg/incoming/"]

; remote repositories to search for data if not present in local directories
; repositories are given as a list:
Expand All @@ -22,8 +22,8 @@ localdir = ["/home/fleg/DEV/myincoming","/home/fleg/DEV/GRAND/incoming","/home/f
; repository CCIN2P3 is already defined in the database (so it's not necessary to define it here), but credentials for it have
; to be supplied in the [credentials] section below
[repositories]
CC = ["ssh","cca.in2p3.fr",22,["/sps/grand/pengxiong/GP81_interpolation/GP81_1000m/SignalTimeSeries/","/sps/grand/pengxiong/Proton_ROOT/","/sps/trend/fleg/INCOMING"]]
WEB = [ "https", "github.com" , 443, ["/grand-mother/data_challenge1/raw/main/coarse_subei_traces_root/"]]
CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]]


; Credentials for repositories given as :
; Name = [user, keyfile]
Expand All @@ -34,20 +34,19 @@ WEB = [ "https", "github.com" , 443, ["/grand-mother/data_challenge1/raw/main/co
; For the ssh protocol, it's highly encouraged to use an ssh-agent (to avoid having to provide the password interactively at each run)
; To run an ssh-agent just do : `eval $(ssh-agent)` and `ssh-add .ssh/id_rsa`
[credentials]
CC = ["legrand",""]
CCIN2P3 = ["legrand",""]
SSHTUNNEL = ["fleg",""]
CCIN2P3 = ["john",""]
SSHTUNNEL = ["joe",""]

; database to use (only one database can be defined)
; Name = [server, port, database, login, passwd, sshtunnel_server, sshtunnel_port, sshtunnel_credentials ]
[database]
database = ["localhost", "" ,"granddb", "grandadmin", "popo","", "", ""]
#database = ["ccpgsqlexpe.in2p3.fr",6550,"granddb","grandadmin","<REDACTED - never commit real credentials, even in commented-out lines>","lpnclaude.in2p3.fr", 22, "SSHTUNNEL"]
database = ["localhost", "" ,"granddb", "grandadmin", "password","", "", ""]


; The following section is optional.
; it defines the repository where registered files need to go.
; repository_name = "path"
; if not provided, the files will go to the incoming directory defined in the [directories] section
; Useful only if you want to work on "localdir" but register files in a remote directory
#[registerer]
#CC = "/sps/grand/fleg/INCOMING"
#CCIN2P3 = "/sps/grand/data/auger/GRANDfiles"
29 changes: 14 additions & 15 deletions granddb/docker/pgsync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ groups:
repository_access: "where id_repository < (select max_value::BIGINT / 2 from pg_sequences where sequencename='repository_id_repository_seq') and id_protocol < (select max_value::BIGINT / 2 from pg_sequences where sequencename='protocol_id_protocol_seq')"
provider: "where id_provider < (select max_value::BIGINT / 2 from pg_sequences where sequencename='provider_id_provider_seq')"
file: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq')"
file_content: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq')"
dataset: "where id_dataset < (select max_value::BIGINT / 2 from pg_sequences where sequencename='dataset_id_dataset_seq')"
dataset_location: "where id_dataset < (select max_value::BIGINT / 2 from pg_sequences where sequencename='dataset_id_dataset_seq')"
file_location: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq') and id_repository < (select max_value::BIGINT / 2 from pg_sequences where sequencename='repository_id_repository_seq')"
tree_type: "where id_tree_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tree_type_id_tree_type_seq')"
file_trees: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq') and id_tree_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tree_type_id_tree_type_seq')"
modification_software: "where id_modification_software < (select max_value::BIGINT / 2 from pg_sequences where sequencename='modification_software_id_modification_software_seq')"
data_source: "where id_data_source < (select max_value::BIGINT / 2 from pg_sequences where sequencename='data_source_id_data_source_seq')"
data_generator: "where id_data_generator < (select max_value::BIGINT / 2 from pg_sequences where sequencename='data_generator_id_data_generator_seq')"
Expand All @@ -29,17 +31,14 @@ groups:
particule_type: "where id_particule_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='particule_type_id_particule_type_seq')"
event_type: "where id_event_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='event_type_id_event_type_seq')"
atmos_model: "where id_atmos_model < (select max_value::BIGINT / 2 from pg_sequences where sequencename='atmos_model_id_atmos_model_seq')"
trun: "where id_trun < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trun_seq')"
trunnoise: "where id_trunnoise < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trunnoise_seq')"
trunshowersim: "where id_trunshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trunshowersim_seq')"
trunefieldsim: "where id_trunefieldsim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trunefieldsim_seq')"
tshower: "where id_tshower < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tshower_seq')"
tshowersim: "where id_tshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tshowersim_seq')"
tvoltage: "where id_tvoltage < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tvoltage_seq')"
trawvoltage: "where id_trawvoltage < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trawvoltage_seq')"
tadc: "where id_tadc < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tadc_seq')"
tefield: "where id_tefield < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tefield_seq')"
#run: "where id_run < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_run_seq')"
#event: "where id_event < (select max_value::BIGINT / 2 from pg_sequences where sequencename='event_id_event_seq')"
#file_contains: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq') and id_run < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_run_seq') and id_event < (select max_value::BIGINT / 2 from pg_sequences where sequencename='event_id_event_seq')"

# Per-table sync filters: only rows whose id is below half of the sequence's
# max_value are replicated (the upper half is reserved for locally-generated ids).
# Sequence names follow the pattern <table>_id_<column>_seq.
trun: "where id_trun < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trun_id_trun_seq')"
trunnoise: "where id_trunnoise < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunnoise_id_trunnoise_seq')"
trunshowersim: "where id_trunshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunshowersim_id_trunshowersim_seq')"
# Fixed sequence name: was 'trunfieldsim_id_trunefieldsim_seq' (missing 'e'),
# which does not match the <table>_id_<column>_seq pattern used everywhere else.
# NOTE(review): confirm the actual sequence name in the schema before deploying —
# if the schema itself carries the typo, keep the old spelling instead.
trunefieldsim: "where id_trunefieldsim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunefieldsim_id_trunefieldsim_seq')"
trunvoltage: "where id_trunvoltage < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunvoltage_id_trunvoltage_seq')"
tshower: "where id_tshower < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tshower_id_tshower_seq')"
tshowersim: "where id_tshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tshowersim_id_tshowersim_seq')"
events: "where id_event < (select max_value::BIGINT / 2 from pg_sequences where sequencename='events_id_event_seq')"
transfer: "where id_raw_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='rawfile_id_raw_file_seq')"
rawfile: "where id_raw_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='rawfile_id_raw_file_seq')"
convertion: "where id_raw_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='rawfile_id_raw_file_seq')"
39 changes: 29 additions & 10 deletions granddb/granddatalib.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class DataManager:
_database = None
_provider = None

def __init__(self, file="config.ini"):
def __init__(self, file=os.path.join(os.path.dirname(__file__), 'config.ini')):
configur = ConfigParser()
# by default configparser convert all keys to lowercase... but we don't want !
configur.optionxform = lambda option: option
Expand Down Expand Up @@ -97,7 +97,9 @@ def __init__(self, file="config.ini"):
self._directories.append(Datasource("localdir", "local", "localhost", "", dirlist, self.incoming()))
# We also append localdirs to repositories... so search method will first look at local dirs before searching on remote locations
# self._repositories.append(Datasource("localdir", "local", "localhost", "", dirlist, self.incoming()))
self._repositories["localdir"] = Datasource("localdir", "local", "localhost", "", dirlist, self.incoming())
# But instead of localhost and localdir we use the name of the machine
hostname = socket.getfqdn(os.environ["HOSTNAME"])
self._repositories["localdir"] = Datasource("localdir", "local", hostname, "", dirlist, self.incoming())
else:
logger.error(f"Section directories is mandatory in config file {file}")
exit(1)
Expand Down Expand Up @@ -184,17 +186,24 @@ def SearchFileInDB(self, filename):
# If not, search first in localdirs and then in remote repositories. First match is returned.
def get(self, file, repository=None, path=None):
res = None
# Check if file is a simple name or full path name
if (os.path.dirname(file) != ""):
if (not (path is None) and (path != os.path.dirname(file))):
logger.warning(f"path given in filename ({os.path.dirname(file)}) and in repository path ({path}) are different ! The path {os.path.dirname(file)} from file will be used !")
path = os.path.dirname(file)
file = os.path.basename(file)

# if repository is given we get file directly from this repo
if not (repository is None):
rep = self.getrepo(repository)
if not (rep is None):
logger.debug(f"search in repository {rep.name()}")
logger.debug(f"search in repository {rep.name()} {path}")
res = rep.get(file, path)
# if no repo specified, we search everywhere
else:
for name, rep in self.repositories().items():
logger.debug(f"search in repository {rep.name()}")
res = rep.get(file)
logger.debug(f"search in repository {rep.name()} {path}")
res = rep.get(file, path)
if not (res is None):
break

Expand All @@ -221,11 +230,21 @@ def getrepo(self, repo):


##Function to register a file into the database. Returns the path to the file in the repository where the file was registered.
def register_file(self,filename):
def register_file(self,filename, repository=None, path=None):
newfilename = None
file = self.get(filename)
file = self.get(filename,repository,path)
if file is not None:
newfilename = self.referer().copy(file)
# If filename in referer repository then keep it
#print(os.path.basename(filename)+" "+self.referer().name()+" "+os.path.dirname(filename))
newfilename = self.get(os.path.basename(filename),self.referer().name())

if newfilename is None:
newfilename = self.referer().copy(file)
else:
newfilename = str(newfilename)

#print("newfilename = "+str(newfilename))

self.database().register_file(file, newfilename, self.referer().id_repository, self.provider())
return newfilename

Expand Down Expand Up @@ -384,7 +403,7 @@ def get(self, file, path=None):
else:
# No path given : we recursively search in all dirs and subdirs
for path in self.paths():
logger.debug(f"search in localdir {path}{file}")
logger.debug(f"search in localdir {path} for file {file}")

#my_file = Path(path + file)
my_file = None
Expand All @@ -404,7 +423,7 @@ def get(self, file, path=None):
if not found_file is None:
logger.debug(f"file found in localdir {found_file}")

return found_file
return str(found_file)

def copy(self, pathfile):
newname = self.incoming() + uniquename(pathfile)
Expand Down
Loading

0 comments on commit a14d553

Please sign in to comment.