tests and documentation changes, prepare to pip
Dolgalad committed Mar 24, 2021
1 parent 7bf13d8 commit b5a01cf
Showing 7 changed files with 326 additions and 19 deletions.
10 changes: 8 additions & 2 deletions amdapy/amdaWSClient/client.py
@@ -244,14 +244,15 @@ def get_dataset(self, token, starttime, stoptime, datasetid,
        'timeFormat': timeformat,
        'gzip': compression
    }

    result = self.__get_request('getDataset.php', params=self.__merge_params(params, optional_params))
    if not result:
        print("Error retrieving data, try a smaller time period")
        return None

    result_json = result.json()
    if not result_json or ('success' not in result_json) or (not result_json['success']) \
            or ('dataFileURLs' not in result_json) or (result_json['dataFileURLs'] == ''):
        print("Error retrieving data, try a smaller time period")
        return None

    return result_json['dataFileURLs']
@@ -381,17 +382,20 @@ def get_parameter(self, token, starttime, stoptime, paramid,
    result = self.__get_request(method='getParameter.php', params=self.__merge_params(params, optional_params))

    if not result:
        print("Error getting parameter data, try a smaller time period")
        return None

    result_json = result.json()

    if not result_json or ('success' not in result_json) or (not result_json['success']):
        print("Error getting parameter data, try a smaller time period")
        return None

    if ('status' in result_json) and (result_json['status'] == 'in progress'):
        # In batch mode
        while True:
            # Wait for 30 seconds
            print("Download in progress, please do not interrupt")
            time.sleep(30)
            result = self.get_status(result_json['id'])
            if not result:
@@ -516,7 +520,6 @@ def get_parameter(param_id, start_date, stop_date, col_names, date_parser=None):
        print("Error getting authentification token")
        return
    pfu=client.get_parameter(t,start,stop,param_id)
    print("in amdapy.amdaWSClient.client.get_parameter : pfu: {}".format(pfu))
    resp=requests.get(pfu)
    dparser=date_parser
    if dparser is None:
@@ -539,6 +542,9 @@ def get_dataset(dataset_id, start_date, stop_date, date_parser=None):
        print("Error getting authentification token")
        return
    pfu=client.get_dataset(t,start,stop,dataset_id)
    if pfu is None:
        return None
    #print("In amdaWSClient. get_dataset : pfu: {}".format(pfu))
    resp=requests.get(pfu)
    data=pd.read_csv(io.StringIO(resp.text), comment="#", header=None, sep="\s+")
    return data
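For context, a minimal usage sketch (not part of this commit) of the module-level get_dataset helper changed above, exercising the new None return path; the dataset id and the time strings are placeholders, and the exact time format accepted by the helper is an assumption here:

    from amdapy.amdaWSClient.client import get_dataset

    # Placeholder dataset id and ISO-8601 time strings (assumed format).
    data = get_dataset("tao-ura-sw", "2010-01-01T00:00:00", "2010-01-02T00:00:00")
    if data is None:
        # The web service reported a failure or returned no file URL.
        print("Retrieval failed, try a smaller time period")
    else:
        print(data.shape)  # pandas DataFrame parsed from the returned CSV

Before this change a failed retrieval would crash on requests.get(None); callers should now check for None before using the result.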
17 changes: 7 additions & 10 deletions amdapy/docs/index.rst
@@ -65,9 +65,13 @@ AMDA stores its data structures and how we propose to navigate it.
Installation
------------

The current packaged version of :program:`amdapy` can be downloaded :download:`amdapy tar <../../dist/amdapy-0.1.tar.gz>`.
The current packaged version of :program:`amdapy` can be downloaded :download:`amdapy tar <../../dist/amdapy-0.1.1.tar.gz>`.

To install the package with pip use the following::
Install by executing the following::

    pip3 install -i https://test.pypi.org/simple amdapy

To install the package with pip from the source archive use the following::

    pip3 install amdapy.tar.gz

@@ -187,14 +191,7 @@ A simple plot example :
.. code-block:: python

    >>> import matplotlib.pyplot as plt
    >>> parameter = dataset["density"]
    >>> fig = plt.figure()
    >>> plt.title("dataset: {}, parameter: {}".format("tao-ura-sw", parameter.name))
    >>> plt.xlabel("Time")
    >>> plt.ylabel("{} ({})".format(parameter.name, parameter.units))
    >>> plt.grid(True)
    >>> plt.plot(dataset["density"][:])
    >>> fig.autofmt_xdate()
    >>> fig = dataset["density"].plot(dataset_id="tao-ura-sw")
    >>> plt.show()
.. figure:: img/simple_plot.png
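For context, a hedged sketch of the shortened plotting workflow the updated documentation describes, assuming the dataset is retrieved as in the earlier docs examples and that plot() returns a matplotlib figure (as the assignment to fig above suggests); the savefig call is an illustrative extra step, not part of the docs:

    import matplotlib.pyplot as plt
    from amdapy.amda import AMDA

    amda = AMDA()
    dataset = amda.get(amda.collection.find("tao-ura-sw"))
    fig = dataset["density"].plot(dataset_id="tao-ura-sw")
    fig.savefig("tao_ura_density.png")  # illustrative, not in the original docs
    plt.show()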
25 changes: 18 additions & 7 deletions setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = amdapy
version = 0.1
version = 0.1.1
author = Alexandre Schulz, Benjamin Renard
author_email = [email protected], [email protected]
url = https://gitlab.irap.omp.eu/aschulz/amdapy
@@ -23,22 +23,33 @@ classifiers =
    License :: OSI Approved :: GNU General Public License v3 (GPLv3)
    Operating System :: OS Independent
    Programming Language :: Python
    Programming Language :: Python :: 3.5
    Programming Language :: Python :: 3.6
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8

[options]
python_requires = >=3.6
python_requires = >=3.5

setup_requires =
    setuptools >=38.3

#install_requires =
# requests >=2.22
# pandas >=1.*
# numpy >=1.19.*
# netCDF4 >=1.5.*
# lxml >=4.6.2
# matplotlib >= 3.3.4


install_requires =
    requests >=2.22
    pandas >=1.*
    numpy >=1.19.*
    netCDF4 >=1.5.*
    lxml >=4.6.2
    requests
    pandas
    numpy
    netCDF4
    lxml
    matplotlib

package_dir =
    . = .
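A small post-install smoke check, offered as a sketch rather than part of the commit, matching the now-unpinned install_requires list above and the TestPyPI install command added to the docs:

    # Assumes amdapy 0.1.1 was installed, e.g. via: pip3 install -i https://test.pypi.org/simple amdapy
    import requests, pandas, numpy, netCDF4, lxml, matplotlib
    import amdapy
    print("amdapy imported from", amdapy.__file__)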
89 changes: 89 additions & 0 deletions tests/ML_GTL_IsSOR_AL_cat_0.txt
@@ -0,0 +1,89 @@
# Name: ML_GTL_IsSOR_AL_cat_0;
# Historic: ;
# Creation Date: 2020-05-03T20:20:16;
# Parameter 1: id:cat_param_id_1; name:column_1; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 2: id:cat_param_id_2; name:column_2; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 3: id:cat_param_id_3; name:column_3; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 4: id:cat_param_id_4; name:column_4; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 5: id:cat_param_id_5; name:column_5; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 6: id:cat_param_id_6; name:column_6; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 7: id:cat_param_id_7; name:column_7; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 8: id:cat_param_id_8; name:column_8; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 9: id:cat_param_id_9; name:column_9; size:1; type:double; unit:; description:; ucd:; utype:;
# Parameter 10: id:cat_param_id_10; name:column_10; size:1; type:double; unit:; description:; ucd:; utype:;
1992-10-13T06:27:26 1992-10-13T06:30:06 23 0 0 0 0 0 0 0 0 0
1992-10-21T06:09:50 1992-10-21T06:14:48 6 0 0 0 0 0 0 0 0 0
1992-10-21T06:45:14 1992-10-21T06:49:29 22 0 0 0 0 0 0 0 0 0
1992-10-21T15:45:12 1992-10-21T15:48:02 6 0 0 0 0 0 0 0 0 0
1992-10-21T16:35:25 1992-10-21T16:39:39 12 0 0 0 0 0 0 0 0 0
1992-11-05T06:22:42 1992-11-05T06:26:57 23 0 0 0 0 0 0 0 0 0
1992-11-08T13:49:43 1992-11-08T13:53:57 10 0 0 0 0 0 0 0 0 0
1992-11-10T11:29:12 1992-11-10T11:32:02 10 0 0 0 0 0 0 0 0 0
1992-11-10T15:54:59 1992-11-10T16:00:38 10 0 0 0 0 0 0 0 0 0
1992-11-12T08:18:24 1992-11-12T08:21:56 7 0 0 0 0 0 0 0 0 0
1992-11-12T09:20:47 1992-11-12T09:25:44 22 0 0 0 0 0 0 0 0 0
1992-11-13T10:32:30 1992-11-13T10:36:37 6 0 0 0 0 0 0 0 0 0
1992-11-13T11:14:11 1992-11-13T11:31:12 14 0 0 0 0 0 0 0 0 0
1992-11-15T05:28:23 1992-11-15T05:32:31 20 0 0 0 0 0 0 0 0 0
1992-11-15T11:50:57 1992-11-15T11:53:35 10 0 0 0 0 0 0 0 0 0
1992-11-19T04:13:36 1992-11-19T04:17:19 23 0 0 0 0 0 0 0 0 0
1992-11-20T19:51:15 1992-11-20T19:56:34 10 0 0 0 0 0 0 0 0 0
1992-11-22T10:52:15 1992-11-22T10:56:30 10 0 0 0 0 0 0 0 0 0
1992-11-25T17:11:18 1992-11-25T17:13:26 6 0 0 0 0 0 0 0 0 0
1992-11-25T17:28:50 1992-11-25T17:32:01 22 0 0 0 0 0 0 0 0 0
1992-11-26T08:06:14 1992-11-26T08:08:53 7 0 0 0 0 0 0 0 0 0
1992-11-26T19:56:44 1992-11-26T19:59:56 20 0 0 0 0 0 0 0 0 0
1992-11-28T07:21:27 1992-11-28T07:28:25 13 0 0 0 0 0 0 0 0 0
1992-11-28T16:26:29 1992-11-28T16:31:17 10 0 0 0 0 0 0 0 0 0
1992-11-29T11:18:16 1992-11-29T11:25:14 10 0 0 0 0 0 0 0 0 0
1992-11-30T07:21:47 1992-11-30T07:24:27 10 0 0 0 0 0 0 0 0 0
1992-12-03T09:50:34 1992-12-03T09:53:13 13 0 0 0 0 0 0 0 0 0
1992-12-06T00:29:40 1992-12-06T00:32:52 20 0 0 0 0 0 0 0 0 0
1992-12-06T07:04:05 1992-12-06T07:08:22 5 0 0 0 0 0 0 0 0 0
1992-12-06T14:40:59 1992-12-06T14:50:35 10 0 0 0 0 0 0 0 0 0
1992-12-10T07:53:45 1992-12-10T07:56:56 10 0 0 0 0 0 0 0 0 0
1992-12-12T11:34:04 1992-12-12T11:36:12 10 0 0 0 0 0 0 0 0 0
1992-12-12T13:12:45 1992-12-12T13:14:53 15 0 0 0 0 0 0 0 0 0
1992-12-12T22:33:01 1992-12-12T22:35:08 10 0 0 0 0 0 0 0 0 0
1992-12-13T07:30:12 1992-12-13T07:32:19 10 0 0 0 0 0 0 0 0 0
1992-12-15T09:50:39 1992-12-15T09:53:19 23 0 0 0 0 0 0 0 0 0
1992-12-17T07:14:11 1992-12-17T07:16:19 10 0 0 0 0 0 0 0 0 0
1992-12-24T02:55:00 1992-12-24T02:57:40 10 0 0 0 0 0 0 0 0 0
1992-12-25T10:36:03 1992-12-25T10:43:08 23 0 0 0 0 0 0 0 0 0
1992-12-26T06:41:59 1992-12-26T06:43:35 7 0 0 0 0 0 0 0 0 0
1992-12-26T07:48:53 1992-12-26T07:52:04 12 0 0 0 0 0 0 0 0 0
1992-12-27T19:21:48 1992-12-27T19:24:59 6 0 0 0 0 0 0 0 0 0
1992-12-27T22:22:19 1992-12-27T22:25:30 25 0 0 0 0 0 0 0 0 0
1993-01-11T07:37:21 1993-01-11T08:00:32 14 0 0 0 0 0 0 0 0 0
1993-01-12T09:51:53 1993-01-12T09:54:32 10 0 0 0 0 0 0 0 0 0
1993-01-18T08:17:55 1993-01-18T08:20:34 13 0 0 0 0 0 0 0 0 0
1993-01-20T23:48:21 1993-01-20T23:52:05 13 0 0 0 0 0 0 0 0 0
1993-01-22T04:12:16 1993-01-22T04:15:28 10 0 0 0 0 0 0 0 0 0
1993-01-25T11:05:10 1993-01-25T11:07:49 5 0 0 0 0 0 0 0 0 0
1993-01-25T13:01:24 1993-01-25T13:11:26 15 0 0 0 0 0 0 0 0 0
1993-01-26T09:55:22 1993-01-26T10:00:07 10 0 0 0 0 0 0 0 0 0
1993-01-27T03:17:15 1993-01-27T03:21:14 10 0 0 0 0 0 0 0 0 0
1993-01-27T10:32:40 1993-01-27T10:41:24 15 0 0 0 0 0 0 0 0 0
1993-01-28T03:05:45 1993-01-28T03:10:31 20 0 0 0 0 0 0 0 0 0
1993-01-29T05:54:02 1993-01-29T05:58:48 25 0 0 0 0 0 0 0 0 0
1993-01-30T22:33:04 1993-01-30T22:40:13 15 0 0 0 0 0 0 0 0 0
1993-02-01T06:51:59 1993-02-01T06:58:20 15 0 0 0 0 0 0 0 0 0
1993-02-03T12:36:07 1993-02-03T12:39:17 7 0 0 0 0 0 0 0 0 0
1993-02-03T15:17:15 1993-02-03T15:22:01 5 0 0 0 0 0 0 0 0 0
1993-02-03T18:28:10 1993-02-03T18:32:08 20 0 0 0 0 0 0 0 0 0
1993-02-04T16:08:28 1993-02-04T16:20:22 10 0 0 0 0 0 0 0 0 0
1993-02-05T16:14:49 1993-02-05T16:18:47 6 0 0 0 0 0 0 0 0 0
1993-02-05T16:30:41 1993-02-05T16:34:39 22 0 0 0 0 0 0 0 0 0
1993-02-06T19:34:27 1993-02-06T19:39:13 6 0 0 0 0 0 0 0 0 0
1993-02-06T20:19:42 1993-02-06T20:22:53 22 0 0 0 0 0 0 0 0 0
1993-02-07T03:38:18 1993-02-07T03:42:16 20 0 0 0 0 0 0 0 0 0
1993-02-08T16:10:15 1993-02-08T16:15:47 23 0 0 0 0 0 0 0 0 0
1993-02-10T11:04:02 1993-02-10T11:08:47 15 0 0 0 0 0 0 0 0 0
1993-02-11T01:28:30 1993-02-11T01:33:16 25 0 0 0 0 0 0 0 0 0
1993-02-12T10:56:53 1993-02-12T11:00:51 25 0 0 0 0 0 0 0 0 0
1993-02-12T17:23:52 1993-02-12T17:27:51 7 0 0 0 0 0 0 0 0 0
1993-02-12T23:23:52 1993-02-12T23:28:38 25 0 0 0 0 0 0 0 0 0
1993-02-13T07:26:07 1993-02-13T07:31:41 15 0 0 0 0 0 0 0 0 0
1993-02-13T20:08:35 1993-02-13T20:11:46 6 0 0 0 0 0 0 0 0 0
1993-02-13T21:13:41 1993-02-13T21:18:27 22 0 0 0 0 0 0 0 0 0
1993-02-15T01:28:54 1993-02-15T01:33:40 30 0 0 0 0 0 0 0 0 0
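An alternative sketch (not part of the commit) for loading the catalog above with pandas instead of the hand-rolled parser in tests/test_ml1.py below; the names start and stop are illustrative, while column_1 to column_10 follow the parameter names in the file header:

    import pandas as pd

    cols = ["start", "stop"] + ["column_{}".format(i) for i in range(1, 11)]
    events = pd.read_csv("tests/ML_GTL_IsSOR_AL_cat_0.txt", comment="#", sep=r"\s+",
                         header=None, names=cols, parse_dates=["start", "stop"])
    print(events.shape)  # one row per catalogued event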
141 changes: 141 additions & 0 deletions tests/test_ml1.py
@@ -0,0 +1,141 @@
import os
import sys
sys.path.insert(0,"/home/aschulz/amdapy")
from amdapy.amda import AMDA
import matplotlib.pyplot as plt
import datetime
from datetime import timedelta
import numpy as np
import pickle as pkl

def load_labels(filename):
    a=[]
    with open(filename,"r") as f:
        d=f.read()
    for line in d.split("\n"):
        ld=line.split(" ")
        # try to convert first two elements to datetime objects
        try:
            ld[0]=datetime.datetime.strptime(ld[0], "%Y-%m-%dT%H:%M:%S")
            ld[1]=datetime.datetime.strptime(ld[1], "%Y-%m-%dT%H:%M:%S")
            ld[2]=int(int(ld[2])!=0)
            a.append(ld[:3])
        except:
            pass
    return np.array(a)

def get_train_data(start, stop):
    amda=AMDA()
    parameter_id="al"
    parameter_desc=amda.collection.find(parameter_id)
    dataset_desc=amda.collection.find(parameter_desc.dataset_id)
    dataset=amda.get(dataset_desc, start=start, stop=stop)
    return dataset["AL"][:]

def get_local_minima(x):
    a=[]
    n=x.shape[0]
    for i in range(n):
        if i==0:
            if x[i]<x[1]:
                a.append([i,x[i]])
        elif i==n-1:
            if x[i]<x[i-1]:
                a.append([i,x[i]])
        else:
            if x[i]<x[i-1] and x[i]<x[i+1]:
                a.append([i,x[i]])
    return np.array(a)

def plot_hist(x,bins=1000, title=None):
    plt.figure()
    if not title is None:
        plt.title(title)
    plt.hist(x, bins=bins)
    plt.show()
# label file
label_filename="/home/aschulz/amdapy/tests/ML_GTL_IsSOR_AL_cat_0.txt"
labels=load_labels(label_filename)
print("Number of events in label file : {}".format(labels.shape[0]))
print("First event : {}".format(labels[0,:]))
print("Last event : {}".format(labels[-1,:]))
# take data from 5 hours before and after the event bounds (start of first event and end of last)
default_timedelta=timedelta(hours=5)
data_start=labels[0,0]-default_timedelta
data_stop=labels[-1,1]+default_timedelta
print("Getting data from {} to {}".format(data_start, data_stop))
# get training data
if not os.path.exists("train_data.pkl"):
    train_data=get_train_data(data_start, data_stop)
    pkl.dump(train_data, open("train_data.pkl","wb"))
else:
    train_data=pkl.load(open("train_data.pkl","rb"))
train_data_np=train_data.to_numpy()
diff_train_data=np.diff(train_data_np)
print("Training data shape : {}".format(train_data.shape))
# count local minima
local_min=get_local_minima(train_data)
print("Local minima in train data : {}".format(local_min.shape[0]))
diff_local_min=get_local_minima(diff_train_data)
print("Local minima in diff of train data : {}".format(diff_local_min.shape[0]))
# plot local minima value distribution
plot_hist(train_data_np[local_min[:,0].astype(int)], title="local min dist in train data")
plot_hist(diff_train_data[diff_local_min[:,0].astype(int)], title="diff local min dist in train data")
# check the local minima position in increasing order
increasing_value_local_min=local_min[np.argsort(local_min[:,1]),:]
print(increasing_value_local_min[:10,:])
# plot training data and place vertical lines on the first 76 local minima (sorted by increasing value)
f, (ax1, ax2) = plt.subplots(2,1,sharex=True)
ax1.plot(train_data)
#plt.vlines(increasing_value_local_min[:76,0], -1000, 1000)
for i in range(76):
    ax1.axvline(train_data.index[increasing_value_local_min[i,0].astype(int)],c="r")
for i in range(labels.shape[0]):
    ax1.axvline(labels[i,0],c="b")
    ax1.axvline(labels[i,1],c="g")
from scipy import stats
w_s=100
print("doing normal tests")
a=np.array([stats.normaltest(np.diff(train_data[i-w_s:i+w_s]))[1] for i in range(w_s,train_data.shape[0]-w_s)])
print("done")
pos_pos=train_data<0.
tmp=train_data[pos_pos]
aa=np.abs(np.diff(tmp))/tmp[:-1]
ax2.plot(train_data.index[w_s:-w_s],a)
f.autofmt_xdate()

plt.show()
exit()
# plot the training data and the corresponding events
fig=plt.figure()
plt.title("Training data : AL index")
plt.xlabel("Time")
plt.ylabel("AL (nT)")
plt.plot(train_data)
fig.autofmt_xdate()
plt.show()
exit()
amda=AMDA()

parameter_id="al"
parameter_desc=amda.collection.find(parameter_id)
dataset_desc=amda.collection.find(parameter_desc.dataset_id)
start=dataset_desc.starttime
start=datetime.datetime(year=2008, month=1, day=4, hour=6 , minute=0)
dataset=amda.get(dataset_desc, start=start, stop=start+timedelta(hours=4))

print(dataset)

import numpy as np

# label file /mlPlasmas-model-substorms/data/...
al=dataset["AL"][:].to_numpy()
au=dataset["AU"][:].to_numpy()
ao=dataset["AO"][:].to_numpy()
ae=dataset["AE"][:].to_numpy()

print("al shape {}".format(al.shape))
print("au shape {}".format(au.shape))
print("al nans : {}".format(np.sum(np.isnan(al))/al.shape[0]))
print("au nans : {}".format(np.sum(np.isnan(au))/au.shape[0]))
print("ao nans : {}".format(np.sum(np.isnan(ao))/ao.shape[0]))
print("ae nans : {}".format(np.sum(np.isnan(ae))/ae.shape[0]))
dataset["AL"].plot()
plt.show()
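A vectorized numpy alternative to the get_local_minima helper used above, offered as a sketch rather than part of the commit; it returns indices only (not [index, value] pairs) and applies the same strict-inequality rule, including the edge handling:

    import numpy as np

    def local_minima_indices(x):
        # Interior points strictly below both neighbours, plus either edge when it
        # is below its single neighbour (same rule as get_local_minima above).
        x = np.asarray(x)
        interior = np.where((x[1:-1] < x[:-2]) & (x[1:-1] < x[2:]))[0] + 1
        edges = []
        if x.shape[0] > 1:
            if x[0] < x[1]:
                edges.append(0)
            if x[-1] < x[-2]:
                edges.append(x.shape[0] - 1)
        return np.sort(np.concatenate([interior, np.array(edges, dtype=int)]))

On train_data_np this should select the same positions as get_local_minima, without the Python-level loop.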
13 changes: 13 additions & 0 deletions tests/test_plot.py
@@ -0,0 +1,13 @@
import matplotlib.pyplot as plt
import sys
sys.path.insert(0,"/home/aschulz/amdapy")

from amdapy.amda import AMDA

amda=AMDA()

dataset_id="tao-ura-sw"
dataset_desc=amda.collection.find(dataset_id)
dataset=amda.get(dataset_desc)
dataset["velocity"].plot(dataset_id="tao-ura-sw")
plt.show()