diff --git a/.travis.yml b/.travis.yml index e4728659..7378c56b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,7 @@ install: - sudo apt-get install libblas-dev liblapack-dev libatlas-base-dev gfortran # python libs - sudo pip3 install --upgrade pip - - sudo pip3 install numpy scipy h5py "tensorflow==2.0.0" + - sudo pip3 install numpy scipy h5py "tensorflow==2.1.0" - echo "Version numbers of TensorFlow and Keras:" - python3 -c "import tensorflow as tf; import tensorflow.keras; print(tf.__version__); print(tensorflow.keras.__version__)" # FunctionalPlus diff --git a/CMakeLists.txt b/CMakeLists.txt index 81a83e28..d4f0560e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ list(APPEND CMAKE_MODULE_PATH "${FDEEP_TOP_DIR}/cmake") include(cmake/hunter.cmake) # default off -project(frugally-deep VERSION 0.12.1) +project(frugally-deep VERSION 0.13.0) message(STATUS "===( ${PROJECT_NAME} ${PROJECT_VERSION} )===") diff --git a/INSTALL.md b/INSTALL.md index 860af1bc..ed4c9262 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -63,7 +63,7 @@ Just add a *conanfile.txt* with frugally-deep as a requirement and chose the gen ``` [requires] -frugally-deep/v0.12.1-p0@dobiasd/stable +frugally-deep/v0.13.0-p0@dobiasd/stable [generators] cmake diff --git a/README.md b/README.md index 09aa8f16..7eb2d613 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ Requirements and Installation - A **C++14**-compatible compiler: Compilers from these versions on are fine: GCC 4.9, Clang 3.7 (libc++ 3.7) and Visual C++ 2015. - Python 3.5 or higher. -- TensorFlow 2.0.0 +- TensorFlow 2.1.0 Guides for different ways to install frugally-deep can be found in [`INSTALL.md`](INSTALL.md). 
diff --git a/include/fdeep/layers/bidirectional_layer.hpp b/include/fdeep/layers/bidirectional_layer.hpp index 31d46a57..4290fdff 100644 --- a/include/fdeep/layers/bidirectional_layer.hpp +++ b/include/fdeep/layers/bidirectional_layer.hpp @@ -56,17 +56,23 @@ class bidirectional_layer : public layer forward_state_h_(stateful ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), forward_state_c_(stateful && wrapped_layer_type_has_state_c(wrapped_layer_type) ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), backward_state_h_(stateful ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), - backward_state_c_(stateful && wrapped_layer_type_has_state_c(wrapped_layer_type) ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()) + backward_state_c_(stateful && wrapped_layer_type_has_state_c(wrapped_layer_type) ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), + use_avail_input_state_for_stateful_(true) + { } void reset_states() override { + // TF 2.1 Bug: reset_states() does nothing in TF 2.1. + // the implementation below is how TF 2.1 should behave. + // to match TF 2.1, just comment out the code below. if (is_stateful()) { forward_state_h_ = tensor(tensor_shape(n_units_), static_cast(0)); forward_state_c_ = tensor(tensor_shape(n_units_), static_cast(0)); backward_state_h_ = tensor(tensor_shape(n_units_), static_cast(0)); backward_state_c_ = tensor(tensor_shape(n_units_), static_cast(0)); + use_avail_input_state_for_stateful_ = true; } } @@ -110,29 +116,26 @@ class bidirectional_layer : public layer assertion(inputs.size() == 1 || inputs.size() == 5, "Invalid number of input tensors."); - tensor forward_state_h = inputs.size() == 5 - ? inputs[1] - : is_stateful() - ? forward_state_h_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); - - tensor forward_state_c = inputs.size() == 5 - ? inputs[2] - : is_stateful() - ? 
forward_state_c_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); - - tensor backward_state_h = inputs.size() == 5 - ? inputs[3] - : is_stateful() - ? backward_state_h_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); - - tensor backward_state_c = inputs.size() == 5 - ? inputs[4] - : is_stateful() - ? backward_state_c_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); + bool initial_state_provided = inputs.size() == 5; + bool use_last_state_for_initial_state = is_stateful() && !use_avail_input_state_for_stateful_; + bool use_input_initial_state = initial_state_provided && !use_last_state_for_initial_state; + // bool use_zero_initial_state = !use_input_initial_state && !use_last_state_for_initial_state; + + tensor forward_state_h = use_input_initial_state ? inputs[1] : + use_last_state_for_initial_state ? forward_state_h_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state + + tensor forward_state_c = use_input_initial_state ? inputs[2] : + use_last_state_for_initial_state ? forward_state_c_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state + + tensor backward_state_h = use_input_initial_state ? inputs[3] : + use_last_state_for_initial_state ? backward_state_h_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state + + tensor backward_state_c = use_input_initial_state ? inputs[4] : + use_last_state_for_initial_state ? 
backward_state_c_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state result_forward = lstm_impl(input, forward_state_h, forward_state_c, n_units_, use_bias_, return_sequences_, stateful_, @@ -147,6 +150,7 @@ class bidirectional_layer : public layer forward_state_c_ = forward_state_c; backward_state_h_ = backward_state_h; backward_state_c_ = backward_state_c; + use_avail_input_state_for_stateful_ = false; } } else if (wrapped_layer_type_ == "GRU" || wrapped_layer_type_ == "CuDNNGRU") @@ -154,17 +158,18 @@ class bidirectional_layer : public layer assertion(inputs.size() == 1 || inputs.size() == 3, "Invalid number of input tensors."); - tensor forward_state_h = inputs.size() == 3 - ? inputs[1] - : is_stateful() - ? forward_state_h_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); + bool initial_state_provided = inputs.size() == 3; + bool use_last_state_for_initial_state = is_stateful() && !use_avail_input_state_for_stateful_; + bool use_input_initial_state = initial_state_provided && !use_last_state_for_initial_state; + // bool use_zero_initial_state = !use_input_initial_state && !use_last_state_for_initial_state; + + tensor forward_state_h = use_input_initial_state ? inputs[1] : + use_last_state_for_initial_state ? forward_state_h_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state - tensor backward_state_h = inputs.size() == 3 - ? inputs[2] - : is_stateful() - ? backward_state_h_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); + tensor backward_state_h = use_input_initial_state ? inputs[2] : + use_last_state_for_initial_state ? 
backward_state_h_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state result_forward = gru_impl(input, forward_state_h, n_units_, use_bias_, reset_after_, return_sequences_, false, forward_weights_, forward_recurrent_weights_, @@ -175,6 +180,7 @@ class bidirectional_layer : public layer if (is_stateful()) { forward_state_h_ = forward_state_h; backward_state_h_ = backward_state_h; + use_avail_input_state_for_stateful_ = false; } } else @@ -223,6 +229,7 @@ class bidirectional_layer : public layer mutable fplus::maybe forward_state_c_; mutable fplus::maybe backward_state_h_; mutable fplus::maybe backward_state_c_; + mutable bool use_avail_input_state_for_stateful_; }; } // namespace internal diff --git a/include/fdeep/layers/gru_layer.hpp b/include/fdeep/layers/gru_layer.hpp index e561f772..3e5125db 100644 --- a/include/fdeep/layers/gru_layer.hpp +++ b/include/fdeep/layers/gru_layer.hpp @@ -44,7 +44,8 @@ class gru_layer : public layer weights_(weights), recurrent_weights_(recurrent_weights), bias_(bias), - state_h_(stateful ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()) + state_h_(stateful ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), + use_avail_input_state_for_stateful_(true) { } @@ -53,6 +54,7 @@ class gru_layer : public layer { if (is_stateful()) { state_h_ = tensor(tensor_shape(n_units_), static_cast(0)); + use_avail_input_state_for_stateful_ = true; } } @@ -77,17 +79,26 @@ class gru_layer : public layer assertion(inputs.size() == 1 || inputs.size() == 2, "Invalid number of input tensors."); - tensor state_h = inputs.size() == 2 - ? inputs[1] - : is_stateful() - ? 
state_h_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); - + // RNN behavior since TF 2.1: + // If an *initial state input is provided*, this is used always for non-stateful models + // but only on reset for stateful models (including the very first call) + // If *no input state is provided*, then initial state is 0 for non-stateful + // and, for stateful, it carries the state from previous call, unless state-reset, in which case it is set to 0 + bool initial_state_provided = inputs.size() == 2; + bool use_last_state_for_initial_state = is_stateful() && !use_avail_input_state_for_stateful_; + bool use_input_initial_state = initial_state_provided && !use_last_state_for_initial_state; + // bool use_zero_initial_state = !use_input_initial_state && !use_last_state_for_initial_state; + + tensor state_h = use_input_initial_state ? inputs[1] : + use_last_state_for_initial_state ? state_h_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state + const auto result = gru_impl(input, state_h, n_units_, use_bias_, reset_after_, return_sequences_, return_state_, weights_, recurrent_weights_, bias_, activation_, recurrent_activation_); if (is_stateful()) { state_h_ = state_h; + use_avail_input_state_for_stateful_ = false; } return result; } @@ -104,6 +115,7 @@ class gru_layer : public layer const float_vec recurrent_weights_; const float_vec bias_; mutable fplus::maybe state_h_; + mutable bool use_avail_input_state_for_stateful_; }; } // namespace internal diff --git a/include/fdeep/layers/lstm_layer.hpp b/include/fdeep/layers/lstm_layer.hpp index 9dade051..acd464d9 100644 --- a/include/fdeep/layers/lstm_layer.hpp +++ b/include/fdeep/layers/lstm_layer.hpp @@ -43,7 +43,9 @@ class lstm_layer : public layer recurrent_weights_(recurrent_weights), bias_(bias), state_h_(stateful ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), - state_c_(stateful ? 
tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()) + state_c_(stateful ? tensor(tensor_shape(n_units), static_cast(0)) : fplus::nothing()), + use_avail_input_state_for_stateful_(true) + { } @@ -52,6 +54,7 @@ class lstm_layer : public layer if (is_stateful()) { state_h_ = tensor(tensor_shape(n_units_), static_cast(0)); state_c_ = tensor(tensor_shape(n_units_), static_cast(0)); + use_avail_input_state_for_stateful_ = true; } } @@ -74,17 +77,23 @@ class lstm_layer : public layer assertion(inputs.size() == 1 || inputs.size() == 3, "Invalid number of input tensors."); - tensor state_h = inputs.size() == 3 - ? inputs[1] - : is_stateful() - ? state_h_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); + // RNN behavior since TF 2.1: + // If an *initial state input is provided*, this is used always for non-stateful models + // but only on reset for stateful models (including the very first call) + // If *no input state is provided*, then initial state is 0 for non-stateful + // and, for stateful, it carries the state from previous call, unless state-reset, in which case it is set to 0 + bool initial_state_provided = inputs.size() == 3; + bool use_last_state_for_initial_state = is_stateful() && !use_avail_input_state_for_stateful_; + bool use_input_initial_state = initial_state_provided && !use_last_state_for_initial_state; + // bool use_zero_initial_state = !use_input_initial_state && !use_last_state_for_initial_state; + + tensor state_h = use_input_initial_state ? inputs[1] : + use_last_state_for_initial_state ? state_h_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state - tensor state_c = inputs.size() == 3 - ? inputs[2] - : is_stateful() - ? state_c_.unsafe_get_just() - : tensor(tensor_shape(n_units_), static_cast(0)); + tensor state_c = use_input_initial_state ? inputs[2] : + use_last_state_for_initial_state ? 
state_c_.unsafe_get_just() : + tensor(tensor_shape(n_units_), static_cast(0)); // use_zero_initial_state const auto result = lstm_impl(input, state_h, state_c, n_units_, use_bias_, return_sequences_, return_state_, weights_, @@ -92,6 +101,7 @@ class lstm_layer : public layer if (is_stateful()) { state_h_ = state_h; state_c_ = state_c; + use_avail_input_state_for_stateful_ = false; } return result; } @@ -108,6 +118,7 @@ class lstm_layer : public layer const float_vec bias_; mutable fplus::maybe state_h_; mutable fplus::maybe state_c_; + mutable bool use_avail_input_state_for_stateful_; }; } // namespace internal diff --git a/test/stateful_test/stateful_recurrent_tests.cpp b/test/stateful_test/stateful_recurrent_tests.cpp index 57ad200e..cc3a0f6b 100644 --- a/test/stateful_test/stateful_recurrent_tests.cpp +++ b/test/stateful_test/stateful_recurrent_tests.cpp @@ -1,5 +1,5 @@ #include "fdeep/fdeep.hpp" -#include // looks like we need this too (edit by π) +#include using namespace fdeep; @@ -16,13 +16,23 @@ int main() const std::vector x_inf_0 = {2.1, -1.2, 3.14, 1.2}; const std::vector x_inf_1 = {1, 3, -2, 10}; - const std::vector state_0 = {1.1, -2.1}; - const std::vector state_1 = {2.7, 3.1}; - const std::vector state_2 = {-2.5, 3.0}; - const std::vector state_3 = {-2.0, -10.0}; + const std::vector state_0 = {40.1, -25.1}; + const std::vector state_1 = {34.7, 56.1}; + const std::vector state_2 = {-62.5, 12.0}; + const std::vector state_3 = {-33.0, -100.0}; + + + + // const std::vector state_0 = {1.1, -2.1}; + // const std::vector state_1 = {2.7, 3.1}; + // const std::vector state_2 = {-2.5, 3.0}; + // const std::vector state_3 = {-2.0, -10.0}; std::vector all_results = {}; std::vector one_result = {}; +// [40.1, -25.1, 34.7, 56.1, -62.5, 12.0, -33.0, -100.0] +// [1.1, -2.1, 2.7, 3.1, -2.5, 3.0, -2.0, -10.0] + const shared_float_vec xt0(fplus::make_shared_ref(x_inf_0)); const shared_float_vec xt1(fplus::make_shared_ref(x_inf_1)); const shared_float_vec 
st0(fplus::make_shared_ref(state_0)); @@ -158,16 +168,16 @@ int main() vec_append(all_results, *result[0].as_vector()); // ************************* BIDIRECTIONAL TESTS ************************* // - #define TF_BIDI_BUG_FIXED false + #define TF_BIDI_STATE_RESET_WORKS false // *********** TEST 9: "bidi-GRU_nonstateful_no_init_state.json" *********** model = load_model("./models/bidi-GRU_nonstateful_no_init_state.json"); /// state_reset = true result = model.predict({test_in_0}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict({test_in_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict({test_in_0}); vec_append(all_results, *result[0].as_vector()); @@ -179,10 +189,10 @@ int main() /// state_reset = true result = model.predict({test_in_0, test_state_0, test_state_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict({test_in_1, test_state_0, test_state_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict({test_in_0, test_state_0, test_state_1}); vec_append(all_results, *result[0].as_vector()); @@ -194,10 +204,10 @@ int main() /// state_reset = true result = model.predict_stateful({test_in_0}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict_stateful({test_in_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// 
state_reset = false result = model.predict_stateful({test_in_0}); vec_append(all_results, *result[0].as_vector()); @@ -209,10 +219,10 @@ int main() /// state_reset = true result = model.predict_stateful({test_in_0, test_state_0, test_state_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict_stateful({test_in_1, test_state_0, test_state_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict_stateful({test_in_0, test_state_0, test_state_1}); vec_append(all_results, *result[0].as_vector()); @@ -224,10 +234,10 @@ int main() /// state_reset = true result = model.predict({test_in_0}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict({test_in_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict({test_in_0}); vec_append(all_results, *result[0].as_vector()); @@ -239,10 +249,10 @@ int main() /// state_reset = true result = model.predict({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict({test_in_1, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, 
*result[0].as_vector()); @@ -254,10 +264,10 @@ int main() /// state_reset = true result = model.predict_stateful({test_in_0}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict_stateful({test_in_1}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict_stateful({test_in_0}); vec_append(all_results, *result[0].as_vector()); @@ -265,21 +275,21 @@ int main() vec_append(all_results, *result[0].as_vector()); // *********** TEST 16: "bidi-LSTM_stateful_init_state.json" *********** - model = load_model("./models/bidi-LSTM_nonstateful_init_state.json"); + model = load_model("./models/bidi-LSTM_stateful_init_state.json"); /// state_reset = true result = model.predict_stateful({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); result = model.predict_stateful({test_in_1, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, *result[0].as_vector()); - if(TF_BIDI_BUG_FIXED) model.reset_states(); + if(TF_BIDI_STATE_RESET_WORKS) model.reset_states(); /// state_reset = false result = model.predict_stateful({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, *result[0].as_vector()); result = model.predict_stateful({test_in_1, test_state_0, test_state_1, test_state_2, test_state_3}); vec_append(all_results, *result[0].as_vector()); - #undef TF_BIDI_BUG_FIXED + #undef TF_BIDI_STATE_RESET_WORKS if(verbose){ std::cout << "\n\nOUTPUT ***" << std::endl; diff --git a/test/stateful_test/stateful_recurrent_tests.py b/test/stateful_test/stateful_recurrent_tests.py index 5d5defb0..d5940016 100644 
--- a/test/stateful_test/stateful_recurrent_tests.py +++ b/test/stateful_test/stateful_recurrent_tests.py @@ -1,3 +1,8 @@ +# to hide any GPUs. +# import os +# os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID' +# os.environ['CUDA_VISIBLE_DEVICES']='' + import errno import os import sys @@ -194,7 +199,8 @@ def main(): x_inf = np.asarray([2.1, -1.2, 3.14, 1.2, 1, 3, -2, 10], dtype=np.float32) # simple x_inf = x_inf.reshape((2, train_seq_length, 1)) - initial_states = np.asarray([1.1, -2.1, 2.7, 3.1, -2.5, 3.0, -2.0, -10.0], dtype=np.float32) + initial_states = np.asarray([40.1, -25.1, 34.7, 56.1, -62.5, 12.0, -33.0, -100.0], dtype=np.float32) + # initial_states = np.asarray([1.1, -2.1, 2.7, 3.1, -2.5, 3.0, -2.0, -10.0], dtype=np.float32) initial_states = initial_states.reshape((4, 1, 2)) model_file_names = [] @@ -253,6 +259,7 @@ def main(): print('********* FAILED !!!!!!!!!!!!\n\n') print('Keras: ', all_results[i], '\n') print('Frugally-deep: ', frugally_deep_results[i], '\n') + print('Difference: ', all_results[i] - frugally_deep_results[i], '\n') all_tests_passed = False if not all_tests_passed: @@ -260,6 +267,5 @@ def main(): sys.exit(errno.EIO) print('\n\nPassed all stateful tests') - if __name__ == "__main__": main() diff --git a/test/stateful_test/tf_behaivor_scripts/temp_bidi_no_state_in.py b/test/stateful_test/tf_behaivor_scripts/temp_bidi_no_state_in.py new file mode 100644 index 00000000..3ec648d5 --- /dev/null +++ b/test/stateful_test/tf_behaivor_scripts/temp_bidi_no_state_in.py @@ -0,0 +1,48 @@ +import os +os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID' +os.environ['CUDA_VISIBLE_DEVICES']='' + +import numpy as np +from tensorflow.keras.layers import Input, Dense, SimpleRNN, GRU, LSTM, Bidirectional +from tensorflow.keras.models import Model + +REC = LSTM + +sequence_length = 3 +feature_dim = 1 +features_in = Input(batch_shape=(1, sequence_length, feature_dim)) + +rnn_out = Bidirectional( REC(1, activation=None, use_bias=False, return_sequences=True, 
return_state=False, stateful=False))(features_in) +stateless_model = Model(inputs=[features_in], outputs=[rnn_out]) + +stateful_rnn_out = Bidirectional( REC(1, activation=None, use_bias=False, return_sequences=True, return_state=False, stateful=True))(features_in) +stateful_model = Model(inputs=features_in, outputs=stateful_rnn_out) + +stateful_model.set_weights( stateless_model.get_weights() ) + +x_in = np.random.normal(0,10,sequence_length) +x_in = x_in.reshape( (1, sequence_length, feature_dim) ) + +def print_bidi_out(non_stateful_out, stateful_out): + fb = ['FWD::', 'BWD::'] + + for i in range(2): + print(fb[i]) + print(f'non_stateful: {non_stateful_out.T[i]}') + print(f'stateful: {stateful_out.T[i]}') + print(f'delta: {stateful_out.T[i]-non_stateful_out.T[i]}') + + +non_stateful_out = stateless_model.predict(x_in).reshape((sequence_length,2)) +stateful_out = stateful_model.predict(x_in).reshape((sequence_length,2)) +print_bidi_out(non_stateful_out, stateful_out) + +non_stateful_out = stateless_model.predict(x_in).reshape((sequence_length,2)) +stateful_out = stateful_model.predict(x_in).reshape((sequence_length,2)) +print_bidi_out(non_stateful_out, stateful_out) + +print('\n** RESETING STATES in STATEFUL MODEL **\n') +stateful_model.reset_states() +non_stateful_out = stateless_model.predict(x_in).reshape((sequence_length,2)) +stateful_out = stateful_model.predict(x_in).reshape((sequence_length,2)) +print_bidi_out(non_stateful_out, stateful_out) diff --git a/test/stateful_test/tf_behaivor_scripts/temp_bidi_state_in.cpp b/test/stateful_test/tf_behaivor_scripts/temp_bidi_state_in.cpp new file mode 100644 index 00000000..3e266a9f --- /dev/null +++ b/test/stateful_test/tf_behaivor_scripts/temp_bidi_state_in.cpp @@ -0,0 +1,72 @@ +#include "fdeep/fdeep.hpp" +#include + +using namespace fdeep; + +int main() +{ + + // x_in = np.random.normal(0,10,sequence_length) + // x_in = np.asarray([1,0,0]) + // x_in = x_in.reshape( (1, sequence_length, feature_dim) ) + + // 
fwd_initial_h = np.asarray(2.75).reshape(1,1) + // fwd_initial_c = np.asarray(1.3).reshape(1,1) + // bwd_initial_h = np.asarray(-2.0).reshape(1,1) + // bwd_initial_c = np.asarray(-1.2).reshape(1,1) + + const std::vector x_inf_0 = {1.0, 0.0, 0.0}; + const std::vector state_0 = {2.75}; + const std::vector state_1 = {1.3}; + const std::vector state_2 = {-2.0}; + const std::vector state_3 = {-1.2}; + + const shared_float_vec xt0(fplus::make_shared_ref(x_inf_0)); + const shared_float_vec st0(fplus::make_shared_ref(state_0)); + const shared_float_vec st1(fplus::make_shared_ref(state_1)); + const shared_float_vec st2(fplus::make_shared_ref(state_2)); + const shared_float_vec st3(fplus::make_shared_ref(state_3)); + + const tensor test_in_0(tensor_shape(3, 1), xt0); + const tensor test_state_0(tensor_shape(static_cast(1)), st0); + const tensor test_state_1(tensor_shape(static_cast(1)), st1); + const tensor test_state_2(tensor_shape(static_cast(1)), st2); + const tensor test_state_3(tensor_shape(static_cast(1)), st3); + + + std::cout << "loading models" << std::endl; + auto stateful_model = load_model("temp_stateful.json"); + auto stateless_model = load_model("temp_stateless.json"); + + // input for GRU: {test_in_0, test_state_0, test_state_2}; + // input for LSTM: {test_in_0, test_state_0, test_state_1, test_state_2, test_state_3} + + // A + std::cout << "starting A" << std::endl; + auto non_stateful_out = stateless_model.predict({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); + auto stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); + std::cout << "Non-Stateful" << std::endl; + std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; + + // B + std::cout << "starting B" << std::endl; + non_stateful_out = stateless_model.predict({test_in_0, test_state_0, test_state_1, 
test_state_2, test_state_3}); + stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); + std::cout << "Non-Stateful" << std::endl; + std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; + + // C + std::cout << "starting C" << std::endl; + // stateful_model.reset_states(); + non_stateful_out = stateless_model.predict({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); + stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0, test_state_1, test_state_2, test_state_3}); + std::cout << "Non-Stateful" << std::endl; + std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; +} + diff --git a/test/stateful_test/tf_behaivor_scripts/temp_bidi_state_in.py b/test/stateful_test/tf_behaivor_scripts/temp_bidi_state_in.py new file mode 100644 index 00000000..2d51b5f0 --- /dev/null +++ b/test/stateful_test/tf_behaivor_scripts/temp_bidi_state_in.py @@ -0,0 +1,90 @@ +import os +os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID' +os.environ['CUDA_VISIBLE_DEVICES']='' + +import numpy as np +from tensorflow.keras.layers import Input, Dense, SimpleRNN, GRU, LSTM, Bidirectional +from tensorflow.keras.models import Model + +REC = LSTM + +sequence_length = 3 +feature_dim = 1 +features_in = Input(batch_shape=(1, sequence_length, feature_dim)) +state_h_fwd_in = Input(batch_shape=(1, 1)) +state_h_bwd_in = Input(batch_shape=(1, 1)) +state_c_fwd_in = Input(batch_shape=(1, 1)) +state_c_bwd_in = Input(batch_shape=(1, 1)) + +four_state_shape = [state_h_fwd_in, state_c_fwd_in, state_h_bwd_in, state_c_bwd_in] +two_state_shape = [state_h_fwd_in, state_h_bwd_in] + +if REC == LSTM: + rnn_out = Bidirectional( REC(1, activation='linear', use_bias=False, return_sequences=True, 
return_state=False, stateful=False))(features_in, initial_state=four_state_shape) + stateful_rnn_out = Bidirectional( REC(1, activation='linear', use_bias=False, return_sequences=True, return_state=False, stateful=True))(features_in, initial_state=four_state_shape) + rnn_inputs = [features_in, state_h_fwd_in, state_c_fwd_in, state_h_bwd_in, state_c_bwd_in] +else: + if REC == SimpleRNN: + rnn_out = Bidirectional( REC(1, activation='linear', use_bias=False, return_sequences=True, return_state=False, stateful=False))(features_in, initial_state=two_state_shape) + stateful_rnn_out = Bidirectional( REC(1, activation='linear', use_bias=False, return_sequences=True, return_state=False, stateful=True))(features_in, initial_state=two_state_shape) + else: + rnn_out = Bidirectional( REC(1, activation='linear', use_bias=False, return_sequences=True, return_state=False, stateful=False))(features_in, initial_state=two_state_shape) + stateful_rnn_out = Bidirectional( REC(1, activation='linear', use_bias=False, return_sequences=True, return_state=False, stateful=True))(features_in, initial_state=two_state_shape) + rnn_inputs = [features_in, state_h_fwd_in, state_h_bwd_in] + +stateless_model = Model(inputs=rnn_inputs, outputs=rnn_out) +stateful_model = Model(inputs=rnn_inputs, outputs=stateful_rnn_out) + + +# toy_weights = [np.asarray([[ 1.0]], dtype=np.float32), np.asarray([[0.5 ]], dtype=np.float32), np.asarray([[ -1.0 ]], dtype=np.float32), np.asarray([[ -0.5 ]], dtype=np.float32)] +# stateless_model.set_weights(toy_weights) +# stateful_model.set_weights(toy_weights) + +stateful_model.set_weights( stateless_model.get_weights() ) + +stateful_model.save('temp_stateful.h5') +stateless_model.save('temp_stateless.h5') + +x_in = np.random.normal(0,10,sequence_length) +x_in = np.asarray([1,0,0]) +x_in = x_in.reshape( (1, sequence_length, feature_dim) ) + +fwd_initial_h = np.asarray(2.75).reshape(1,1) +fwd_initial_c = np.asarray(1.3).reshape(1,1) +bwd_initial_h = 
np.asarray(-2.0).reshape(1,1) +bwd_initial_c = np.asarray(-1.2).reshape(1,1) + +# fwd_initial_h = np.asarray(np.random.normal(0,10)).reshape(1,1) +# fwd_initial_h = np.asarray(np.random.normal(0,10)).reshape(1,1) +# bwd_initial_h = np.asarray(np.random.normal(0,10)).reshape(1,1) +# fwd_initial_c = np.asarray(np.random.normal(0,10)).reshape(1,1) +# bwd_initial_c = np.asarray(np.random.normal(0,10)).reshape(1,1) + +if REC == LSTM: + rnn_input = [x_in, fwd_initial_h, fwd_initial_c, bwd_initial_h, bwd_initial_c] +else: + rnn_input = [x_in, fwd_initial_h, bwd_initial_h] + + +def print_bidi_out(non_stateful_out, stateful_out): + fb = ['FWD::', 'BWD::'] + + for i in range(2): + print(fb[i]) + print(f'non_stateful: {non_stateful_out.T[i]}') + print(f'stateful: {stateful_out.T[i]}') + print(f'delta: {stateful_out.T[i]-non_stateful_out.T[i]}') + +non_stateful_out = stateless_model.predict(rnn_input).reshape((sequence_length,2)) +stateful_out = stateful_model.predict(rnn_input).reshape((sequence_length,2)) +print_bidi_out(non_stateful_out, stateful_out) + +non_stateful_out = stateless_model.predict(rnn_input).reshape((sequence_length,2)) +stateful_out = stateful_model.predict(rnn_input).reshape((sequence_length,2)) +print_bidi_out(non_stateful_out, stateful_out) + +print('\n** RESETING STATES in STATEFUL MODEL **\n') +stateful_model.reset_states() +non_stateful_out = stateless_model.predict(rnn_input).reshape((sequence_length,2)) +stateful_out = stateful_model.predict(rnn_input).reshape((sequence_length,2)) +print_bidi_out(non_stateful_out, stateful_out) diff --git a/test/stateful_test/tf_behaivor_scripts/temp_rnn_test.cpp b/test/stateful_test/tf_behaivor_scripts/temp_rnn_test.cpp new file mode 100644 index 00000000..5e627b83 --- /dev/null +++ b/test/stateful_test/tf_behaivor_scripts/temp_rnn_test.cpp @@ -0,0 +1,57 @@ +#include "fdeep/fdeep.hpp" +#include + +using namespace fdeep; + +int main() +{ + const std::vector x_inf_0 = {1.0, 0.0, 0.0}; + const std::vector state_0 = 
{10.0}; + + const shared_float_vec xt0(fplus::make_shared_ref(x_inf_0)); + const shared_float_vec st0(fplus::make_shared_ref(state_0)); + + std::cout << "convert to tensors" << std::endl; + const tensor test_in_0(tensor_shape(3, 1), xt0); + std::cout << "convert to tensors" << std::endl; + const tensor test_state_0(tensor_shape(static_cast(1)), st0); + + std::cout << "loading models" << std::endl; + auto stateful_model = load_model("temp_stateful.json"); + auto stateless_model = load_model("temp_stateless.json"); + + // A + std::cout << "starting A" << std::endl; + auto non_stateful_out = stateless_model.predict({test_in_0, test_state_0}); + auto stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0}); + std::cout << "Non-Stateful" << std::endl; + std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; + + // B + non_stateful_out = stateless_model.predict({test_in_0, test_state_0}); + stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0}); + std::cout << "Non-Stateful" << std::endl; + std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; + + // C + std::cout << "** RESETTING STATES in STATEFUL MODEL **" << std::endl; + stateful_model.reset_states(); + non_stateful_out = stateless_model.predict({test_in_0, test_state_0}); + stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0}); + std::cout << "Non-Stateful" << std::endl; + std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; + + //D + non_stateful_out = stateless_model.predict({test_in_0, test_state_0}); + stateful_out = stateful_model.predict_stateful({test_in_0, test_state_0}); + std::cout << "Non-Stateful" << std::endl; + 
std::cout << fdeep::show_tensors(non_stateful_out) << std::endl; + std::cout << "Stateful" << std::endl; + std::cout << fdeep::show_tensors(stateful_out) << std::endl; +} \ No newline at end of file diff --git a/test/stateful_test/tf_behaivor_scripts/temp_rnn_test.py b/test/stateful_test/tf_behaivor_scripts/temp_rnn_test.py new file mode 100644 index 00000000..acf9dedf --- /dev/null +++ b/test/stateful_test/tf_behaivor_scripts/temp_rnn_test.py @@ -0,0 +1,68 @@ +import os +os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID' +os.environ['CUDA_VISIBLE_DEVICES']='' + +import numpy as np +from tensorflow.keras.layers import Input, Dense, SimpleRNN, GRU, LSTM, Bidirectional +from tensorflow.keras.models import Model + +USE_TOY_WEIGHTS = True +REC_LAYER = GRU +sequence_length = 3 +feature_dim = 1 +features_in = Input(batch_shape=(1, sequence_length, feature_dim)) +state_h_in = Input(batch_shape=(1, 1)) + +rnn_out = REC_LAYER(1, activation=None, use_bias=False, return_sequences=True, return_state=False, stateful=False)(features_in, initial_state=state_h_in) +stateless_model = Model(inputs=[features_in, state_h_in], outputs=rnn_out) + +stateful_rnn_out = REC_LAYER(1, activation=None, use_bias=False, return_sequences=True, return_state=False, stateful=True)(features_in, initial_state=state_h_in) +stateful_model = Model(inputs=[features_in, state_h_in], outputs=stateful_rnn_out) + +if USE_TOY_WEIGHTS: + if REC_LAYER == SimpleRNN: + toy_weights = [ np.asarray([[1.0]], dtype=np.float32), np.asarray([[-0.5]], dtype=np.float32)] + + elif REC_LAYER == GRU: + # for a GRU, the first are the non-recurrent kernels W, and the second are the recurrent kernels U (V) + toy_weights = [np.asarray([[ 1.0, -2.0, 3.0 ]], dtype=np.float32), np.asarray([[ -0.5 , 2.0, -1.1 ]], dtype=np.float32)] + + stateless_model.set_weights(toy_weights) + stateful_model.set_weights(toy_weights) + +# w = stateless_model.get_weights() +# print(w) + +stateless_model.save('temp_stateless.h5', include_optimizer=False) 
+stateful_model.save('temp_stateful.h5', include_optimizer=False) + +x_in = np.zeros(sequence_length) +x_in[0] = 1 +x_in = x_in.reshape( (1, sequence_length, feature_dim) ) +initial_state = np.asarray( [10]) +initial_state = initial_state.reshape((1,1)) + +def print_rnn_out(non_stateful_out, stateful_out): + fb = ['FWD::', 'BWD::'] + + print(f'non_stateful: {non_stateful_out}') + print(f'stateful: {stateful_out}') + print(f'delta: {stateful_out-non_stateful_out}') + +non_stateful_out = stateless_model.predict([x_in, initial_state]).reshape((sequence_length)) +stateful_out = stateful_model.predict([x_in, initial_state]).reshape((sequence_length)) +print_rnn_out(non_stateful_out, stateful_out) + +non_stateful_out = stateless_model.predict([x_in, initial_state]).reshape((sequence_length)) +stateful_out = stateful_model.predict([x_in, initial_state]).reshape((sequence_length)) +print_rnn_out(non_stateful_out, stateful_out) + +print('\n** RESETTING STATES in STATEFUL MODEL **\n') +stateful_model.reset_states() +non_stateful_out = stateless_model.predict([x_in, initial_state]).reshape((sequence_length)) +stateful_out = stateful_model.predict([x_in, initial_state]).reshape((sequence_length)) +print_rnn_out(non_stateful_out, stateful_out) + +non_stateful_out = stateless_model.predict([x_in, initial_state]).reshape((sequence_length)) +stateful_out = stateful_model.predict([x_in, initial_state]).reshape((sequence_length)) +print_rnn_out(non_stateful_out, stateful_out)