From b300d134bad341e7b0b085510220302014bbce32 Mon Sep 17 00:00:00 2001 From: Jun Doi Date: Fri, 23 Apr 2021 23:37:52 +0900 Subject: [PATCH 1/7] Fixes of multi-chunk simulators (#1200) Co-authored-by: Christopher Wood --- .../aer/backends/unitary_simulator.py | 2 + .../fix-gpu-memory-cf50e1def7072f38.yaml | 5 + src/controllers/aer_controller.hpp | 83 ++- src/controllers/statevector_controller.hpp | 1 + src/controllers/unitary_controller.hpp | 80 ++- .../densitymatrix_state_chunk.hpp | 340 +++++---- .../density_matrix/densitymatrix_thrust.hpp | 138 +++- src/simulators/state_chunk.hpp | 160 ++++- .../statevector/chunk/chunk_container.hpp | 47 +- .../statevector/chunk/chunk_manager.hpp | 29 +- .../chunk/device_chunk_container.hpp | 1 + .../chunk/host_chunk_container.hpp | 26 +- src/simulators/statevector/qubitvector.hpp | 17 +- .../statevector/qubitvector_thrust.hpp | 108 +-- .../statevector/statevector_state_chunk.hpp | 283 ++++++-- src/simulators/unitary/unitary_state.hpp | 6 + .../unitary/unitary_state_chunk.hpp | 660 ++++++++++++++++++ .../unitary/unitarymatrix_thrust.hpp | 21 +- src/transpile/cacheblocking.hpp | 180 +++-- 19 files changed, 1773 insertions(+), 414 deletions(-) create mode 100644 releasenotes/notes/fix-gpu-memory-cf50e1def7072f38.yaml create mode 100644 src/simulators/unitary/unitary_state_chunk.hpp diff --git a/qiskit/providers/aer/backends/unitary_simulator.py b/qiskit/providers/aer/backends/unitary_simulator.py index 9e91c9bb31..b0353757c7 100644 --- a/qiskit/providers/aer/backends/unitary_simulator.py +++ b/qiskit/providers/aer/backends/unitary_simulator.py @@ -193,6 +193,8 @@ def _default_options(cls): fusion_verbose=False, fusion_max_qubit=5, fusion_threshold=14, + blocking_qubits=None, + blocking_enable=False, # statevector options statevector_parallel_threshold=14) diff --git a/releasenotes/notes/fix-gpu-memory-cf50e1def7072f38.yaml b/releasenotes/notes/fix-gpu-memory-cf50e1def7072f38.yaml new file mode 100644 index 0000000000..01ebe9e31f --- /dev/null +++ b/releasenotes/notes/fix-gpu-memory-cf50e1def7072f38.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes a bug introduced in 0.8.0 where GPU simulations would allocate + unneeded host memory in addition to the GPU memory. diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index df379ecc93..9b34f6d6ce 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -64,6 +64,7 @@ #include "simulators/statevector/statevector_state_chunk.hpp" #include "simulators/superoperator/superoperator_state.hpp" #include "simulators/unitary/unitary_state.hpp" +#include "simulators/unitary/unitary_state_chunk.hpp" namespace AER { @@ -1399,33 +1400,67 @@ void Controller::run_circuit(const Circuit &circ, } case Method::unitary: { if (sim_device_ == Device::CPU) { - if (sim_precision_ == Precision::Double) { - // Double-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>( - circ, noise, config, shots, rng_seed, Method::unitary, - false, result); - } else { - // Single-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>( - circ, noise, config, shots, rng_seed, Method::unitary, - false, result); + if (multiple_chunk_required(circ, noise)) { + if (sim_precision_ == Precision::Double) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } + } + else{ + if (sim_precision_ == Precision::Double) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } } } else { #ifdef AER_THRUST_SUPPORTED - if (sim_precision_ == Precision::Double) { - // Double-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>( - circ, noise, config, shots, rng_seed, Method::unitary, - false, result); - } else { - // Single-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>( - circ, noise, config, shots, rng_seed, Method::unitary, - false, result); + if (multiple_chunk_required(circ, noise)) { + if (sim_precision_ == Precision::Double) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } + } + else{ + if (sim_precision_ == Precision::Double) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>( + circ, noise, config, shots, rng_seed, Method::unitary, + false, result); + } } #endif } diff --git a/src/controllers/statevector_controller.hpp b/src/controllers/statevector_controller.hpp index db5c9a9cfe..cc67fd08c2 100755 --- a/src/controllers/statevector_controller.hpp +++ b/src/controllers/statevector_controller.hpp @@ -356,6 +356,7 @@ void StatevectorController::run_circuit_helper( } Transpile::CacheBlocking cache_block_pass = transpile_cache_blocking(opt_circ,dummy_noise,config,(precision_ == Precision::single_precision) ? sizeof(std::complex) : sizeof(std::complex),false); + cache_block_pass.set_save_state(true); cache_block_pass.optimize_circuit(opt_circ, dummy_noise, state.opset(), result); uint_t block_bits = 0; diff --git a/src/controllers/unitary_controller.hpp b/src/controllers/unitary_controller.hpp index 8b7d7126da..bdd1413634 100755 --- a/src/controllers/unitary_controller.hpp +++ b/src/controllers/unitary_controller.hpp @@ -17,6 +17,7 @@ #include "controller.hpp" #include "simulators/unitary/unitary_state.hpp" +#include "simulators/unitary/unitary_state_chunk.hpp" #include "transpile/fusion.hpp" namespace AER { @@ -196,30 +197,60 @@ void UnitaryController::run_circuit(const Circuit &circ, switch (method_) { case Method::automatic: case Method::unitary_cpu: { - if (precision_ == Precision::double_precision) { - // Double-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>(circ, noise, config, - shots, rng_seed, result); - } else { - // Single-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>(circ, noise, config, - shots, rng_seed, result); + if(Base::Controller::multiple_chunk_required(circ,noise)){ + if (precision_ == Precision::double_precision) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>(circ, noise, config, + shots, rng_seed, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>(circ, noise, config, + shots, rng_seed, result); + } + } + else{ + if (precision_ == Precision::double_precision) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>(circ, noise, config, + shots, rng_seed, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>(circ, noise, config, + shots, rng_seed, result); + } } } case Method::unitary_thrust_gpu: { #ifdef AER_THRUST_CUDA - if (precision_ == Precision::double_precision) { - // Double-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>( - circ, noise, config, shots, rng_seed, result); - } else { - // Single-precision unitary simulation - return run_circuit_helper< - QubitUnitary::State>>( - circ, noise, config, shots, rng_seed, result); + if(Base::Controller::multiple_chunk_required(circ,noise)){ + if (precision_ == Precision::double_precision) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>( + circ, noise, config, shots, rng_seed, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitaryChunk::State>>( + circ, noise, config, shots, rng_seed, result); + } + } + else{ + if (precision_ == Precision::double_precision) { + // Double-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>( + circ, noise, config, shots, rng_seed, result); + } else { + // Single-precision unitary simulation + return run_circuit_helper< + QubitUnitary::State>>( + circ, noise, config, shots, rng_seed, result); + } } #else throw std::runtime_error( @@ -309,6 +340,15 @@ void UnitaryController::run_circuit_helper( fusion_pass.optimize_circuit(opt_circ, dummy_noise, state.opset(), result); } + Transpile::CacheBlocking cache_block_pass = transpile_cache_blocking(opt_circ,dummy_noise,config,(precision_ == Precision::single_precision) ? sizeof(std::complex) : sizeof(std::complex),true); + cache_block_pass.set_save_state(true); + cache_block_pass.optimize_circuit(opt_circ, dummy_noise, state.opset(), result); + + uint_t block_bits = 0; + if(cache_block_pass.enabled()) + block_bits = cache_block_pass.block_bits(); + state.allocate(Base::Controller::max_qubits_,block_bits); + // Run single shot collecting measure data or snapshots if (initial_unitary_.empty()) { diff --git a/src/simulators/density_matrix/densitymatrix_state_chunk.hpp b/src/simulators/density_matrix/densitymatrix_state_chunk.hpp index 055fa08c1a..89aceb50d5 100644 --- a/src/simulators/density_matrix/densitymatrix_state_chunk.hpp +++ b/src/simulators/density_matrix/densitymatrix_state_chunk.hpp @@ -40,11 +40,11 @@ const Operations::OpSet StateOpSet( OpType::barrier, OpType::bfunc, OpType::roerror, OpType::matrix, OpType::diagonal_matrix, OpType::kraus, - OpType::superop, OpType::save_expval, + OpType::superop, OpType::set_statevec, + OpType::set_densmat, OpType::save_expval, OpType::save_expval_var, OpType::save_densmat, - OpType::save_state, OpType::save_probs, OpType::save_probs_ket, - OpType::save_amps_sq + OpType::save_amps_sq, OpType::save_state }, // Gates {"U", "CX", "u1", "u2", "u3", "u", "cx", "cy", "cz", @@ -116,7 +116,9 @@ class State : public Base::StateChunk { auto move_to_matrix(); auto copy_to_matrix(); protected: - auto apply_to_matrix(bool copy = false); + + template + void initialize_from_vector(const list_t &vec); //----------------------------------------------------------------------- // Apply instructions @@ -143,7 +145,7 @@ class State : public Base::StateChunk { RngEngine &rng); // Reset the specified qubits to the |0> state by tracing out qubits - void apply_reset(const reg_t &qubits); + void apply_reset(const int_t iChunk, const reg_t &qubits); // Apply a supported snapshot instruction // If the input is not in allowed_snapshots an exeption will be raised. @@ -166,6 +168,10 @@ class State : public Base::StateChunk { // Apply an N-qubit Pauli gate void apply_pauli(const reg_t &qubits, const std::string &pauli); + // apply phase + void apply_phase(const uint_t iChunk,const uint_t qubit, const complex_t phase); + void apply_phase(const uint_t iChunk,const reg_t& qubits, const complex_t phase); + //----------------------------------------------------------------------- // Save data instructions //----------------------------------------------------------------------- @@ -279,12 +285,22 @@ class State : public Base::StateChunk { return 2; } + virtual bool is_applied_to_each_chunk(const Operations::Op &op); }; //========================================================================= // Implementation: Base class method overrides //========================================================================= +template +bool State::is_applied_to_each_chunk(const Operations::Op &op) +{ + if(op.type == Operations::OpType::reset){ + return true; + } + return BaseState::is_applied_to_each_chunk(op); +} + //------------------------------------------------------------------------- // Initialization //------------------------------------------------------------------------- @@ -303,10 +319,13 @@ void State::initialize_qreg(uint_t num_qubits) } } else{ //multi-chunk distribution + for(i=0;inum_qubits_ == this->chunk_bits_){ BaseState::qregs_[i].initialize(); } @@ -335,6 +354,11 @@ void State::initialize_qreg(uint_t num_qubits, } } else{ //multi-chunk distribution + for(iChunk=0;iChunk::initialize_qreg(uint_t num_qubits, uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << (BaseState::chunk_bits_); //copy part of state for this chunk - uint_t i; - cvector_t tmp(1ull << BaseState::chunk_bits_); - for(i=0;i<(1ull << BaseState::chunk_bits_);i++){ + uint_t i,row,col; + cvector_t tmp(1ull << (BaseState::chunk_bits_*2)); + for(i=0;i<(1ull << (BaseState::chunk_bits_*2));i++){ uint_t icol = i & ((1ull << (BaseState::chunk_bits_))-1); uint_t irow = i >> (BaseState::chunk_bits_); tmp[i] = input[icol_chunk + icol + ((irow_chunk + irow) << (BaseState::num_qubits_))]; } - - BaseState::qregs_[iChunk].set_num_qubits(BaseState::chunk_bits_); BaseState::qregs_[iChunk].initialize_from_vector(tmp); } } @@ -374,6 +396,10 @@ void State::initialize_qreg(uint_t num_qubits, } } else{ //multi-chunk distribution + for(iChunk=0;iChunk::initialize_qreg(uint_t num_qubits, uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << (BaseState::chunk_bits_); //copy part of state for this chunk - uint_t i; - cvector_t tmp(1ull << BaseState::chunk_bits_); - for(i=0;i<(1ull << BaseState::chunk_bits_);i++){ + uint_t i,row,col; + cvector_t tmp(1ull << (BaseState::chunk_bits_*2)); + for(i=0;i<(1ull << (BaseState::chunk_bits_*2));i++){ uint_t icol = i & ((1ull << (BaseState::chunk_bits_))-1); uint_t irow = i >> (BaseState::chunk_bits_); tmp[i] = state[icol_chunk + icol + ((irow_chunk + irow) << (BaseState::num_qubits_))]; } - - BaseState::qregs_[iChunk].set_num_qubits(BaseState::chunk_bits_); BaseState::qregs_[iChunk].initialize_from_vector(tmp); } } @@ -413,6 +437,10 @@ void State::initialize_qreg(uint_t num_qubits, } } else{ //multi-chunk distribution + for(iChunk=0;iChunk::initialize_qreg(uint_t num_qubits, uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << (BaseState::chunk_bits_); //copy part of state for this chunk - uint_t i; - cvector_t tmp(1ull << BaseState::chunk_bits_); - for(i=0;i<(1ull << BaseState::chunk_bits_);i++){ + uint_t i,row,col; + cvector_t tmp(1ull << (BaseState::chunk_bits_*2)); + for(i=0;i<(1ull << (BaseState::chunk_bits_*2));i++){ uint_t icol = i & ((1ull << (BaseState::chunk_bits_))-1); uint_t irow = i >> (BaseState::chunk_bits_); tmp[i] = state[icol_chunk + icol + ((irow_chunk + irow) << (BaseState::num_qubits_))]; } - - BaseState::qregs_[iChunk].set_num_qubits(BaseState::chunk_bits_); BaseState::qregs_[iChunk].initialize_from_vector(tmp); } } @@ -446,13 +472,55 @@ void State::initialize_omp() } } +template +template +void State::initialize_from_vector(const list_t &vec) +{ + if((1ull << (BaseState::num_qubits_*2)) == vec.size()){ + BaseState::initialize_from_vector(vec); + } + else if((1ull << (BaseState::num_qubits_*2)) == vec.size() * vec.size()) { + int_t iChunk; + if(BaseState::chunk_bits_ == BaseState::num_qubits_){ + for(iChunk=0;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_))) << (BaseState::chunk_bits_); + uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << (BaseState::chunk_bits_); + + //copy part of state for this chunk + uint_t i,row,col; + list_t vec1(1ull << BaseState::chunk_bits_); + list_t vec2(1ull << BaseState::chunk_bits_); + + for(i=0;i<(1ull << BaseState::chunk_bits_);i++){ + vec1[i] = vec[(irow_chunk << BaseState::chunk_bits_) + i]; + vec2[i] = std::conj(vec[(icol_chunk << BaseState::chunk_bits_) + i]); + } + BaseState::qregs_[iChunk].initialize_from_vector(AER::Utils::tensor_product(vec1, vec2)); + } + } + } + else { + throw std::runtime_error("DensityMatrixChunk::initialize input vector is incorrect length. Expected: " + + std::to_string((1ull << (BaseState::num_qubits_*2))) + " Received: " + + std::to_string(vec.size())); + } +} + + template auto State::move_to_matrix() { if(BaseState::num_global_chunks_ == 1){ return BaseState::qregs_[0].move_to_matrix(); } - return apply_to_matrix(false); + return BaseState::apply_to_matrix(false); } template @@ -461,86 +529,9 @@ auto State::copy_to_matrix() if(BaseState::num_global_chunks_ == 1){ return BaseState::qregs_[0].copy_to_matrix(); } - return apply_to_matrix(true); + return BaseState::apply_to_matrix(true); } -template -auto State::apply_to_matrix(bool copy) -{ - int_t iChunk; - uint_t size = 1ull << (BaseState::chunk_bits_*2); - uint_t mask = (1ull << (BaseState::chunk_bits_)) - 1; - uint_t num_threads = BaseState::qregs_[0].get_omp_threads(); - - auto matrix = BaseState::qregs_[0].copy_to_matrix(); - - if(BaseState::distributed_rank_ == 0){ - //TO DO check memory availability - matrix.resize(1ull << (BaseState::num_qubits_),1ull << (BaseState::num_qubits_)); - -#ifdef AER_MPI - auto recv = BaseState::qregs_[0].copy_to_matrix(); - //gather states from other processes - for(iChunk=BaseState::num_local_chunks_;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_))) << (BaseState::chunk_bits_); - uint_t icol_chunk = ((iChunk) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << (BaseState::chunk_bits_); -#pragma omp parallel for if(num_threads > 1) num_threads(num_threads) - for(i=0;i> (BaseState::chunk_bits_); - uint_t icol = i & mask; - matrix(icol_chunk+icol,irow_chunk+irow) = recv(icol,irow); - } - } -#endif - - for(iChunk=0;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_))) << (BaseState::chunk_bits_); - uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << (BaseState::chunk_bits_); - if(copy){ - auto tmp = BaseState::qregs_[iChunk].copy_to_matrix(); -#pragma omp parallel for if(num_threads > 1) num_threads(num_threads) - for(i=0;i> (BaseState::chunk_bits_); - uint_t icol = i & mask; - matrix(icol_chunk+icol,irow_chunk+irow) = tmp(icol,irow); - } - } - else{ - auto tmp = BaseState::qregs_[iChunk].move_to_matrix(); -#pragma omp parallel for if(num_threads > 1) num_threads(num_threads) - for(i=0;i> (BaseState::chunk_bits_); - uint_t icol = i & mask; - matrix(icol_chunk+icol,irow_chunk+irow) = tmp(icol,irow); - } - } - } - } - else{ -#ifdef AER_MPI - //send matrices to process 0 - for(iChunk=0;iChunk::apply_op(const int_t iChunk,const Operations::Op &op, case Operations::OpType::barrier: break; case Operations::OpType::reset: - apply_reset(op.qubits); + apply_reset(iChunk,op.qubits); break; case Operations::OpType::measure: apply_measure(op.qubits, op.memory, op.registers, rng); @@ -617,6 +608,15 @@ void State::apply_op(const int_t iChunk,const Operations::Op &op, case Operations::OpType::superop: BaseState::qregs_[iChunk].apply_superop_matrix(op.qubits, Utils::vectorize_matrix(op.mats[0])); break; + case OpType::kraus: + apply_kraus(op.qubits, op.mats); + break; + case OpType::set_statevec: + initialize_from_vector(op.params); + break; + case OpType::set_densmat: + BaseState::initialize_from_matrix(op.mats[0]); + break; case Operations::OpType::save_expval: case Operations::OpType::save_expval_var: BaseState::apply_save_expval(op, result); @@ -990,54 +990,65 @@ template cmatrix_t State::reduced_density_matrix_helper(const reg_t &qubits, const reg_t &qubits_sorted) { - // Get superoperator qubits - const reg_t squbits = BaseState::qregs_[0].superop_qubits(qubits); - const reg_t squbits_sorted = BaseState::qregs_[0].superop_qubits(qubits_sorted); - - // Get dimensions - const size_t N = qubits.size(); - const size_t DIM = 1ULL << N; - const size_t VDIM = 1ULL << (2 * N); - const size_t END = 1ULL << (BaseState::qregs_[0].num_qubits() - N); - const size_t SHIFT = END + 1; - - // TODO: If we are not going to apply any additional instructions after - // this function we could move the memory when constructing rather - // than copying int_t iChunk; - auto vmat = BaseState::qregs_[0].vector(); + uint_t size = 1ull << (BaseState::chunk_bits_*2); + uint_t mask = (1ull << (BaseState::chunk_bits_)) - 1; + uint_t num_threads = BaseState::qregs_[0].get_omp_threads(); //TO DO check memory availability - vmat.resize(BaseState::num_local_chunks_ << (BaseState::chunk_bits_*2)); - -#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(iChunk) - for(iChunk=1;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_))) << BaseState::chunk_bits_; + uint_t icol_chunk = (iChunk & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)) << BaseState::chunk_bits_; + if(iChunk < BaseState::num_local_chunks_) + tmp = BaseState::qregs_[iChunk].copy_to_matrix(); #ifdef AER_MPI - BaseState::gather_state(vmat); + else + BaseState::recv_data(tmp.data(),size,0,iChunk); #endif - - cmatrix_t reduced_state(DIM, DIM, false); - { - // Fill matrix with first iteration - const auto inds = QV::indexes(squbits, squbits_sorted, 0); - for (int_t i = 0; i < VDIM; ++i) { - reduced_state[i] = complex_t(vmat[inds[i]]); +#pragma omp parallel for if(num_threads > 1) num_threads(num_threads) + for(i=0;i> (BaseState::chunk_bits_)) + irow_chunk; + uint_t icol = (i & mask) + icol_chunk; + uint_t irow_out = 0; + uint_t icol_out = 0; + int j; + for(j=0;j> qubits[j]) & 1){ + irow &= ~(1ull << qubits[j]); + irow_out += (1ull << j); + } + if((icol >> qubits[j]) & 1){ + icol &= ~(1ull << qubits[j]); + icol_out += (1ull << j); + } + } + if(irow == icol){ //only diagonal base can be reduced + uint_t idx = ((irow_out) << qubits.size()) + icol_out; +#pragma omp critical + reduced_state[idx] += tmp[i]; + } + } } } - // Accumulate with remaning blocks - for (size_t k = 1; k < END; k++) { - const auto inds = QV::indexes(squbits, squbits_sorted, k * SHIFT); - for (int_t i = 0; i < VDIM; ++i) { - reduced_state[i] += complex_t(vmat[inds[i]]); + else{ +#ifdef AER_MPI + //send matrices to process 0 + for(iChunk=0;iChunk::apply_gate(const uint_t iChunk, const Operations::Op &op) std::real(op.params[1])); break; case DensityMatrix::Gates::u1: - BaseState::qregs_[iChunk].apply_phase(op.qubits[0], std::exp(complex_t(0., 1.) * op.params[0])); + apply_phase(iChunk,op.qubits[0], std::exp(complex_t(0., 1.) * op.params[0])); break; case DensityMatrix::Gates::cx: BaseState::qregs_[iChunk].apply_cnot(op.qubits[0], op.qubits[1]); @@ -1097,21 +1108,21 @@ void State::apply_gate(const uint_t iChunk, const Operations::Op &op) apply_gate_u3(iChunk, op.qubits[0], M_PI / 2., 0., M_PI); break; case DensityMatrix::Gates::s: - BaseState::qregs_[iChunk].apply_phase(op.qubits[0], complex_t(0., 1.)); + apply_phase(iChunk,op.qubits[0], complex_t(0., 1.)); break; case DensityMatrix::Gates::sdg: - BaseState::qregs_[iChunk].apply_phase(op.qubits[0], complex_t(0., -1.)); + apply_phase(iChunk,op.qubits[0], complex_t(0., -1.)); break; case DensityMatrix::Gates::sx: BaseState::qregs_[iChunk].apply_unitary_matrix(op.qubits, Linalg::VMatrix::SX); break; case DensityMatrix::Gates::t: { const double isqrt2{1. / std::sqrt(2)}; - BaseState::qregs_[iChunk].apply_phase(op.qubits[0], complex_t(isqrt2, isqrt2)); + apply_phase(iChunk,op.qubits[0], complex_t(isqrt2, isqrt2)); } break; case DensityMatrix::Gates::tdg: { const double isqrt2{1. / std::sqrt(2)}; - BaseState::qregs_[iChunk].apply_phase(op.qubits[0], complex_t(isqrt2, -isqrt2)); + apply_phase(iChunk,op.qubits[0], complex_t(isqrt2, -isqrt2)); } break; case DensityMatrix::Gates::swap: { BaseState::qregs_[iChunk].apply_swap(op.qubits[0], op.qubits[1]); @@ -1192,13 +1203,44 @@ void State::apply_diagonal_unitary_matrix(const int_t iChunk, const r } else{ reg_t qubits_in = qubits; + reg_t qubits_row = qubits; cvector_t diag_in = diag; + cvector_t diag_row = diag; BaseState::block_diagonal_matrix(iChunk,qubits_in,diag_in); - BaseState::qregs_[iChunk].apply_diagonal_unitary_matrix(qubits_in,diag_in); + + for(int_t i=0;i= BaseState::chunk_bits_) + qubits_row[i] = qubits[i] + BaseState::num_qubits_ - BaseState::chunk_bits_; + } + BaseState::block_diagonal_matrix(iChunk,qubits_row,diag_row); + + reg_t qubits_chunk(qubits_in.size()*2); + for(int_t i=0;i +void State::apply_phase(const uint_t iChunk,const uint_t qubit, const complex_t phase) +{ + cvector_t diag(2); + diag[0] = 1.0; + diag[1] = phase; + apply_diagonal_unitary_matrix(iChunk,reg_t({qubit}), diag); +} + +template +void State::apply_phase(const uint_t iChunk,const reg_t& qubits, const complex_t phase) +{ + cvector_t diag((1 << qubits.size()),1.0); + diag[(1 << qubits.size()) - 1] = phase; + apply_diagonal_unitary_matrix(iChunk,qubits, diag); +} + //========================================================================= // Implementation: Reset and Measurement Sampling //========================================================================= @@ -1399,18 +1441,14 @@ std::vector State::sample_measure(const reg_t &qubits, template -void State::apply_reset(const reg_t &qubits) +void State::apply_reset(const int_t iChunk,const reg_t &qubits) { // TODO: This can be more efficient by adding reset // to base class rather than doing a matrix multiplication // where all but 1 row is zeros. const auto reset_op = Linalg::SMatrix::reset(1ULL << qubits.size()); - int_t i; -#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(i) - for(i=0;i diff --git a/src/simulators/density_matrix/densitymatrix_thrust.hpp b/src/simulators/density_matrix/densitymatrix_thrust.hpp index 645b108344..4a7730bdb0 100755 --- a/src/simulators/density_matrix/densitymatrix_thrust.hpp +++ b/src/simulators/density_matrix/densitymatrix_thrust.hpp @@ -317,6 +317,73 @@ class DensityDiagMatMult2x2 : public GateFuncBase } }; +template +class DensityDiagMatMultNxN : public GateFuncBase +{ +protected: + int nqubits_; + int total_bits_; + int chunk_bits_; +public: + DensityDiagMatMultNxN(const reg_t &qb,int total,int chunk) + { + nqubits_ = qb.size(); + total_bits_ = total; + chunk_bits_ = chunk; + } + + bool is_diagonal(void) + { + return true; + } + + __host__ __device__ void operator()(const uint_t &i) const + { + uint_t j,imr,imc; + thrust::complex* vec; + thrust::complex q; + thrust::complex* pMat; + uint_t* qubits; + uint_t irow,icol,gid,local_mask; + uint_t irow_chunk,icol_chunk; + + vec = this->data_; + gid = this->base_index_; + + irow_chunk = ((gid + i) >> (chunk_bits_*2)) >> (total_bits_ - chunk_bits_); + icol_chunk = ((gid + i) >> (chunk_bits_*2)) & ((1ull << (total_bits_ - chunk_bits_)-1)); + + local_mask = (1ull << (chunk_bits_*2)) - 1; + irow = (i & local_mask) >> chunk_bits_; + icol = (i & local_mask) & ((1ull << chunk_bits_)-1); + + irow += (irow_chunk << chunk_bits_); + icol += (icol_chunk << chunk_bits_); + + pMat = this->matrix_; + qubits = this->params_; + + imr = 0; + imc = 0; + for(j=0;j> qubits[j]) & 1) != 0){ + imr += (1 << j); + } + if(((icol >> qubits[j]) & 1) != 0){ + imc += (1 << j); + } + } + + q = vec[i]; + vec[i] = thrust::conj(pMat[imr])*pMat[imc]*q; + } + const char* name(void) + { + return "DensityDiagMatMultNxN"; + } +}; + + template void DensityMatrixThrust::apply_unitary_matrix(const reg_t &qubits, const cvector_t &mat) @@ -340,6 +407,11 @@ template void DensityMatrixThrust::apply_diagonal_unitary_matrix(const reg_t &qubits, const cvector_t &diag) { + BaseVector::chunk_->StoreMatrix(diag); + BaseVector::chunk_->StoreUintParams(qubits); + BaseVector::apply_function(DensityDiagMatMultNxN(qubits, BaseVector::chunk_manager_.num_qubits()/2, num_qubits())); + + /* if(qubits.size() == 1){ const reg_t qubits_sp = {{qubits[0], qubits[0] + num_qubits()}}; BaseVector::apply_function(DensityDiagMatMult2x2(diag,qubits[0], num_qubits())); @@ -348,6 +420,7 @@ void DensityMatrixThrust::apply_diagonal_unitary_matrix(const reg_t &qub // Apply as single 2N-qubit matrix mult. apply_diagonal_superop_matrix(qubits, AER::Utils::tensor_product(AER::Utils::conjugate(diag), diag)); } + */ } //----------------------------------------------------------------------- @@ -439,6 +512,62 @@ void DensityMatrixThrust::apply_cnot(const uint_t qctrl, const uint_t qt #endif } +template +class DensityPhase : public GateFuncBase +{ +protected: + thrust::complex phase_; + int qubit_; + int total_bits_; + int chunk_bits_; +public: + DensityPhase(int qubit,thrust::complex* phase,int total,int chunk) + { + qubit_ = qubit; + phase_ = *phase; + total_bits_ = total; + chunk_bits_ = chunk; + } + + bool is_diagonal(void) + { + return true; + } + + __host__ __device__ void operator()(const uint_t &i) const + { + thrust::complex* vec; + thrust::complex q; + uint_t irow,icol,gid,local_mask; + uint_t irow_chunk,icol_chunk; + + vec = this->data_; + gid = this->base_index_; + + irow_chunk = ((gid + i) >> (chunk_bits_*2)) >> (total_bits_ - chunk_bits_); + icol_chunk = ((gid + i) >> (chunk_bits_*2)) & ((1ull << (total_bits_ - chunk_bits_)-1)); + + local_mask = (1ull << (chunk_bits_*2)) - 1; + irow = (i & local_mask) >> chunk_bits_; + icol = (i & local_mask) & ((1ull << chunk_bits_)-1); + + irow += (irow_chunk << chunk_bits_); + icol += (icol_chunk << chunk_bits_); + + q = vec[i]; + if((icol >> qubit_) & 1) + q = phase_*q; + if((irow >> qubit_) & 1) + q = thrust::conj(phase_)*q; + vec[i] = q; + } + const char* name(void) + { + return "DensityPhase"; + } +}; + + /* template class DensityPhase : public GateFuncBase { @@ -454,10 +583,11 @@ class DensityPhase : public GateFuncBase phase_ = *phase; } - int qubits_count(void) + bool is_diagonal(void) { - return 2; + return true; } + __host__ __device__ void operator()(const uint_t &i) const { uint_t i0,i1,i2; @@ -490,11 +620,13 @@ class DensityPhase : public GateFuncBase return "DensityPhase"; } }; + */ template void DensityMatrixThrust::apply_phase(const uint_t q,const complex_t &phase) { - BaseVector::apply_function(DensityPhase(q, num_qubits(), (thrust::complex*)&phase )); + BaseVector::apply_function(DensityPhase(q, (thrust::complex*)&phase, BaseVector::chunk_manager_.num_qubits()/2, num_qubits() )); +// BaseVector::apply_function(DensityPhase(q, num_qubits(), (thrust::complex*)&phase )); #ifdef AER_DEBUG BaseVector::DebugMsg(" density::apply_phase"); diff --git a/src/simulators/state_chunk.hpp b/src/simulators/state_chunk.hpp index fc4f7924f3..f49f68256d 100644 --- a/src/simulators/state_chunk.hpp +++ b/src/simulators/state_chunk.hpp @@ -188,6 +188,15 @@ class StateChunk { const std::string &memory_hex, const std::string ®ister_hex); + //----------------------------------------------------------------------- + // Initialization + //----------------------------------------------------------------------- + template + void initialize_from_vector(const list_t &vec); + + template + void initialize_from_matrix(const list_t &mat); + //----------------------------------------------------------------------- // Save result data //----------------------------------------------------------------------- @@ -355,6 +364,12 @@ class StateChunk { // block diagonal matrix in chunk void block_diagonal_matrix(const int_t iChunk, reg_t &qubits, cvector_t &diag); + void qubits_inout(const reg_t& qubits, reg_t& qubits_in,reg_t& qubits_out) const; + + auto apply_to_matrix(bool copy = false); + + virtual bool is_applied_to_each_chunk(const Operations::Op &op); + // Set a global phase exp(1j * theta) for the state bool has_global_phase_ = false; complex_t global_phase_ = 1; @@ -520,6 +535,16 @@ void StateChunk::set_config(const json_t &config) JSON::get_value(block_bits_, "blocking_qubits", config); } +template +bool StateChunk::is_applied_to_each_chunk(const Operations::Op &op) +{ + if(op.type == Operations::OpType::gate || op.type == Operations::OpType::matrix || + op.type == Operations::OpType::diagonal_matrix || op.type == Operations::OpType::multiplexer || + op.type == Operations::OpType::superop){ + return true; + } + return false; +} template void StateChunk::apply_ops(const std::vector &ops, @@ -566,9 +591,7 @@ void StateChunk::apply_ops(const std::vector &ops, iOp = iOpEnd; } - else if(ops[iOp].type == Operations::OpType::gate || ops[iOp].type == Operations::OpType::matrix || - ops[iOp].type == Operations::OpType::diagonal_matrix || ops[iOp].type == Operations::OpType::multiplexer) - { + else if(is_applied_to_each_chunk(ops[iOp])){ #pragma omp parallel for if(chunk_omp_parallel_) private(iChunk) for(iChunk=0;iChunk::block_diagonal_matrix(const int_t iChunk, reg_t &qubit } } +template +void StateChunk::qubits_inout(const reg_t& qubits, reg_t& qubits_in,reg_t& qubits_out) const +{ + int_t i; + qubits_in.clear(); + qubits_out.clear(); + for(i=0;i std::vector StateChunk::sample_measure(const reg_t &qubits, uint_t shots, @@ -646,6 +686,118 @@ void StateChunk::initialize_creg(uint_t num_memory, creg_.initialize(num_memory, num_register, memory_hex, register_hex); } +template +template +void StateChunk::initialize_from_vector(const list_t &vec) +{ + int_t iChunk; + if(chunk_bits_ == num_qubits_){ + for(iChunk=0;iChunk +template +void StateChunk::initialize_from_matrix(const list_t &mat) +{ + int_t iChunk; + if(chunk_bits_ == num_qubits_){ + for(iChunk=0;iChunk> ((num_qubits_ - chunk_bits_))) << (chunk_bits_); + uint_t icol_chunk = ((iChunk + global_chunk_index_) & ((1ull << ((num_qubits_ - chunk_bits_)))-1)) << (chunk_bits_); + + //copy part of state for this chunk + uint_t i,row,col; + for(i=0;i<(1ull << (chunk_bits_*qubit_scale()));i++){ + uint_t icol = i & ((1ull << chunk_bits_)-1); + uint_t irow = i >> chunk_bits_; + tmp[i] = mat[icol_chunk + icol + ((irow_chunk + irow) << num_qubits_)]; + } + qregs_[iChunk].initialize_from_matrix(tmp); + } + } +} + +template +auto StateChunk::apply_to_matrix(bool copy) +{ + int_t iChunk; + uint_t size = 1ull << (chunk_bits_*qubit_scale()); + uint_t mask = (1ull << (chunk_bits_)) - 1; + uint_t num_threads = qregs_[0].get_omp_threads(); + + auto matrix = qregs_[0].copy_to_matrix(); + + if(distributed_rank_ == 0){ + //TO DO check memory availability + matrix.resize(1ull << (num_qubits_),1ull << (num_qubits_)); + + auto tmp = qregs_[0].copy_to_matrix(); + for(iChunk=0;iChunk> ((num_qubits_ - chunk_bits_))) << chunk_bits_; + uint_t icol_chunk = (iChunk & ((1ull << ((num_qubits_ - chunk_bits_)))-1)) << chunk_bits_; + + if(iChunk < num_local_chunks_){ + if(copy) + tmp = qregs_[iChunk].copy_to_matrix(); + else + tmp = qregs_[iChunk].move_to_matrix(); + } +#ifdef AER_MPI + else + recv_data(tmp.data(),size,0,iChunk); +#endif +#pragma omp parallel for if(num_threads > 1) num_threads(num_threads) + for(i=0;i> (chunk_bits_); + uint_t icol = i & mask; + uint_t idx = ((irow+irow_chunk) << (num_qubits_)) + icol_chunk + icol; + matrix[idx] = tmp[i]; + } + } + } + else{ +#ifdef AER_MPI + //send matrices to process 0 + for(iChunk=0;iChunk void StateChunk::save_creg(ExperimentResult &result, const std::string &key, @@ -1194,6 +1346,8 @@ void StateChunk::send_chunk(uint_t local_chunk_index, uint_t global_pai MPI_Isend(pSend,sizeSend,MPI_BYTE,iProc,local_chunk_index + global_chunk_index_,distributed_comm_,&reqSend); MPI_Wait(&reqSend,&st); + + qregs_[local_chunk_index].release_send_buffer(); #endif } diff --git a/src/simulators/statevector/chunk/chunk_container.hpp b/src/simulators/statevector/chunk/chunk_container.hpp index b024313ad2..6c4c27a479 100644 --- a/src/simulators/statevector/chunk/chunk_container.hpp +++ b/src/simulators/statevector/chunk/chunk_container.hpp @@ -487,6 +487,7 @@ class ChunkContainer : public std::enable_shared_from_this @@ -709,14 +710,48 @@ void ChunkContainer::allocate_chunks(void) buffers_.resize(num_buffers_); checkpoints_.resize(num_checkpoint_); - for(i=0;i>(this->shared_from_this(),i); + if(num_chunks_ > 0){ + chunks_.resize(num_chunks_); + for(i=0;i>(this->shared_from_this(),i); + } + } + if(num_buffers_ > 0){ + buffers_.resize(num_buffers_); + for(i=0;i>(this->shared_from_this(),num_chunks_+i); + } } - for(i=0;i>(this->shared_from_this(),num_chunks_+i); + if(num_checkpoint_ > 0){ + checkpoints_.resize(num_checkpoint_); + for(i=0;i>(this->shared_from_this(),num_chunks_+num_buffers_+i); + } } - for(i=0;i>(this->shared_from_this(),num_chunks_+num_buffers_+i); +} + +template +void ChunkContainer::deallocate_chunks(void) +{ + uint_t i; + + if(num_chunks_ > 0){ + for(i=0;i 0){ + for(i=0;i 0){ + for(i=0;i::ChunkManager() #endif - chunks_.resize(num_places_*2 + 1); + chunks_.resize(num_places_*2 + 1,nullptr); iplace_host_ = num_places_ ; @@ -173,7 +173,6 @@ uint_t ChunkManager::Allocate(int chunk_bits,int nqubits,uint_t nchunks) char* str; bool multi_gpu = false; bool hybrid = false; - uint_t num_checkpoint,total_checkpoint = 0; bool multi_shot = false; //--- for test @@ -253,25 +252,13 @@ uint_t ChunkManager::Allocate(int chunk_bits,int nqubits,uint_t nchunks) nc /= 2; } - num_checkpoint = nc; chunks_[iDev] = std::make_shared>(); - -#ifdef AER_THRUST_CUDA - size_t freeMem,totalMem; - cudaSetDevice(iDev); - cudaMemGetInfo(&freeMem,&totalMem); - if(freeMem <= ( ((uint_t)sizeof(thrust::complex) * (nc + num_buffers + num_checkpoint)) << chunk_bits_)){ - num_checkpoint = 0; - } -#endif - - total_checkpoint += num_checkpoint; - num_chunks_ += chunks_[iDev]->Allocate(iDev,chunk_bits,nc,num_buffers,num_checkpoint); + num_chunks_ += chunks_[iDev]->Allocate(iDev,chunk_bits,nc,num_buffers); } if(num_chunks_ < nchunks){ //rest of chunks are stored on host chunks_[num_places_] = std::make_shared>(); - chunks_[num_places_]->Allocate(-1,chunk_bits,nchunks-num_chunks_,AER_MAX_BUFFERS); + chunks_[num_places_]->Allocate(-1,chunk_bits,nchunks-num_chunks_,num_buffers); num_places_ += 1; num_chunks_ = nchunks; } @@ -279,7 +266,11 @@ uint_t ChunkManager::Allocate(int chunk_bits,int nqubits,uint_t nchunks) //additional host buffer iplace_host_ = num_places_; chunks_[iplace_host_] = std::make_shared>(); +#ifdef AER_DISABLE_GDR chunks_[iplace_host_]->Allocate(-1,chunk_bits,0,AER_MAX_BUFFERS); +#else + chunks_[iplace_host_]->Allocate(-1,chunk_bits,0,0); +#endif } } @@ -292,9 +283,11 @@ void ChunkManager::Free(void) int i; for(i=0;iDeallocate(); - chunks_[i].reset(); + chunks_[i].reset(); + chunks_[i] = nullptr; + } } chunk_bits_ = 0; diff --git a/src/simulators/statevector/chunk/device_chunk_container.hpp b/src/simulators/statevector/chunk/device_chunk_container.hpp index 8fe2ba9250..4e55457a13 100644 --- a/src/simulators/statevector/chunk/device_chunk_container.hpp +++ b/src/simulators/statevector/chunk/device_chunk_container.hpp @@ -373,6 +373,7 @@ void DeviceChunkContainer::Deallocate(void) } stream_.clear(); #endif + ChunkContainer::deallocate_chunks(); } template diff --git a/src/simulators/statevector/chunk/host_chunk_container.hpp b/src/simulators/statevector/chunk/host_chunk_container.hpp index a6b32d1375..f00dee8195 100644 --- a/src/simulators/statevector/chunk/host_chunk_container.hpp +++ b/src/simulators/statevector/chunk/host_chunk_container.hpp @@ -131,12 +131,16 @@ uint_t HostChunkContainer::Allocate(int idev,int bits,uint_t chunks,uint ChunkContainer::num_buffers_ = buffers; ChunkContainer::num_checkpoint_ = checkpoint; ChunkContainer::num_chunks_ = nc; - data_.resize((nc + buffers + checkpoint) << bits); - matrix_.resize(nc + buffers); - params_.resize(nc + buffers); + if(nc + buffers + checkpoint > 0) + data_.resize((nc + buffers + checkpoint) << bits); + if(nc + buffers > 0){ + matrix_.resize(nc + buffers); + params_.resize(nc + buffers); + } //allocate chunk classes - ChunkContainer::allocate_chunks(); + if(nc + buffers + checkpoint > 0) + ChunkContainer::allocate_chunks(); return nc; } @@ -147,9 +151,12 @@ uint_t HostChunkContainer::Resize(uint_t chunks,uint_t buffers,uint_t ch uint_t i; if(chunks + buffers + checkpoint > this->num_chunks_ + this->num_buffers_ + this->num_checkpoint_){ - data_.resize((chunks + buffers + checkpoint) << this->chunk_bits_); - matrix_.resize(chunks + buffers); - params_.resize(chunks + buffers); + if(chunks + buffers + checkpoint > 0) + data_.resize((chunks + buffers + checkpoint) << this->chunk_bits_); + if(chunks + buffers > 0){ + matrix_.resize(chunks + buffers); + params_.resize(chunks + buffers); + } } this->num_chunks_ = chunks; @@ -157,7 +164,8 @@ uint_t HostChunkContainer::Resize(uint_t chunks,uint_t buffers,uint_t ch this->num_checkpoint_ = checkpoint; //allocate chunk classes - ChunkContainer::allocate_chunks(); + if(chunks + buffers + checkpoint > 0) + ChunkContainer::allocate_chunks(); return chunks + buffers + checkpoint; } @@ -171,6 +179,8 @@ void HostChunkContainer::Deallocate(void) matrix_.shrink_to_fit(); params_.clear(); params_.shrink_to_fit(); + + ChunkContainer::deallocate_chunks(); } diff --git a/src/simulators/statevector/qubitvector.hpp b/src/simulators/statevector/qubitvector.hpp index f04d1d5de9..5451ba7e9a 100755 --- a/src/simulators/statevector/qubitvector.hpp +++ b/src/simulators/statevector/qubitvector.hpp @@ -150,6 +150,8 @@ class QubitVector { //prepare buffer for MPI send/recv std::complex* send_buffer(uint_t& size_in_byte); std::complex* recv_buffer(uint_t& size_in_byte); + void release_send_buffer(void) const; + void release_recv_buffer(void) const; //----------------------------------------------------------------------- // Check point operations @@ -364,7 +366,7 @@ class QubitVector { std::complex* checkpoint_; uint_t chunk_index_; //global chunk index - cvector_t recv_buffer_; //receive buffer for MPI + mutable cvector_t recv_buffer_; //receive buffer for MPI //----------------------------------------------------------------------- // Config settings @@ -897,6 +899,18 @@ std::complex* QubitVector::recv_buffer(uint_t& size_in_byte) return &recv_buffer_[0]; } +template +void QubitVector::release_send_buffer(void) const +{ + +} + +template +void QubitVector::release_recv_buffer(void) const +{ + recv_buffer_.clear(); +} + //------------------------------------------------------------------------------ // Initialization //------------------------------------------------------------------------------ @@ -1226,7 +1240,6 @@ void QubitVector::apply_permutation_matrix(const reg_t& qubits, } // end switch } - /******************************************************************************* * * APPLY OPTIMIZED GATES diff --git a/src/simulators/statevector/qubitvector_thrust.hpp b/src/simulators/statevector/qubitvector_thrust.hpp index c99cb24cc0..a756a5cf25 100644 --- a/src/simulators/statevector/qubitvector_thrust.hpp +++ b/src/simulators/statevector/qubitvector_thrust.hpp @@ -144,6 +144,9 @@ class QubitVectorThrust { thrust::complex* send_buffer(uint_t& size_in_byte); thrust::complex* recv_buffer(uint_t& size_in_byte); + void release_send_buffer(void) const; + void release_recv_buffer(void) const; + //----------------------------------------------------------------------- // Check point operations //----------------------------------------------------------------------- @@ -364,12 +367,13 @@ class QubitVectorThrust { mutable std::shared_ptr> chunk_; mutable std::shared_ptr> buffer_chunk_; std::shared_ptr> checkpoint_; - std::shared_ptr> send_chunk_; - std::shared_ptr> recv_chunk_; + mutable std::shared_ptr> send_chunk_; + mutable std::shared_ptr> recv_chunk_; static ChunkManager chunk_manager_; uint_t chunk_index_; bool multi_chunk_distribution_; + bool multi_shots_; bool register_blocking_; @@ -517,7 +521,10 @@ QubitVectorThrust::QubitVectorThrust(size_t num_qubits) : num_qubits_(0) chunk_ = nullptr; chunk_index_ = 0; multi_chunk_distribution_ = false; + multi_shots_ = false; checkpoint_ = nullptr; + recv_chunk_ = nullptr; + send_chunk_ = nullptr; #ifdef AER_DEBUG debug_count = 0; @@ -788,6 +795,28 @@ void QubitVectorThrust::initialize_component(const reg_t &qubits, const // Utility //------------------------------------------------------------------------------ +template +class ZeroClear : public GateFuncBase +{ +protected: +public: + ZeroClear() {} + bool is_diagonal(void) + { + return true; + } + __host__ __device__ void operator()(const uint_t &i) const + { + thrust::complex* vec; + vec = this->data_; + vec[i] = 0.0; + } + const char* name(void) + { + return "zero"; + } +}; + template void QubitVectorThrust::zero() { @@ -795,8 +824,7 @@ void QubitVectorThrust::zero() DebugMsg("zero"); #endif - chunk_->Zero(); -// chunk_->synchronize(); + apply_function(ZeroClear()); #ifdef AER_DEBUG DebugMsg("zero done"); @@ -818,6 +846,9 @@ void QubitVectorThrust::chunk_setup(int chunk_bits,int num_qubits,uint_t if(chunk_bits < num_qubits){ multi_chunk_distribution_ = true; } + + if(omp_get_num_threads() > 1) + multi_shots_ = true; } template @@ -914,31 +945,14 @@ std::complex QubitVectorThrust::inner_product() const chunk_->set_device(); vec0 = (data_t*)chunk_->pointer(); + vec1 = (data_t*)checkpoint_->pointer(); #ifdef AER_THRUST_CUDA cudaStream_t strm = chunk_->stream(); - if(strm){ - if(chunk_->device() == checkpoint_->device()){ - vec1 = (data_t*)checkpoint_->pointer(); - - dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); - } - else{ - std::shared_ptr> pBuffer = chunk_manager_.MapBufferChunk(chunk_->place()); - pBuffer->CopyIn(checkpoint_); - vec1 = (data_t*)pBuffer->pointer(); - - dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); - chunk_manager_.UnmapBufferChunk(pBuffer); - } - } - else{ - vec1 = (data_t*)checkpoint_->pointer(); - + if(strm) + dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); + else dot = thrust::inner_product(thrust::omp::par,vec0,vec0 + data_size_*2,vec1,0.0); - } #else - vec1 = (data_t*)checkpoint_->pointer(); - if(num_qubits_ > omp_threshold_ && omp_threads_ > 1) dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); else @@ -1056,6 +1070,26 @@ thrust::complex* QubitVectorThrust::recv_buffer(uint_t& size_in_ return recv_chunk_->pointer(); } +template +void QubitVectorThrust::release_send_buffer(void) const +{ +#ifdef AER_DISABLE_GDR + if(send_chunk_){ + chunk_manager_.UnmapBufferChunk(send_chunk_); + send_chunk_ = nullptr; + } +#endif +} + +template +void QubitVectorThrust::release_recv_buffer(void) const +{ + if(recv_chunk_){ + chunk_manager_.UnmapBufferChunk(recv_chunk_); + recv_chunk_ = nullptr; + } +} + //------------------------------------------------------------------------------ // Initialization //------------------------------------------------------------------------------ @@ -1147,13 +1181,14 @@ void QubitVectorThrust::apply_function(Function func) const #endif - func.set_base_index(chunk_index_ << num_qubits_); if(func.batch_enable() && multi_chunk_distribution_ && chunk_->device() >= 0){ if(chunk_->pos() == 0){ //only first chunk on device calculates all the chunks + func.set_base_index(chunk_index_ << num_qubits_); chunk_->Execute(func,chunk_->container()->num_chunks()); } } else{ + func.set_base_index(chunk_index_ << num_qubits_); chunk_->Execute(func,1); } @@ -2700,14 +2735,10 @@ void QubitVectorThrust::apply_chunk_swap(const reg_t &qubits, uint_t rem } } - chunk_manager_.UnmapBufferChunk(recv_chunk_); -// recv_chunk_.reset(); + release_recv_buffer(); #ifdef AER_DISABLE_GDR - if(send_chunk_){ - chunk_manager_.UnmapBufferChunk(send_chunk_); -// send_chunk_.reset(); - } + release_send_buffer(); #endif } @@ -2730,15 +2761,6 @@ class phase_func : public GateFuncBase mask |= (1ull << qubits[i]); } } - int qubits_count(void) - { - return nqubits; - } - int num_control_bits(void) - { - return nqubits - 1; - } - bool is_diagonal(void) { return true; @@ -3711,6 +3733,10 @@ double QubitVectorThrust::expval_pauli(const reg_t &qubits, chunk_manager_.UnmapBufferChunk(buffer); } + if(pair_chunk.data() == this->data()){ + release_recv_buffer(); + } + return ret; } diff --git a/src/simulators/statevector/statevector_state_chunk.hpp b/src/simulators/statevector/statevector_state_chunk.hpp index 673a4e10e4..bfa091b35a 100644 --- a/src/simulators/statevector/statevector_state_chunk.hpp +++ b/src/simulators/statevector/statevector_state_chunk.hpp @@ -44,12 +44,12 @@ const Operations::OpSet StateOpSet( OpType::bfunc, OpType::roerror, OpType::matrix, OpType::diagonal_matrix, OpType::multiplexer, OpType::kraus, - OpType::sim_op, OpType::save_expval, - OpType::save_expval_var, OpType::save_densmat, + OpType::sim_op, OpType::set_statevec, + OpType::save_expval, OpType::save_expval_var, OpType::save_probs, OpType::save_probs_ket, OpType::save_amps, OpType::save_amps_sq, - OpType::save_statevec, OpType::save_state, - OpType::save_statevec_dict + OpType::save_state, OpType::save_statevec, + OpType::save_statevec_dict, OpType::save_densmat }, // Gates {"u1", "u2", "u3", "u", "U", "CX", "cx", "cz", @@ -157,6 +157,8 @@ class State : public Base::StateChunk { // /psi> is given in params void apply_initialize(const reg_t &qubits, const cvector_t ¶ms, RngEngine &rng); + void initialize_from_vector(const cvector_t ¶ms); + // Apply a supported snapshot instruction // If the input is not in allowed_snapshots an exeption will be raised. virtual void apply_snapshot(const Operations::Op &op, ExperimentResult &result, bool last_op = false); @@ -293,6 +295,15 @@ class State : public Base::StateChunk { const double phi, const double lambda); + void apply_gate_mcphase(const int_t iChunk, const reg_t& qubits, const complex_t phase); + + //------------------------------------------------------------------------- + // data access + //------------------------------------------------------------------------- + complex_t get_state(const uint_t idx); + void set_state(const uint_t idx,const complex_t s); + + //----------------------------------------------------------------------- // Config Settings //----------------------------------------------------------------------- @@ -332,10 +343,13 @@ void State::initialize_qreg(uint_t num_qubits) } } else{ //multi-chunk distribution + for(i=0;inum_qubits_ == this->chunk_bits_){ BaseState::qregs_[i].initialize(); } @@ -366,9 +380,12 @@ void State::initialize_qreg(uint_t num_qubits, else{ //multi-chunk distribution uint_t local_offset = BaseState::global_chunk_index_ << BaseState::chunk_bits_; -#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(iChunk) for(iChunk=0;iChunk::initialize_qreg(uint_t num_qubits, initialize_omp(); - if(BaseState::chunk_bits_ == BaseState::num_qubits_){ - for(int_t iChunk=0;iChunk::copy_to_vector() } } +template +complex_t State::get_state(const uint_t idx) +{ + complex_t ret = 0.0; + if(BaseState::num_global_chunks_ == 1){ + return BaseState::qregs_[0].get_state(idx); + } + else{ + uint_t iChunk = idx >> BaseState::chunk_bits_; + if(BaseState::chunk_index_begin_[BaseState::distributed_rank_] <= iChunk && BaseState::chunk_index_end_[BaseState::distributed_rank_] > iChunk){ //on this process + ret = BaseState::qregs_[iChunk - BaseState::global_chunk_index_].get_state(idx - (iChunk << BaseState::chunk_bits_)); + } +#ifdef AER_MPI + BaseState::reduce_sum(ret); +#endif + } + return ret; +} + +template +void State::set_state(const uint_t idx,const complex_t s) +{ + if(BaseState::num_global_chunks_ == 1){ + BaseState::qregs_[0][idx] = s; + } + else{ + uint_t iChunk = idx >> BaseState::chunk_bits_; + if(BaseState::chunk_index_begin_[BaseState::distributed_rank_] <= iChunk && BaseState::chunk_index_end_[BaseState::distributed_rank_] > iChunk){ //on this process + BaseState::qregs_[iChunk - BaseState::global_chunk_index_][idx - (iChunk << BaseState::chunk_bits_)] = s; + } + } +} + //========================================================================= // Implementation: apply operations //========================================================================= @@ -582,6 +618,9 @@ void State::apply_op(const int_t iChunk,const Operations::Op &op, case Operations::OpType::kraus: apply_kraus(op.qubits, op.mats, rng); break; + case OpType::set_statevec: + initialize_from_vector(op.params); + break; case Operations::OpType::save_expval: case Operations::OpType::save_expval_var: BaseState::apply_save_expval(op, result); @@ -872,9 +911,8 @@ void State::apply_snapshot(const Operations::Op &op, result.legacy_data.add_pershot_snapshot("statevector", op.string_params[0], move_to_vector()); } else { - //also using move_to_vector(actually no move) result.legacy_data.add_pershot_snapshot("statevector", op.string_params[0], - move_to_vector()); + copy_to_vector()); } break; case Statevector::Snapshots::cmemory: @@ -1181,7 +1219,7 @@ void State::apply_gate(const uint_t iChunk, const Operations::Op &op break; case Statevector::Gates::mcz: // Includes Z, CZ, CCZ, etc - BaseState::qregs_[iChunk].apply_mcphase(op.qubits, -1); + apply_gate_mcphase(iChunk,op.qubits, -1); break; case Statevector::Gates::mcr: BaseState::qregs_[iChunk].apply_mcu(op.qubits, Linalg::VMatrix::r(op.params[0], op.params[1])); @@ -1246,8 +1284,7 @@ void State::apply_gate(const uint_t iChunk, const Operations::Op &op break; case Statevector::Gates::mcp: // Includes u1, cu1, p, cp, mcp etc - BaseState::qregs_[iChunk].apply_mcphase(op.qubits, - std::exp(complex_t(0, 1) * op.params[0])); + apply_gate_mcphase(iChunk,op.qubits,std::exp(complex_t(0, 1) * op.params[0])); break; case Statevector::Gates::mcsx: // Includes sx, csx, mcsx etc @@ -1327,6 +1364,21 @@ void State::apply_gate_phase(const int_t iChunk, uint_t qubit, compl apply_matrix(iChunk,reg_t({qubit}), diag); } +template +void State::apply_gate_mcphase(const int_t iChunk, const reg_t& qubits, const complex_t phase) +{ + if(BaseState::gpu_optimization_){ + //GPU computes all chunks in one kernel + BaseState::qregs_[iChunk].apply_mcphase(qubits,phase); + } + else{ + cvector_t diag(1ull << qubits.size(),1.0); + diag[diag.size()-1] = phase; + + apply_diagonal_matrix(iChunk,qubits,diag); + } +} + template void State::apply_mcswap(const int_t iChunk,const reg_t &qubits) { @@ -1361,14 +1413,7 @@ rvector_t State::measure_probs(const reg_t &qubits) const reg_t qubits_in_chunk; reg_t qubits_out_chunk; - for(i=0;i::measure_reset_update(const std::vector &qubits, // Update a state vector based on an outcome pair [m, p] from // sample_measure_with_prob function, and a desired post-measurement final_state - int_t i; + int_t i,iChunk; // Single-qubit case if (qubits.size() == 1) { // Diagonal matrix for projecting and renormalizing to measurement outcome cvector_t mdiag(2, 0.); mdiag[meas_state] = 1. / std::sqrt(meas_prob); -#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(i) - for(i=0;i::measure_reset_update(const std::vector &qubits, cvector_t mdiag(dim, 0.); mdiag[meas_state] = 1. / std::sqrt(meas_prob); -#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(i) - for(i=0;i::measure_reset_update(const std::vector &qubits, reg_t qubits_in_chunk; reg_t qubits_out_chunk; - for(i=0;i 0){ //in chunk exchange - const size_t dim_in = 1ULL << qubits_in_chunk.size(); + if(qubits_in_chunk.size() == qubits.size()){ //all bits are inside chunk // build vectorized permutation matrix - cvector_t perm(dim_in * dim_in, 0.); - perm[final_state * dim_in + meas_state] = 1.; - perm[meas_state * dim_in + final_state] = 1.; - for (size_t j=0; j < dim_in; j++) { + cvector_t perm(dim * dim, 0.); + perm[final_state * dim + meas_state] = 1.; + perm[meas_state * dim + final_state] = 1.; + for (size_t j=0; j < dim; j++) { if (j != final_state && j != meas_state) - perm[j * dim_in + j] = 1.; + perm[j * dim + j] = 1.; } // apply permutation to swap state -#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(i) - for(i=0;i 0){ //out of chunk exchange - for(i=0;i> i) & 1) != ((meas_state >> i) & 1)){ + BaseState::apply_chunk_x(qubits[i]); + } } } } @@ -1614,24 +1653,120 @@ void State::apply_initialize(const reg_t &qubits, const cvector_t ¶ms, RngEngine &rng) { - uint_t i; + int_t i,iChunk; + auto sorted_qubits = qubits; + std::sort(sorted_qubits.begin(), sorted_qubits.end()); if (qubits.size() == BaseState::num_qubits_) { // If qubits is all ordered qubits in the statevector // we can just initialize the whole state directly - auto sorted_qubits = qubits; - std::sort(sorted_qubits.begin(), sorted_qubits.end()); if (qubits == sorted_qubits) { - initialize_qreg(qubits.size(), params); + initialize_from_vector(params); return; } } // Apply reset to qubits apply_reset(qubits, rng); - // Apply initialize_component - for(i=0;i 0){ + //scatter inside chunks + const size_t dim = 1ULL << qubits_in_chunk.size(); + cvector_t perm(dim * dim, 0.); + for(i=0;i 0){ + //then scatter outside chunk + auto sorted_qubits_out = qubits_out_chunk; + std::sort(sorted_qubits_out.begin(), sorted_qubits_out.end()); + + for(i=0;i<(1ull << (BaseState::num_qubits_ - BaseState::chunk_bits_ - qubits_out_chunk.size()));i++){ + uint_t baseChunk = 0; + uint_t j,ii,t; + ii = i; + for(j=0;j>= BaseState::chunk_bits_; + + for(j=1;j<(1ull << qubits_out_chunk.size());j++){ + iChunk = baseChunk; + for(t=0;t> t) & 1) + iChunk += (1ull << (qubits_out_chunk[t] - BaseState::chunk_bits_)); + } + + if(iChunk >= BaseState::chunk_index_begin_[BaseState::distributed_rank_] && iChunk < BaseState::chunk_index_end_[BaseState::distributed_rank_]){ //on this process + if(baseChunk >= BaseState::chunk_index_begin_[BaseState::distributed_rank_] && baseChunk < BaseState::chunk_index_end_[BaseState::distributed_rank_]){ //base chunk is on this process + BaseState::qregs_[iChunk].initialize_from_data(BaseState::qregs_[baseChunk].data(),1ull << BaseState::chunk_bits_); + } + else{ + BaseState::recv_chunk(iChunk,baseChunk); + //using swap chunk function to release send/recv buffers for Thrust + reg_t swap(2); + swap[0] = BaseState::chunk_bits_; + swap[1] = BaseState::chunk_bits_; + BaseState::qregs_[iChunk].apply_chunk_swap(swap,baseChunk); + } + } + else if(baseChunk >= BaseState::chunk_index_begin_[BaseState::distributed_rank_] && baseChunk < BaseState::chunk_index_end_[BaseState::distributed_rank_]){ //base chunk is on this process + BaseState::send_chunk(baseChunk - BaseState::global_chunk_index_,iChunk); + } + } + } + } + + //initialize by params +#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(iChunk) + for(iChunk=0;iChunk +void State::initialize_from_vector(const cvector_t ¶ms) +{ + int_t iChunk; + if(BaseState::chunk_bits_ == BaseState::num_qubits_){ + for(iChunk=0;iChunk class State; +} + namespace QubitUnitary { // OpSet of supported instructions @@ -62,6 +67,7 @@ enum class Gates { template > class State : public Base::State { + friend class QubitUnitaryChunk::State; public: using BaseState = Base::State; diff --git a/src/simulators/unitary/unitary_state_chunk.hpp b/src/simulators/unitary/unitary_state_chunk.hpp new file mode 100644 index 0000000000..49be0c8677 --- /dev/null +++ b/src/simulators/unitary/unitary_state_chunk.hpp @@ -0,0 +1,660 @@ +/** + * This code is part of Qiskit. + * + * (C) Copyright IBM 2018, 2019, 2020. + * + * This code is licensed under the Apache License, Version 2.0. You may + * obtain a copy of this license in the LICENSE.txt file in the root directory + * of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. + * + * Any modifications or derivative works of this code must retain this + * copyright notice, and modified files need to carry a notice indicating + * that they have been altered from the originals. + */ + +#ifndef _unitary_state_chunk_hpp +#define _unitary_state_chunk_hpp + +#include +#define _USE_MATH_DEFINES +#include +#include "simulators/state.hpp" +#include "framework/json.hpp" +#include "framework/utils.hpp" +#include "simulators/state_chunk.hpp" +#include "unitarymatrix.hpp" +#ifdef AER_THRUST_SUPPORTED +#include "unitarymatrix_thrust.hpp" +#endif + +namespace AER { +namespace QubitUnitaryChunk { + +// OpSet of supported instructions +const Operations::OpSet StateOpSet( + // Op types + {Operations::OpType::gate, Operations::OpType::barrier, + Operations::OpType::bfunc, Operations::OpType::roerror, + Operations::OpType::matrix, Operations::OpType::diagonal_matrix, + Operations::OpType::snapshot, Operations::OpType::save_unitary, + Operations::OpType::save_state, Operations::OpType::set_unitary}, + // Gates + {"u1", "u2", "u3", "u", "U", "CX", "cx", "cz", + "cy", "cp", "cu1", "cu2", "cu3", "swap", "id", "p", + "x", "y", "z", "h", "s", "sdg", "t", "tdg", + "r", "rx", "ry", "rz", "rxx", "ryy", "rzz", "rzx", + "ccx", "cswap", "mcx", "mcy", "mcz", "mcu1", "mcu2", "mcu3", + "mcswap", "mcphase", "mcr", "mcrx", "mcry", "mcry", "sx", "csx", + "mcsx", "delay", "pauli"}, + // Snapshots + {"unitary"}); + +//========================================================================= +// QubitUnitary State subclass +//========================================================================= + +template > +class State : public Base::StateChunk { +public: + using BaseState = Base::StateChunk; + + State() : BaseState(StateOpSet) {} + virtual ~State() = default; + + //----------------------------------------------------------------------- + // Base class overrides + //----------------------------------------------------------------------- + + // Return the string name of the State class + virtual std::string name() const override { return "unitary"; } + + // Initializes an n-qubit unitary to the identity matrix + virtual void initialize_qreg(uint_t num_qubits) override; + + // Initializes to a specific n-qubit unitary matrix + virtual void initialize_qreg(uint_t num_qubits, + const unitary_matrix_t &unitary) override; + + // Returns the required memory for storing an n-qubit state in megabytes. + // For this state the memory is indepdentent of the number of ops + // and is approximately 16 * 1 << 2 * num_qubits bytes + virtual size_t + required_memory_mb(uint_t num_qubits, + const std::vector &ops) const override; + + // Load the threshold for applying OpenMP parallelization + // if the controller/engine allows threads for it + // Config: {"omp_qubit_threshold": 7} + virtual void set_config(const json_t &config) override; + + //----------------------------------------------------------------------- + // Additional methods + //----------------------------------------------------------------------- + + // Initializes to a specific n-qubit unitary given as a complex matrix + virtual void initialize_qreg(uint_t num_qubits, const cmatrix_t &unitary); + + // Initialize OpenMP settings for the underlying QubitVector class + void initialize_omp(); + + auto move_to_matrix(); + auto copy_to_matrix(); + +protected: + //----------------------------------------------------------------------- + // Apply Instructions + //----------------------------------------------------------------------- + virtual void apply_op(const int_t iChunk,const Operations::Op &op, + ExperimentResult &result, + RngEngine &rng, + bool final_ops = false) override; + + //swap between chunks + virtual void apply_chunk_swap(const reg_t &qubits) override; + + // Applies a Gate operation to the state class. + // This should support all and only the operations defined in + // allowed_operations. + void apply_gate(const uint_t iChunk,const Operations::Op &op); + + // Apply a supported snapshot instruction + // If the input is not in allowed_snapshots an exeption will be raised. + virtual void apply_snapshot(const Operations::Op &op, ExperimentResult &result); + + // Apply a matrix to given qubits (identity on all other qubits) + void apply_matrix(const uint_t iChunk,const reg_t &qubits, const cmatrix_t &mat); + + // Apply a matrix to given qubits (identity on all other qubits) + void apply_matrix(const uint_t iChunk,const reg_t &qubits, const cvector_t &vmat); + + // Apply a diagonal matrix + void apply_diagonal_matrix(const uint_t iChunk,const reg_t &qubits, const cvector_t &diag); + + //----------------------------------------------------------------------- + // 1-Qubit Gates + //----------------------------------------------------------------------- + + // Optimize phase gate with diagonal [1, phase] + void apply_gate_phase(const uint_t iChunk,const uint_t qubit, const complex_t phase); + + void apply_gate_phase(const uint_t iChunk,const reg_t& qubits, const complex_t phase); + + //----------------------------------------------------------------------- + // Multi-controlled u3 + //----------------------------------------------------------------------- + + // Apply N-qubit multi-controlled single qubit waltz gate specified by + // parameters u3(theta, phi, lambda) + // NOTE: if N=1 this is just a regular u3 gate. + void apply_gate_mcu3(const uint_t iChunk,const reg_t &qubits, const double theta, + const double phi, const double lambda); + + //----------------------------------------------------------------------- + // Save data instructions + //----------------------------------------------------------------------- + // Save the unitary matrix for the simulator + void apply_save_unitary(const Operations::Op &op, + ExperimentResult &result, + bool last_op); + + // Helper function for computing expectation value + virtual double expval_pauli(const reg_t &qubits, + const std::string& pauli) override; + + //----------------------------------------------------------------------- + // Config Settings + //----------------------------------------------------------------------- + + // Apply the global phase + void apply_global_phase(); + + // OpenMP qubit threshold + int omp_qubit_threshold_ = 6; + + // Threshold for chopping small values to zero in JSON + double json_chop_threshold_ = 1e-10; + + int qubit_scale() override + { + return 2; + } +}; + + +//============================================================================ +// Implementation: Base class method overrides +//============================================================================ +template +void State::apply_op(const int_t iChunk,const Operations::Op &op, + ExperimentResult &result, + RngEngine &rng, + bool final_ops) +{ + switch (op.type) { + case Operations::OpType::barrier: + break; + case Operations::OpType::bfunc: + BaseState::creg_.apply_bfunc(op); + break; + case Operations::OpType::roerror: + BaseState::creg_.apply_roerror(op, rng); + break; + case Operations::OpType::gate: + // Note conditionals will always fail since no classical registers + if (BaseState::creg_.check_conditional(op)) + apply_gate(iChunk,op); + break; + case Operations::OpType::set_unitary: + BaseState::initialize_from_matrix(op.mats[0]); + break; + case Operations::OpType::save_state: + case Operations::OpType::save_unitary: + apply_save_unitary(op, result, final_ops); + break; + case Operations::OpType::snapshot: + apply_snapshot(op, result); + break; + case Operations::OpType::matrix: + apply_matrix(iChunk,op.qubits, op.mats[0]); + break; + case Operations::OpType::diagonal_matrix: + apply_diagonal_matrix(iChunk,op.qubits, op.params); + break; + default: + throw std::invalid_argument( + "QubitUnitary::State::invalid instruction \'" + op.name + "\'."); + } +} + +//swap between chunks +template +void State::apply_chunk_swap(const reg_t &qubits) +{ + uint_t q0,q1; + q0 = qubits[0]; + q1 = qubits[1]; + + std::swap(BaseState::qubit_map_[q0],BaseState::qubit_map_[q1]); + + if(qubits[0] >= BaseState::chunk_bits_){ + q0 += BaseState::chunk_bits_; + } + if(qubits[1] >= BaseState::chunk_bits_){ + q1 += BaseState::chunk_bits_; + } + reg_t qs0 = {{q0, q1}}; + BaseState::apply_chunk_swap(qs0); +} + +template +size_t State::required_memory_mb( + uint_t num_qubits, const std::vector &ops) const { + // An n-qubit unitary as 2^2n complex doubles + // where each complex double is 16 bytes + (void)ops; // avoid unused variable compiler warning + size_t shift_mb = std::max(0, num_qubits + 4 - 20); + size_t mem_mb = 1ULL << (2 * shift_mb); + return mem_mb; +} + +template +void State::set_config(const json_t &config) { + BaseState::set_config(config); + + // Set OMP threshold for state update functions + JSON::get_value(omp_qubit_threshold_, "unitary_parallel_threshold", config); + + // Set threshold for truncating snapshots + JSON::get_value(json_chop_threshold_, "zero_threshold", config); + uint_t i; + for(i=0;i +void State::initialize_qreg(uint_t num_qubits) +{ + int_t iChunk; + + initialize_omp(); + + if(BaseState::chunk_bits_ == BaseState::num_qubits_){ + for(iChunk=0;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_)); + icol = (BaseState::global_chunk_index_ + iChunk) - (irow << ((BaseState::num_qubits_ - BaseState::chunk_bits_))); + if(irow == icol) + BaseState::qregs_[iChunk].initialize(); + else + BaseState::qregs_[iChunk].zero(); + } + } + + apply_global_phase(); +} + +template +void State::initialize_qreg(uint_t num_qubits, + const unitary_matrix_t &unitary) +{ + // Check dimension of state + if (unitary.num_qubits() != num_qubits) { + throw std::invalid_argument( + "Unitary::State::initialize: initial state does not match qubit " + "number"); + } + initialize_omp(); + + int_t iChunk; + if(BaseState::chunk_bits_ == BaseState::num_qubits_){ + for(iChunk=0;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_))); + uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)); + + //copy part of state for this chunk + uint_t i,row,col; + cvector_t tmp(1ull << BaseState::chunk_bits_); + for(i=0;i<(1ull << BaseState::chunk_bits_);i++){ + uint_t icol = i >> (BaseState::chunk_bits_); + uint_t irow = i & mask; + uint_t idx = ((icol+(irow_chunk << BaseState::chunk_bits_)) << (BaseState::num_qubits_)) + (icol_chunk << BaseState::chunk_bits_) + irow; + tmp[i] = input[idx]; + } + BaseState::qregs_[iChunk].initialize_from_vector(tmp); + } + } + + apply_global_phase(); +} + +template +void State::initialize_qreg(uint_t num_qubits, + const cmatrix_t &unitary) +{ + // Check dimension of unitary + if (unitary.size() != 1ULL << (2 * num_qubits)) { + throw std::invalid_argument( + "Unitary::State::initialize: initial state does not match qubit " + "number"); + } + initialize_omp(); + + int_t iChunk; + if(BaseState::chunk_bits_ == BaseState::num_qubits_){ + for(iChunk=0;iChunk> ((BaseState::num_qubits_ - BaseState::chunk_bits_))); + uint_t icol_chunk = ((iChunk + BaseState::global_chunk_index_) & ((1ull << ((BaseState::num_qubits_ - BaseState::chunk_bits_)))-1)); + + //copy part of state for this chunk + uint_t i,row,col; + cvector_t tmp(1ull << BaseState::chunk_bits_); + for(i=0;i<(1ull << BaseState::chunk_bits_);i++){ + uint_t icol = i >> (BaseState::chunk_bits_); + uint_t irow = i & mask; + uint_t idx = ((icol+(irow_chunk << BaseState::chunk_bits_)) << (BaseState::num_qubits_)) + (icol_chunk << BaseState::chunk_bits_) + irow; + tmp[i] = unitary[idx]; + } + BaseState::qregs_[iChunk].initialize_from_vector(tmp); + } + } + apply_global_phase(); +} + +template +void State::initialize_omp() +{ + uint_t i; + for(i=0;i 0) + BaseState::qregs_[i].set_omp_threads(BaseState::threads_); // set allowed OMP threads in qubitvector + } +} + +template +auto State::move_to_matrix() +{ + if(BaseState::num_global_chunks_ == 1){ + return BaseState::qregs_[0].move_to_matrix(); + } + return BaseState::apply_to_matrix(false); +} + +template +auto State::copy_to_matrix() +{ + if(BaseState::num_global_chunks_ == 1){ + return BaseState::qregs_[0].copy_to_matrix(); + } + return BaseState::apply_to_matrix(true); +} + +//========================================================================= +// Implementation: Gates +//========================================================================= + +template +void State::apply_gate(const uint_t iChunk,const Operations::Op &op) { + // Look for gate name in gateset + auto it = QubitUnitary::State::gateset_.find(op.name); + if (it == QubitUnitary::State::gateset_.end()) + throw std::invalid_argument("Unitary::State::invalid gate instruction \'" + + op.name + "\'."); + QubitUnitary::Gates g = it->second; + switch (g) { + case QubitUnitary::Gates::mcx: + // Includes X, CX, CCX, etc + BaseState::qregs_[iChunk].apply_mcx(op.qubits); + break; + case QubitUnitary::Gates::mcy: + // Includes Y, CY, CCY, etc + BaseState::qregs_[iChunk].apply_mcy(op.qubits); + break; + case QubitUnitary::Gates::mcz: + // Includes Z, CZ, CCZ, etc + apply_gate_phase(iChunk,op.qubits, -1); + break; + case QubitUnitary::Gates::mcr: + BaseState::qregs_[iChunk].apply_mcu(op.qubits, Linalg::VMatrix::r(op.params[0], op.params[1])); + break; + case QubitUnitary::Gates::mcrx: + BaseState::qregs_[iChunk].apply_mcu(op.qubits, Linalg::VMatrix::rx(op.params[0])); + break; + case QubitUnitary::Gates::mcry: + BaseState::qregs_[iChunk].apply_mcu(op.qubits, Linalg::VMatrix::ry(op.params[0])); + break; + case QubitUnitary::Gates::mcrz: + BaseState::qregs_[iChunk].apply_mcu(op.qubits, Linalg::VMatrix::rz(op.params[0])); + break; + case QubitUnitary::Gates::rxx: + BaseState::qregs_[iChunk].apply_matrix(op.qubits, Linalg::VMatrix::rxx(op.params[0])); + break; + case QubitUnitary::Gates::ryy: + BaseState::qregs_[iChunk].apply_matrix(op.qubits, Linalg::VMatrix::ryy(op.params[0])); + break; + case QubitUnitary::Gates::rzz: + apply_diagonal_matrix(iChunk,op.qubits, Linalg::VMatrix::rzz_diag(op.params[0])); + break; + case QubitUnitary::Gates::rzx: + BaseState::qregs_[iChunk].apply_matrix(op.qubits, Linalg::VMatrix::rzx(op.params[0])); + break; + case QubitUnitary::Gates::id: + break; + case QubitUnitary::Gates::h: + apply_gate_mcu3(iChunk,op.qubits, M_PI / 2., 0., M_PI); + break; + case QubitUnitary::Gates::s: + apply_gate_phase(iChunk,op.qubits[0], complex_t(0., 1.)); + break; + case QubitUnitary::Gates::sdg: + apply_gate_phase(iChunk,op.qubits[0], complex_t(0., -1.)); + break; + case QubitUnitary::Gates::pauli: + BaseState::qregs_[iChunk].apply_pauli(op.qubits, op.string_params[0]); + break; + case QubitUnitary::Gates::t: { + const double isqrt2{1. / std::sqrt(2)}; + apply_gate_phase(iChunk,op.qubits[0], complex_t(isqrt2, isqrt2)); + } break; + case QubitUnitary::Gates::tdg: { + const double isqrt2{1. / std::sqrt(2)}; + apply_gate_phase(iChunk,op.qubits[0], complex_t(isqrt2, -isqrt2)); + } break; + case QubitUnitary::Gates::mcswap: + // Includes SWAP, CSWAP, etc + BaseState::qregs_[iChunk].apply_mcswap(op.qubits); + break; + case QubitUnitary::Gates::mcu3: + // Includes u3, cu3, etc + apply_gate_mcu3(iChunk,op.qubits, std::real(op.params[0]), std::real(op.params[1]), + std::real(op.params[2])); + break; + case QubitUnitary::Gates::mcu2: + // Includes u2, cu2, etc + apply_gate_mcu3(iChunk,op.qubits, M_PI / 2., std::real(op.params[0]), + std::real(op.params[1])); + break; + case QubitUnitary::Gates::mcp: + // Includes u1, cu1, p, cp, mcp, etc + apply_gate_phase(iChunk,op.qubits, std::exp(complex_t(0, 1) * op.params[0])); + break; + case QubitUnitary::Gates::mcsx: + // Includes sx, csx, mcsx etc + BaseState::qregs_[iChunk].apply_mcu(op.qubits, Linalg::VMatrix::SX); + break; + default: + // We shouldn't reach here unless there is a bug in gateset + throw std::invalid_argument("Unitary::State::invalid gate instruction \'" + + op.name + "\'."); + } +} + +template +void State::apply_matrix(const uint_t iChunk,const reg_t &qubits, + const cmatrix_t &mat) { + if (qubits.empty() == false && mat.size() > 0) { + apply_matrix(iChunk,qubits, Utils::vectorize_matrix(mat)); + } +} + +template +void State::apply_matrix(const uint_t iChunk,const reg_t &qubits, + const cvector_t &vmat) { + // Check if diagonal matrix + if (vmat.size() == 1ULL << qubits.size()) { + apply_diagonal_matrix(iChunk,qubits, vmat); + } else { + BaseState::qregs_[iChunk].apply_matrix(qubits, vmat); + } +} + +template +void State::apply_diagonal_matrix(const uint_t iChunk, const reg_t &qubits, const cvector_t &diag) +{ + if(BaseState::gpu_optimization_){ + //GPU computes all chunks in one kernel, so pass qubits and diagonal matrix as is + reg_t qubits_chunk = qubits; + for(uint_t i;i= BaseState::chunk_bits_){ + qubits_chunk[i] += BaseState::chunk_bits_; + } + } + + BaseState::qregs_[iChunk].apply_diagonal_matrix(qubits_chunk,diag); + } + else{ + reg_t qubits_in = qubits; + cvector_t diag_in = diag; + + BaseState::block_diagonal_matrix(iChunk,qubits_in,diag_in); + BaseState::qregs_[iChunk].apply_diagonal_matrix(qubits_in,diag_in); + } +} + +template +void State::apply_gate_phase(const uint_t iChunk,const uint_t qubit, complex_t phase) { + cvector_t diag(2); + diag[0] = 1.0; + diag[1] = phase; + apply_diagonal_matrix(iChunk,reg_t({qubit}), diag); +} + +template +void State::apply_gate_phase(const uint_t iChunk,const reg_t& qubits, complex_t phase) +{ + cvector_t diag((1 << qubits.size()),1.0); + diag[(1 << qubits.size()) - 1] = phase; + apply_diagonal_matrix(iChunk,qubits, diag); +} + +template +void State::apply_gate_mcu3(const uint_t iChunk,const reg_t &qubits, double theta, + double phi, double lambda) { + const auto u3 = Linalg::Matrix::u3(theta, phi, lambda); + BaseState::qregs_[iChunk].apply_mcu(qubits, Utils::vectorize_matrix(u3)); +} + +template +void State::apply_snapshot(const Operations::Op &op, + ExperimentResult &result) { + // Look for snapshot type in snapshotset + if (op.name == "unitary" || op.name == "state") { + auto matrix = copy_to_matrix(); + if(BaseState::distributed_rank_ == 0){ + result.legacy_data.add_pershot_snapshot("unitary", op.string_params[0], + matrix); + } + } else { + throw std::invalid_argument( + "Unitary::State::invalid snapshot instruction \'" + op.name + "\'."); + } +} + +template +void State::apply_global_phase() { + if (BaseState::has_global_phase_) { + int_t i; +#pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(i) + for(i=0;i +void State::apply_save_unitary(const Operations::Op &op, + ExperimentResult &result, + bool last_op) +{ + if (op.qubits.size() != BaseState::num_qubits_) { + throw std::invalid_argument( + op.name + " was not applied to all qubits." + " Only the full unitary can be saved."); + } + std::string key = (op.string_params[0] == "_method_") ? "unitary" : op.string_params[0]; + + if (last_op) { + BaseState::save_data_pershot(result, key, + move_to_matrix(), + op.save_type); + } else { + BaseState::save_data_pershot(result, key, + copy_to_matrix(), + op.save_type); + } +} + +template +double State::expval_pauli(const reg_t &qubits, + const std::string& pauli) { + throw std::runtime_error("Unitary simulator does not support Pauli expectation values."); +} + +//------------------------------------------------------------------------------ +} // namespace QubitUnitary +} // end namespace AER +//------------------------------------------------------------------------------ +#endif diff --git a/src/simulators/unitary/unitarymatrix_thrust.hpp b/src/simulators/unitary/unitarymatrix_thrust.hpp index 2dcceea8da..300f5e9e6f 100755 --- a/src/simulators/unitary/unitarymatrix_thrust.hpp +++ b/src/simulators/unitary/unitarymatrix_thrust.hpp @@ -208,10 +208,7 @@ matrix> UnitaryMatrixThrust::copy_to_matrix() const uint_t irow, icol; #pragma omp parallel for private(i,irow,icol) if (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && BaseVector::omp_threads_ > 1) num_threads(BaseVector::omp_threads_) for (i = 0; i < csize; i++) { - irow = (i >> num_qubits_); - icol = i - (irow << num_qubits_); - - ret(icol, irow) = qreg[i]; + ret[i] = qreg[i]; } return ret; } @@ -229,17 +226,13 @@ void UnitaryMatrixThrust::initialize() BaseVector::zero(); // Set to be identity matrix - uint_t is,ie,idx; + uint_t idx; int_t i; - is = BaseVector::chunk_index_ << BaseVector::num_qubits_; - ie = is + (1ull << BaseVector::num_qubits_); #pragma omp parallel private(idx) if (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && BaseVector::omp_threads_ > 1) num_threads(BaseVector::omp_threads_) for(i=0;i= is && idx < ie){ - BaseVector::set_state(idx-is,one); - } + BaseVector::set_state(idx,one); } } @@ -262,15 +255,15 @@ void UnitaryMatrixThrust::initialize_from_matrix(const matrix tmp(BaseVector::data_size_); - int_t i; + matrix> tmp(nrows,nrows); #pragma omp parallel for if (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && BaseVector::omp_threads_ > 1) num_threads(BaseVector::omp_threads_) for (int_t row = 0; row < nrows; ++row) for (int_t col = 0; col < nrows; ++col) { - tmp[row + nrows * col] = mat(row, col); + tmp(row,col) = mat(row, col); } - BaseVector::initialize_from_vector(tmp); + + BaseVector::initialize_from_data(tmp.data(), tmp.size()); } template diff --git a/src/transpile/cacheblocking.hpp b/src/transpile/cacheblocking.hpp index 035f970c30..db53160e21 100644 --- a/src/transpile/cacheblocking.hpp +++ b/src/transpile/cacheblocking.hpp @@ -69,6 +69,11 @@ class CacheBlocking : public CircuitOptimization { sample_measure_ = enabled; } + void set_save_state(bool enabled) + { + save_state_ = enabled; + } + //setting blocking parameters automatically void set_blocking(int bits, size_t min_memory, uint_t n_place, const size_t complex_size, bool is_matrix = false); @@ -79,7 +84,9 @@ class CacheBlocking : public CircuitOptimization { mutable reg_t qubitSwapped_; mutable bool blocking_enabled_; mutable bool sample_measure_ = false; + mutable bool save_state_ = false; int gpu_blocking_bits_; + bool density_matrix_ = false; bool block_circuit(Circuit& circ,bool doSwap) const; @@ -94,7 +101,7 @@ class CacheBlocking : public CircuitOptimization { bool is_diagonal_op(Operations::Op& op) const; void insert_swap(std::vector& ops,uint_t bit0,uint_t bit1,bool chunk) const; - void insert_sim_op(std::vector& ops,const char* name,const reg_t& qubits) const; + void insert_sim_op(std::vector& ops,char* name,const reg_t& qubits) const; void insert_pauli(std::vector& ops,reg_t& qubits,std::string& pauli) const; void define_blocked_qubits(std::vector& ops,reg_t& blockedQubits,bool crossQubitOnly) const; @@ -104,6 +111,8 @@ class CacheBlocking : public CircuitOptimization { bool split_pauli(const Operations::Op& op, const reg_t blockedQubits, std::vector& out,std::vector& queue) const; + bool split_op(const Operations::Op& op,const reg_t blockedQubits,std::vector& out,std::vector& queue) const; + bool is_blockable_operation(Operations::Op& op) const; }; @@ -123,6 +132,14 @@ void CacheBlocking::set_config(const json_t &config) gpu_blocking_bits_ = 10; } } + + std::string method; + if (JSON::get_value(method, "method", config)) { + if(method.find("density_matrix") != std::string::npos){ + density_matrix_ = true; + } + } + } @@ -130,6 +147,7 @@ void CacheBlocking::set_blocking(int bits, size_t min_memory, uint_t n_place, si { int chunk_bits = bits; uint_t scale = is_matrix ? 2 : 1; + size_t size; //get largest possible chunk bits while((complex_size << (scale*chunk_bits)) > min_memory){ @@ -172,7 +190,7 @@ void CacheBlocking::insert_swap(std::vector& ops,uint_t bit0,uin ops.push_back(sgate); } -void CacheBlocking::insert_sim_op(std::vector& ops,const char* name,const reg_t& qubits) const +void CacheBlocking::insert_sim_op(std::vector& ops,char* name,const reg_t& qubits) const { Operations::Op op; op.type = Operations::OpType::sim_op; @@ -223,7 +241,7 @@ void CacheBlocking::optimize_circuit(Circuit& circ, void CacheBlocking::define_blocked_qubits(std::vector& ops,reg_t& blockedQubits,bool crossQubitOnly) const { uint_t i,j,iq; - int nq; + int nq,nb; bool exist; for(i=0;i= block_bits_) @@ -290,7 +308,10 @@ bool CacheBlocking::can_reorder(Operations::Op& op,std::vector& //only gate and matrix can be reordered if(op.type != Operations::OpType::gate && op.type != Operations::OpType::matrix){ - return false; + //except for reset for density matrix + if(!density_matrix_ || op.type != Operations::OpType::reset){ + return false; + } } for(j=0;j& bool CacheBlocking::block_circuit(Circuit& circ,bool doSwap) const { - uint_t n; + uint_t i,n; std::vector out; std::vector queue; std::vector queue_next; n = add_ops(circ.ops,out,queue,doSwap,true); -// put_nongate_ops(out,queue_next,queue,doSwap); -// queue.clear(); while(queue.size() > 0){ n = add_ops(queue,out,queue_next,doSwap,false); queue = queue_next; queue_next.clear(); -// put_nongate_ops(out,queue_next,queue,doSwap); if(n == 0){ break; } -// queue.clear(); } if(queue.size() > 0) return false; -// restore_qubits_order(out); + if(save_state_) + restore_qubits_order(out); circ.ops = out; return true; } -void CacheBlocking::put_nongate_ops(std::vector& out,std::vector& queue,std::vector& input,bool doSwap) const -{ - uint_t i; - for(i=0;i& ops) const { uint_t i,j,t; @@ -400,6 +395,16 @@ void CacheBlocking::restore_qubits_order(std::vector& ops) const if(qubitMap_[i] != i){ j = qubitMap_[qubitMap_[i]]; if(j != i && j < block_bits_){ + if(nInBlock == 0){ + uint_t last = ops.size() - 1; + if(ops[last].type == Operations::OpType::sim_op && ops[last].name == "end_blocking"){ + ops.pop_back(); + nInBlock = 1; + } + else{ + insert_sim_op(ops,"begin_blocking",qubitMap_); + } + } insert_swap(ops,i,j,false); qubitMap_[qubitSwapped_[i]] = j; @@ -440,9 +445,14 @@ void CacheBlocking::restore_qubits_order(std::vector& ops) const bool CacheBlocking::is_blockable_operation(Operations::Op& op) const { if(op.type == Operations::OpType::gate || op.type == Operations::OpType::matrix || - op.type == Operations::OpType::diagonal_matrix || op.type == Operations::OpType::multiplexer){ + op.type == Operations::OpType::diagonal_matrix || op.type == Operations::OpType::multiplexer || + op.type == Operations::OpType::superop){ + return true; + } + if(density_matrix_ && op.type == Operations::OpType::reset){ return true; } + return false; } @@ -450,6 +460,7 @@ uint_t CacheBlocking::add_ops(std::vector& ops,std::vector& ops,std::vector 0){ //swap gate is not required for initial state + if(!first){ //swap gate is not required for initial state insert_swap(out,swap[i],qubitMap_[blockedQubits[i]],true); } @@ -545,19 +556,21 @@ uint_t CacheBlocking::add_ops(std::vector& ops,std::vector 0 && !end_block_inserted){ //insert end of block to synchronize chunks - insert_sim_op(out,"end_blocking",blockedQubits); - } - else if(!end_block_inserted){ - out.pop_back(); - } + bool restore_qubits = false; if(ops[i].type == Operations::OpType::kraus){ - if(ops[i].qubits.size() >= block_bits_){ - throw std::runtime_error("CacheBlocking : kraus number of qubits should be smaller than chunk qubit size"); + if(ops[i].qubits.size() > block_bits_){ + throw std::runtime_error("CacheBlocking : Kraus operator, number of qubits should be smaller than chunk qubit size"); break; } if(!can_block(ops[i],blockedQubits)){ //if some qubits are out of chunk, queued for next step @@ -565,15 +578,37 @@ uint_t CacheBlocking::add_ops(std::vector& ops,std::vector 0 && !end_block_inserted){ //insert end of block to synchronize chunks + insert_sim_op(out,"end_blocking",blockedQubits); + } + else if(!end_block_inserted){ + out.pop_back(); } + + if(restore_qubits) + restore_qubits_order(out); + //mapping swapped qubits for(iq=0;iq 1) return true; } - else if(op.type == Operations::OpType::matrix){ //fusion + else if(op.type == Operations::OpType::matrix || op.type == Operations::OpType::multiplexer || op.type == Operations::OpType::superop){ if(op.qubits.size() > 1) return true; } @@ -792,6 +827,51 @@ bool CacheBlocking::split_pauli(const Operations::Op& op,const reg_t blockedQubi return false; } +//split op to inside op and outside op +bool CacheBlocking::split_op(const Operations::Op& op,const reg_t blockedQubits,std::vector& out,std::vector& queue) const +{ + reg_t qubits_in_chunk; + reg_t qubits_out_chunk; + int_t i,j,n; + bool inside; + + n = op.qubits.size(); + for(i=0;i 0){ //save in queue + Operations::Op op_out = op; + op_out.qubits = qubits_out_chunk; + queue.push_back(op_out); + } + + if(qubits_in_chunk.size() > 0){ + Operations::Op op_in = op; + //mapping swapped qubits + for(i=0;i Date: Mon, 26 Apr 2021 21:15:59 -0400 Subject: [PATCH 2/7] Fix configuration.basis_gates performance issue (#1232) Fixes performance issue with how the ``basis_gates`` was dynamically computed based on noise model and simulation method. --- .../providers/aer/backends/aer_simulator.py | 35 ++++++++++----- qiskit/providers/aer/backends/aerbackend.py | 2 +- .../providers/aer/backends/qasm_simulator.py | 44 ++++++++++++------- .../basis-gates-perf-aba3fe10a154a19a.yaml | 9 ++++ 4 files changed, 61 insertions(+), 29 deletions(-) create mode 100644 releasenotes/notes/basis-gates-perf-aba3fe10a154a19a.yaml diff --git a/qiskit/providers/aer/backends/aer_simulator.py b/qiskit/providers/aer/backends/aer_simulator.py index 57e43779e2..fdefb75d8a 100644 --- a/qiskit/providers/aer/backends/aer_simulator.py +++ b/qiskit/providers/aer/backends/aer_simulator.py @@ -471,6 +471,10 @@ def __init__(self, configuration = QasmBackendConfiguration.from_dict( AerSimulator._DEFAULT_CONFIGURATION) + # Cache basis gates since computing the intersection + # of noise model, method, and config gates is expensive. + self._cached_basis_gates = self._BASIS_GATES['automatic'] + super().__init__(configuration, properties=properties, available_methods=AerSimulator._AVAILABLE_METHODS, @@ -581,14 +585,14 @@ def configuration(self): Returns: BackendConfiguration: the configuration for the backend. """ + config = copy.copy(self._configuration) + for key, val in self._options_configuration.items(): + setattr(config, key, val) # Update basis gates based on custom options, config, method, # and noise model - basis_gates = self._basis_gates() - method = getattr(self.options, 'method', 'automatic') - custom_inst = self._CUSTOM_INSTR[method] - config = super().configuration() - config.custom_instructions = custom_inst - config.basis_gates = basis_gates + custom_inst + config.custom_instructions = self._CUSTOM_INSTR[ + getattr(self.options, 'method', 'automatic')] + config.basis_gates = self._cached_basis_gates + config.custom_instructions # Update simulator name config.backend_name = self.name() return config @@ -606,9 +610,14 @@ def _execute(self, qobj): def set_options(self, **fields): out_options = {} + update_basis_gates = False for key, value in fields.items(): if key == 'method': self._set_method_config(value) + update_basis_gates = True + out_options[key] = value + elif key in ['noise_model', 'basis_gates']: + update_basis_gates = True out_options[key] = value elif key == 'device': if value is not None and value not in self._AVAILABLE_DEVICES: @@ -621,6 +630,8 @@ def set_options(self, **fields): else: out_options[key] = value super().set_options(**out_options) + if update_basis_gates: + self._cached_basis_gates = self._basis_gates() def _validate(self, qobj): """Semantic validations of the qobj which cannot be done via schemas. @@ -652,15 +663,15 @@ def _basis_gates(self): if 'basis_gates' in self._options_configuration: return self._options_configuration['basis_gates'] - # Set basis gates to be the intersection of config, method, and noise model - # basis gates - config_gates = self._configuration.basis_gates - basis_gates = set(config_gates) - # Compute intersection with method basis gates method = getattr(self._options, 'method', 'automatic') method_gates = self._BASIS_GATES[method] - basis_gates = basis_gates.intersection(method_gates) + config_gates = self._configuration.basis_gates + if config_gates: + basis_gates = set(config_gates).intersection( + method_gates) + else: + basis_gates = method_gates # Compute intersection with noise model basis gates noise_model = getattr(self.options, 'noise_model', None) diff --git a/qiskit/providers/aer/backends/aerbackend.py b/qiskit/providers/aer/backends/aerbackend.py index b5780eeafb..ec5136216a 100644 --- a/qiskit/providers/aer/backends/aerbackend.py +++ b/qiskit/providers/aer/backends/aerbackend.py @@ -181,7 +181,7 @@ def configuration(self): # If config has custom instructions add them to # basis gates to include them for the terra transpiler if hasattr(config, 'custom_instructions'): - config.basis_gates += config.custom_instructions + config.basis_gates = config.basis_gates + config.custom_instructions return config def properties(self): diff --git a/qiskit/providers/aer/backends/qasm_simulator.py b/qiskit/providers/aer/backends/qasm_simulator.py index 16863fb718..a7b4f02f6e 100644 --- a/qiskit/providers/aer/backends/qasm_simulator.py +++ b/qiskit/providers/aer/backends/qasm_simulator.py @@ -344,6 +344,10 @@ def __init__(self, else: configuration.open_pulse = False + # Cache basis gates since computing the intersection + # of noise model, method, and config gates is expensive. + self._cached_basis_gates = self._DEFAULT_BASIS_GATES + super().__init__(configuration, properties=properties, available_methods=QasmSimulator._AVAILABLE_METHODS, @@ -445,13 +449,13 @@ def configuration(self): Returns: BackendConfiguration: the configuration for the backend. """ + config = copy.copy(self._configuration) + for key, val in self._options_configuration.items(): + setattr(config, key, val) # Update basis gates based on custom options, config, method, # and noise model - basis_gates = self._basis_gates() - custom_inst = self._custom_instructions() - config = super().configuration() - config.custom_instructions = custom_inst - config.basis_gates = basis_gates + custom_inst + config.custom_instructions = self._custom_instructions() + config.basis_gates = self._cached_basis_gates + config.custom_instructions return config def _execute(self, qobj): @@ -467,15 +471,22 @@ def _execute(self, qobj): def set_options(self, **fields): out_options = {} + update_basis_gates = False for key, value in fields.items(): if key == 'method': self._set_method_config(value) + update_basis_gates = True + out_options[key] = value + elif key in ['noise_model', 'basis_gates']: + update_basis_gates = True out_options[key] = value elif key == 'custom_instructions': self._set_configuration_option(key, value) else: out_options[key] = value super().set_options(**out_options) + if update_basis_gates: + self._cached_basis_gates = self._basis_gates() def _validate(self, qobj): """Semantic validations of the qobj which cannot be done via schemas. @@ -511,21 +522,22 @@ def _basis_gates(self): if 'basis_gates' in self._options_configuration: return self._options_configuration['basis_gates'] - # Set basis gates to be the intersection of config, method, and noise model - # basis gates + # Compute intersection with method basis gates + method_gates = self._method_basis_gates() config_gates = self._configuration.basis_gates - basis_gates = set(config_gates) - if self._options.noise_model: - noise_gates = self._options.noise_model.basis_gates - basis_gates = basis_gates.intersection(noise_gates) + if config_gates: + basis_gates = set(config_gates).intersection( + method_gates) else: - noise_gates = None + basis_gates = method_gates - if self._options.method: - method_gates = self._method_basis_gates() - basis_gates = basis_gates.intersection(method_gates) + # Compute intersection with noise model basis gates + noise_model = getattr(self.options, 'noise_model', None) + if noise_model: + noise_gates = noise_model.basis_gates + basis_gates = basis_gates.intersection(noise_gates) else: - method_gates = None + noise_gates = None if not basis_gates: logger.warning( diff --git a/releasenotes/notes/basis-gates-perf-aba3fe10a154a19a.yaml b/releasenotes/notes/basis-gates-perf-aba3fe10a154a19a.yaml new file mode 100644 index 0000000000..dffc003f6a --- /dev/null +++ b/releasenotes/notes/basis-gates-perf-aba3fe10a154a19a.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes performance issue with how the ``basis_gates`` configuration + attribute was set. Previously there were unintended side-effects to the + backend class which could cause repeated simulation runtime to + incrementally increase. Refer to + `#1229 ` for more + information and examples. From ef54e9bc17f84553ebff45105655a9a9516f7080 Mon Sep 17 00:00:00 2001 From: "Christopher J. Wood" Date: Wed, 28 Apr 2021 13:02:23 -0400 Subject: [PATCH 3/7] Fix stabilizer simulator expval function (#1231) --- ...ix-stabilizer-expval-8649ee03f4b4b82d.yaml | 8 ++ src/simulators/stabilizer/clifford.hpp | 34 -------- .../stabilizer/stabilizer_state.hpp | 79 ++++++++++++++----- .../instructions/test_save_expval.py | 38 ++++++--- 4 files changed, 95 insertions(+), 64 deletions(-) create mode 100644 releasenotes/notes/fix-stabilizer-expval-8649ee03f4b4b82d.yaml diff --git a/releasenotes/notes/fix-stabilizer-expval-8649ee03f4b4b82d.yaml b/releasenotes/notes/fix-stabilizer-expval-8649ee03f4b4b82d.yaml new file mode 100644 index 0000000000..08f2dac8ca --- /dev/null +++ b/releasenotes/notes/fix-stabilizer-expval-8649ee03f4b4b82d.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes a bug in the ``stabilizer`` simulator method of the + :class:`~qiskit.providers.aer.QasmSimulator` and + :class:`~qiskit.providers.aer.AerSimulator` where the expectation value + for the ``save_expectation_value`` and ``snapshot_expectation_value`` + could have the wrong sign for certain ``Y`` Pauli's. diff --git a/src/simulators/stabilizer/clifford.hpp b/src/simulators/stabilizer/clifford.hpp index c540471414..bee03ed7e2 100644 --- a/src/simulators/stabilizer/clifford.hpp +++ b/src/simulators/stabilizer/clifford.hpp @@ -106,10 +106,6 @@ class Clifford { // the outcome was random. bool measure_and_update(const uint64_t qubit, const uint64_t randint); - // Return 1 or -1: the expectation value of observable Z on all - // the qubits in the parameter `qubits` - int64_t expectation_value(const std::vector& qubits); - //----------------------------------------------------------------------- // Configuration settings //----------------------------------------------------------------------- @@ -337,36 +333,6 @@ bool Clifford::measure_and_update(const uint64_t qubit, const uint64_t randint) } } -int64_t Clifford::expectation_value(const std::vector& qubits) { - // Check if there is a row that anti-commutes with an odd number of qubits - // If so expectation value is 0 - for (uint64_t p = num_qubits_; p < 2 * num_qubits_; p++) { - uint64_t num_of_x = 0; - for (auto qubit : qubits) { - if (table_[p].X[qubit]) - num_of_x ++; - } - if(num_of_x % 2 == 1) - return 0; - } - - // Otherwise the expectation value is +1 or -1 - uint64_t sum_of_outcomes = 0; - for (auto qubit : qubits) { - Pauli::Pauli accum(num_qubits_); - phase_t outcome = 0; - for (uint64_t i = 0; i < num_qubits_; i++) { - if (table_[i].X[qubit]) { - rowsum_helper(table_[i + num_qubits_], phases_[i + num_qubits_], - accum, outcome); - } - } - sum_of_outcomes += outcome; - } - - return (sum_of_outcomes % 2 == 0) ? 1 : -1; -} - void Clifford::rowsum_helper(const Pauli::Pauli &row, const phase_t row_phase, Pauli::Pauli &accum, phase_t &accum_phase) const { int8_t newr = ((2 * row_phase + 2 * accum_phase) + diff --git a/src/simulators/stabilizer/stabilizer_state.hpp b/src/simulators/stabilizer/stabilizer_state.hpp index a4dc7940e7..37aed72890 100644 --- a/src/simulators/stabilizer/stabilizer_state.hpp +++ b/src/simulators/stabilizer/stabilizer_state.hpp @@ -558,36 +558,73 @@ void State::apply_save_amplitudes_sq(const Operations::Op &op, double State::expval_pauli(const reg_t &qubits, const std::string& pauli) { - // Compute expval components - auto state_cpy = BaseState::qreg_; - reg_t measured_qubits; - for (uint_t pos = 0; pos < qubits.size(); ++pos) { - const auto& qubit = qubits[pos]; - switch (pauli[pauli.size() - 1 - pos]) { - case 'I': - break; + // Construct Pauli on N-qubits + const auto num_qubits = BaseState::qreg_.num_qubits(); + Pauli::Pauli P(num_qubits); + uint_t phase = 0; + for (size_t i = 0; i < qubits.size(); ++i) { + switch (pauli[pauli.size() - 1 - i]) { case 'X': - state_cpy.append_h(qubit); - measured_qubits.push_back(qubit); + P.X.set1(qubits[i]); break; case 'Y': - state_cpy.append_s(qubit); - state_cpy.append_z(qubit); - state_cpy.append_h(qubit); - measured_qubits.push_back(qubit); + P.X.set1(qubits[i]); + P.Z.set1(qubits[i]); + phase += 1; break; case 'Z': - measured_qubits.push_back(qubit); + P.Z.set1(qubits[i]); + break; + default: break; - default: { - std::stringstream msg; - msg << "Invalid Pauli string \'" << pauli[pos] - << "\'."; - throw std::invalid_argument(msg.str()); + }; + } + + // Check if there is a stabilizer that anti-commutes with an odd number of qubits + // If so expectation value is 0 + for (size_t i = 0; i < num_qubits; i++) { + const auto& stabi = BaseState::qreg_.stabilizer(i); + size_t num_anti = 0; + for (const auto& qubit : qubits) { + if (P.Z[qubit] & stabi.X[qubit]) { + num_anti++; + } + if (P.X[qubit] & stabi.Z[qubit]) { + num_anti++; } } + if(num_anti % 2 == 1) + return 0.0; + } + + // Otherwise P is (-1)^a prod_j S_j^b_j for Clifford stabilizers + // If P anti-commutes with D_j then b_j = 1. + // Multiply P by stabilizers with anti-commuting destabilizers + auto PZ = P.Z; // Make a copy of P.Z + for (size_t i = 0; i < num_qubits; i++) { + // Check if destabilizer anti-commutes + const auto& destabi = BaseState::qreg_.destabilizer(i); + size_t num_anti = 0; + for (const auto& qubit : qubits) { + if (P.Z[qubit] & destabi.X[qubit]) { + num_anti++; + } + if (P.X[qubit] & destabi.Z[qubit]) { + num_anti++; + } + } + if (num_anti % 2 == 0) continue; + + // If anti-commutes multiply Pauli by stabilizer + const auto& stabi = BaseState::qreg_.stabilizer(i); + phase += 2 * BaseState::qreg_.phases()[i + num_qubits]; + for (size_t k = 0; k < num_qubits; k++) { + phase += stabi.Z[k] & stabi.X[k]; + phase += 2 * (PZ[k] & stabi.X[k]); + PZ.setValue(PZ[k] ^ stabi.Z[k], k); + } } - return state_cpy.expectation_value(measured_qubits); + return (phase % 4) ? -1.0 : 1.0; } static void set_value_helper(std::map& probs, diff --git a/test/terra/backends/aer_simulator/instructions/test_save_expval.py b/test/terra/backends/aer_simulator/instructions/test_save_expval.py index 1685c96972..aa753feb5f 100644 --- a/test/terra/backends/aer_simulator/instructions/test_save_expval.py +++ b/test/terra/backends/aer_simulator/instructions/test_save_expval.py @@ -65,13 +65,33 @@ def _test_save_expval(self, circuit, oper, qubits, variance, **options): [ 'automatic', 'stabilizer', 'statevector', 'density_matrix', 'matrix_product_state' - ], # , 'extended_stabilizer'], + ], + PAULI2) + def test_save_expval_bell_pauli(self, method, device, pauli): + """Test Pauli expval for Bell state circuit""" + circ = QuantumCircuit(2) + circ.h(0) + circ.cx(0, 1) + oper = qi.Pauli(pauli) + qubits = [0, 1] + self._test_save_expval(circ, + oper, + qubits, + False, + method=method, + device=device) + + @supported_methods( + [ + 'automatic', 'stabilizer', 'statevector', 'density_matrix', + 'matrix_product_state' + ], PAULI2) def test_save_expval_stabilizer_pauli(self, method, device, pauli): """Test Pauli expval for stabilizer circuit""" SEED = 5832 circ = qi.random_clifford(2, seed=SEED).to_circuit() - oper = qi.Operator(qi.Pauli(pauli)) + oper = qi.Pauli(pauli) qubits = [0, 1] self._test_save_expval(circ, oper, @@ -79,18 +99,18 @@ def test_save_expval_stabilizer_pauli(self, method, device, pauli): False, method=method, device=device) - + @supported_methods( [ 'automatic', 'stabilizer', 'statevector', 'density_matrix', 'matrix_product_state' - ], # , 'extended_stabilizer'], + ], PAULI2) def test_save_expval_var_stabilizer_pauli(self, method, device, pauli): """Test Pauli expval_var for stabilizer circuit""" SEED = 5832 circ = qi.random_clifford(2, seed=SEED).to_circuit() - oper = qi.Operator(qi.Pauli(pauli)) + oper = qi.Pauli(pauli) qubits = [0, 1] self._test_save_expval(circ, oper, @@ -103,7 +123,7 @@ def test_save_expval_var_stabilizer_pauli(self, method, device, pauli): [ 'automatic', 'stabilizer', 'statevector', 'density_matrix', 'matrix_product_state' - ], # , 'extended_stabilizer'], + ], [[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1]]) def test_save_expval_stabilizer_hermitian(self, method, device, qubits): """Test expval for stabilizer circuit and Hermitian operator""" @@ -159,7 +179,7 @@ def test_save_expval_var_nonstabilizer_pauli(self, method, device, pauli): """Test Pauli expval_var for non-stabilizer circuit""" SEED = 7382 circ = QuantumVolume(2, 1, seed=SEED) - oper = qi.Operator(qi.Pauli(pauli)) + oper = qi.Pauli(pauli) qubits = [0, 1] self._test_save_expval(circ, oper, @@ -237,7 +257,7 @@ def test_save_expval_stabilizer_pauli_cache_blocking( """Test Pauli expval for stabilizer circuit""" SEED = 5832 circ = qi.random_clifford(2, seed=SEED).to_circuit() - oper = qi.Operator(qi.Pauli(pauli)) + oper = qi.Pauli(pauli) qubits = [0, 1] self._test_save_expval(circ, oper, @@ -254,7 +274,7 @@ def test_save_expval_var_stabilizer_pauli_cache_blocking( """Test Pauli expval_var for stabilizer circuit""" SEED = 5832 circ = qi.random_clifford(2, seed=SEED).to_circuit() - oper = qi.Operator(qi.Pauli(pauli)) + oper = qi.Pauli(pauli) qubits = [0, 1] self._test_save_expval(circ, oper, From c08fd8dc7af6ad4ffeaa17ace4ea193b838e6150 Mon Sep 17 00:00:00 2001 From: georgios-ts <45130028+georgios-ts@users.noreply.github.com> Date: Wed, 28 Apr 2021 21:52:00 +0300 Subject: [PATCH 4/7] Change qubits order in multiplexer gate (#1230) --- ...x-multiplexer-qubits-654bb6f8ece25b02.yaml | 6 ++++ src/framework/operations.hpp | 4 +-- test/terra/reference/ref_multiplexer.py | 34 +++++++++---------- 3 files changed, 25 insertions(+), 19 deletions(-) create mode 100644 releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml diff --git a/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml b/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml new file mode 100644 index 0000000000..313eebeca0 --- /dev/null +++ b/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes a bug where qiskit-terra assumes that qubits in a multiplexer gate + are first the targets and then the controls of the gate while qiskit-aer + assumes the opposite order. diff --git a/src/framework/operations.hpp b/src/framework/operations.hpp index cc89812bb5..d84547c7fc 100755 --- a/src/framework/operations.hpp +++ b/src/framework/operations.hpp @@ -457,8 +457,8 @@ inline Op make_multiplexer(const reg_t &qubits, } // Get lists of controls and targets reg_t controls(num_controls), targets(num_targets); - std::copy_n(qubits.begin(), num_controls, controls.begin()); - std::copy_n(qubits.begin() + num_controls, num_targets, targets.begin()); + std::copy_n(qubits.begin(), num_targets, targets.begin()); + std::copy_n(qubits.begin() + num_targets, num_controls, controls.begin()); // Construct the Op Op op; diff --git a/test/terra/reference/ref_multiplexer.py b/test/terra/reference/ref_multiplexer.py index 8814925482..ac1a7366c9 100644 --- a/test/terra/reference/ref_multiplexer.py +++ b/test/terra/reference/ref_multiplexer.py @@ -36,7 +36,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): # CX01, |00> state circuit = QuantumCircuit(*regs) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -44,7 +44,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): # CX10, |00> state circuit = QuantumCircuit(*regs) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -54,7 +54,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.x(qr[1]) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -64,7 +64,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.x(qr[0]) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -74,7 +74,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.x(qr[0]) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -84,7 +84,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.x(qr[1]) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -94,7 +94,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.x(qr) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -104,7 +104,7 @@ def multiplexer_cx_gate_circuits_deterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.x(qr) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -129,7 +129,7 @@ def multiplexer_cx_gate_circuits_nondeterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.h(qr[0]) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -139,7 +139,7 @@ def multiplexer_cx_gate_circuits_nondeterministic(final_measure=True): circuit = QuantumCircuit(*regs) circuit.h(qr[1]) circuit.barrier(qr) - circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[1], qr[0]]) + circuit.append(multiplexer_multi_controlled_x(num_control_qubits), [qr[0], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -177,7 +177,7 @@ def multiplexer_ccx_gate_circuits_deterministic(final_measure=True): # CCX(0,1,2) circuit = QuantumCircuit(*regs) circuit.append(multiplexer_multi_controlled_x(num_control_qubits), - [qr[0], qr[1], qr[2]]) + [qr[2], qr[0], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -190,7 +190,7 @@ def multiplexer_ccx_gate_circuits_deterministic(final_measure=True): circuit.x(qr[1]) circuit.barrier(qr) circuit.append(multiplexer_multi_controlled_x(num_control_qubits), - [qr[0], qr[1], qr[2]]) + [qr[2], qr[0], qr[1]]) circuit.barrier(qr) circuit.x(qr[0]) circuit.barrier(qr) @@ -203,7 +203,7 @@ def multiplexer_ccx_gate_circuits_deterministic(final_measure=True): # CCX(2,1,0) circuit = QuantumCircuit(*regs) circuit.append(multiplexer_multi_controlled_x(num_control_qubits), - [qr[2], qr[1], qr[0]]) + [qr[0], qr[2], qr[1]]) if final_measure: circuit.barrier(qr) circuit.measure(qr, cr) @@ -216,7 +216,7 @@ def multiplexer_ccx_gate_circuits_deterministic(final_measure=True): circuit.x(qr[2]) circuit.barrier(qr) circuit.append(multiplexer_multi_controlled_x(num_control_qubits), - [qr[2], qr[1], qr[0]]) + [qr[0], qr[2], qr[1]]) circuit.barrier(qr) circuit.x(qr[1]) circuit.barrier(qr) @@ -249,7 +249,7 @@ def multiplexer_ccx_gate_circuits_nondeterministic(final_measure=True): circuit.x(qr[1]) circuit.barrier(qr) circuit.append(multiplexer_multi_controlled_x(num_control_qubits), - [qr[0], qr[1], qr[2]]) + [qr[2], qr[0], qr[1]]) circuit.barrier(qr) circuit.x(qr[1]) if final_measure: @@ -264,7 +264,7 @@ def multiplexer_ccx_gate_circuits_nondeterministic(final_measure=True): circuit.x(qr[2]) circuit.barrier(qr) circuit.append(multiplexer_multi_controlled_x(num_control_qubits), - [qr[2], qr[1], qr[0]]) + [qr[0], qr[2], qr[1]]) circuit.barrier(qr) circuit.x(qr[2]) if final_measure: @@ -282,4 +282,4 @@ def multiplexer_ccx_gate_counts_deterministic(shots, hex_counts=True): def multiplexer_ccx_gate_counts_nondeterministic(shots, hex_counts=True): """ The counts are exactly the same as the ccx gate """ - return ccx_gate_counts_nondeterministic(shots, hex_counts) \ No newline at end of file + return ccx_gate_counts_nondeterministic(shots, hex_counts) From 039b7b590c3fa86b79dcf54f9253ae050fca2296 Mon Sep 17 00:00:00 2001 From: "Christopher J. Wood" Date: Wed, 28 Apr 2021 16:11:48 -0400 Subject: [PATCH 5/7] Disable save expectation value for ext stabilizer (#1238) --- qiskit/providers/aer/backends/aer_simulator.py | 3 +-- qiskit/providers/aer/backends/qasm_simulator.py | 3 +-- qiskit/providers/aer/library/instructions_table.csv | 4 ++-- .../notes/ext-stab-expval-b205dcf3a26707d4.yaml | 11 +++++++++++ .../extended_stabilizer/extended_stabilizer_state.hpp | 11 ++++++----- 5 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 releasenotes/notes/ext-stab-expval-b205dcf3a26707d4.yaml diff --git a/qiskit/providers/aer/backends/aer_simulator.py b/qiskit/providers/aer/backends/aer_simulator.py index fdefb75d8a..6ca8b3168e 100644 --- a/qiskit/providers/aer/backends/aer_simulator.py +++ b/qiskit/providers/aer/backends/aer_simulator.py @@ -400,8 +400,7 @@ class AerSimulator(AerBackend): 'set_stabilizer' ]), 'extended_stabilizer': sorted([ - 'roerror', 'snapshot', 'save_statevector', - 'save_expval', 'save_expval_var' + 'roerror', 'snapshot', 'save_statevector' ]), 'unitary': sorted([ 'snapshot', 'save_state', 'save_unitary', 'set_unitary' diff --git a/qiskit/providers/aer/backends/qasm_simulator.py b/qiskit/providers/aer/backends/qasm_simulator.py index a7b4f02f6e..a47aebcb97 100644 --- a/qiskit/providers/aer/backends/qasm_simulator.py +++ b/qiskit/providers/aer/backends/qasm_simulator.py @@ -613,8 +613,7 @@ def _custom_instructions(self): 'set_stabilizer' ]) if method == 'extended_stabilizer': - return sorted(['roerror', 'snapshot', 'save_statevector', - 'save_expval', 'save_expval_var']) + return sorted(['roerror', 'snapshot', 'save_statevector']) return QasmSimulator._DEFAULT_CUSTOM_INSTR def _set_method_config(self, method=None): diff --git a/qiskit/providers/aer/library/instructions_table.csv b/qiskit/providers/aer/library/instructions_table.csv index dda33ccf0d..ae8b58f819 100644 --- a/qiskit/providers/aer/library/instructions_table.csv +++ b/qiskit/providers/aer/library/instructions_table.csv @@ -2,8 +2,8 @@ :class:`SaveAmplitudes`,✔,✔,✘,✔,✘,✘,✘,✘ :class:`SaveAmplitudesSquared`,✔,✔,✔,✔,✔,✘,✘,✘ :class:`SaveDensityMatrix`,✔,✔,✔,✔,✘,✘,✘,✘ -:class:`SaveExpectationValue`,✔,✔,✔,✔,✔,✔,✘,✘ -:class:`SaveExpectationValueVariance`,✔,✔,✔,✔,✔,✔,✘,✘ +:class:`SaveExpectationValue`,✔,✔,✔,✔,✔,✘,✘,✘ +:class:`SaveExpectationValueVariance`,✔,✔,✔,✔,✔,✘,✘,✘ :class:`SaveMatrixProductState`,✘,✘,✘,✔,✘,✘,✘,✘ :class:`SaveProbabilities`,✔,✔,✔,✔,✔,✘,✘,✘ :class:`SaveProbabilitiesDict`,✔,✔,✔,✔,✔,✘,✘,✘ diff --git a/releasenotes/notes/ext-stab-expval-b205dcf3a26707d4.yaml b/releasenotes/notes/ext-stab-expval-b205dcf3a26707d4.yaml new file mode 100644 index 0000000000..236fa599e5 --- /dev/null +++ b/releasenotes/notes/ext-stab-expval-b205dcf3a26707d4.yaml @@ -0,0 +1,11 @@ +--- +issues: + - | + The :class:`~qiskit.providers.aer.library.SaveExpectationValue` and + :class:`~qiskit.providers.aer.library.SaveExpectationValueVariance` have + been disabled for the `extended_stabilizer` method of the + :class:`~qiskit.providers.aer.QasmSimulator` and + :class:`~qiskit.providers.aer.AerSimulator` due to returning the + incorrect value for certain Pauli operator components. Refer to + `#1227 ` for more + information and examples. diff --git a/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp b/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp index 7cc8eda574..83880597b8 100644 --- a/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp +++ b/src/simulators/extended_stabilizer/extended_stabilizer_state.hpp @@ -37,7 +37,7 @@ const Operations::OpSet StateOpSet( Operations::OpType::reset, Operations::OpType::barrier, Operations::OpType::roerror, Operations::OpType::bfunc, Operations::OpType::snapshot, Operations::OpType::save_statevec, - Operations::OpType::save_expval, Operations::OpType::save_expval_var}, + }, //Operations::OpType::save_expval, Operations::OpType::save_expval_var}, // Gates {"CX", "u0", "u1", "p", "cx", "cz", "swap", "id", "x", "y", "z", "h", "s", "sdg", "t", "tdg", "ccx", "ccz", "delay"}, @@ -431,10 +431,11 @@ void State::apply_ops(const std::vector &ops, ExperimentResult & case Operations::OpType::save_statevec: apply_save_statevector(op, result); break; - case Operations::OpType::save_expval: - case Operations::OpType::save_expval_var: - apply_save_expval(op, result, rng); - break; + // Disabled until can fix bug in expval + // case Operations::OpType::save_expval: + // case Operations::OpType::save_expval_var: + // apply_save_expval(op, result, rng); + // break; default: throw std::invalid_argument("CH::State::apply_ops does not support operations of the type \'" + op.name + "\'."); From ddff470d69da9ab0749dd3035ac29873fe6bb1b1 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Wed, 28 Apr 2021 16:15:27 -0400 Subject: [PATCH 6/7] Prepare for renaming default branch to main (#1233) With the recent support added to qiskit-bot for configurable default branches in Qiskit/qiskit-bot#9 we're able to rename the default branch for the repository to 'main'. However, when we do that several things will need to be updated, most importantly the CI trigger was hardcoded to the previous default branch 'master. This commit fixes these references and should be merged after we rename the branch to re-enable CI. --- .github/workflows/build.yml | 4 ++-- .github/workflows/docs.yml | 4 ++-- .github/workflows/tests_linux.yml | 4 ++-- .github/workflows/tests_mac.yml | 4 ++-- .github/workflows/tests_windows.yml | 4 ++-- BENCHMARKING.md | 2 +- CONTRIBUTING.md | 10 +++++----- qiskit/providers/aer/version.py | 2 +- test/asv.linux.conf.json | 4 ++-- test/asv.linux.cuda.conf.json | 4 ++-- test/terra/common.py | 2 +- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f684e925a8..d011a250a7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,9 +1,9 @@ name: Build on: push: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] pull_request: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] jobs: standalone: runs-on: ${{ matrix.os }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 628ce52135..3335b4be13 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,9 +1,9 @@ name: Docs and Tutorial on: push: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] pull_request: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] jobs: docs: runs-on: ubuntu-latest diff --git a/.github/workflows/tests_linux.yml b/.github/workflows/tests_linux.yml index b76043bf6a..64e356d5d5 100644 --- a/.github/workflows/tests_linux.yml +++ b/.github/workflows/tests_linux.yml @@ -1,9 +1,9 @@ name: Tests Linux on: push: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] pull_request: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] jobs: lint: runs-on: ubuntu-latest diff --git a/.github/workflows/tests_mac.yml b/.github/workflows/tests_mac.yml index 21cc092d1b..d62ea54bb9 100644 --- a/.github/workflows/tests_mac.yml +++ b/.github/workflows/tests_mac.yml @@ -1,9 +1,9 @@ name: Tests MacOS on: push: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] pull_request: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] jobs: lint: runs-on: macOS-latest diff --git a/.github/workflows/tests_windows.yml b/.github/workflows/tests_windows.yml index b8816e8279..e414d3cb44 100644 --- a/.github/workflows/tests_windows.yml +++ b/.github/workflows/tests_windows.yml @@ -1,9 +1,9 @@ name: Tests Windows on: push: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] pull_request: - branches: [master, 'stable/*'] + branches: [main, 'stable/*'] jobs: lint: runs-on: windows-latest diff --git a/BENCHMARKING.md b/BENCHMARKING.md index 83a844e35a..10bf9232f0 100644 --- a/BENCHMARKING.md +++ b/BENCHMARKING.md @@ -42,7 +42,7 @@ After the completion of the tests, you will see the results with a format simila · Creating environments · Discovering benchmarks · Running 3 total benchmarks (1 commit * 1 environment * 3 benchmarks) -[ 0.00%] · For qiskit-aer commit 8b4f4de1 : +[ 0.00%] · For qiskit-aer commit 8b4f4de1
: [ 0.00%] ·· Benchmarking conda-py3.7 [ 16.67%] ··· Running (quantum_volume_benchmarks.QuantumVolumeTimeSuite.time_quantum_volume--).. [ 50.00%] ··· Running (simple_benchmarks.SimpleU3TimeSuite.time_simple_u3--). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b3b71e5ea9..8ff064c409 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -196,11 +196,11 @@ previous version in the release notes. ### Branches -* `master`: +* `main`: -The master branch is used for development of the next version of qiskit-aer. +The main branch is used for development of the next version of qiskit-aer. It will be updated frequently and should not be considered stable. The API -can and will change on master as we introduce and refine new features. +can and will change on main as we introduce and refine new features. * `stable/*` branches: Branches under `stable/*` are used to maintain released versions of qiskit-aer. @@ -214,13 +214,13 @@ merged to it are bugfixes. When it is time to release a new minor version of qiskit-aer, we will: 1. Create a new tag with the version number and push it to github -2. Change the `master` version to the next release version. +2. Change the `main` version to the next release version. The release automation processes will be triggered by the new tag and perform the following steps: 1. Create a stable branch for the new minor version from the release tag - on the `master` branch + on the `main` branch 2. Build and upload binary wheels to pypi 3. Create a GitHub release page with a generated changelog 4. Generate a PR on the meta-repository to bump the Aer version and diff --git a/qiskit/providers/aer/version.py b/qiskit/providers/aer/version.py index d667d22cd6..237d02af7e 100644 --- a/qiskit/providers/aer/version.py +++ b/qiskit/providers/aer/version.py @@ -42,7 +42,7 @@ def _minimal_ext_cmd(cmd): def git_version(): """Get the current git head sha1.""" - # Determine if we're at master + # Determine if we're at main try: out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) git_revision = out.strip().decode('ascii') diff --git a/test/asv.linux.conf.json b/test/asv.linux.conf.json index 19a372418d..c0d750073c 100644 --- a/test/asv.linux.conf.json +++ b/test/asv.linux.conf.json @@ -51,9 +51,9 @@ "python setup.py bdist_wheel --dist-dir={build_cache_dir} -- -DCMAKE_CXX_COMPILER=g++ -- -j" ], - // List of branches to benchmark. If not provided, defaults to "master" + // List of branches to benchmark. If not provided, defaults to "main" // (for git) or "default" (for mercurial). - // "branches": ["master"], // for git + // "branches": ["main"], // for git // "branches": ["default"], // for mercurial // The DVCS being used. If not set, it will be automatically diff --git a/test/asv.linux.cuda.conf.json b/test/asv.linux.cuda.conf.json index bc0b826e73..d034764dcd 100644 --- a/test/asv.linux.cuda.conf.json +++ b/test/asv.linux.cuda.conf.json @@ -51,9 +51,9 @@ "python setup.py bdist_wheel --dist-dir={build_cache_dir} -- -DCMAKE_CXX_COMPILER=g++ -DAER_THRUST_BACKEND=CUDA -- -j" ], - // List of branches to benchmark. If not provided, defaults to "master" + // List of branches to benchmark. If not provided, defaults to "main" // (for git) or "default" (for mercurial). - // "branches": ["master"], // for git + // "branches": ["main"], // for git // "branches": ["default"], // for mercurial // The DVCS being used. If not set, it will be automatically diff --git a/test/terra/common.py b/test/terra/common.py index 5282a42e75..e4c996d492 100644 --- a/test/terra/common.py +++ b/test/terra/common.py @@ -334,7 +334,7 @@ def _is_ci_fork_pull_request(): Returns: bool: True if the tests are executed inside a CI tool, and the changes - are not against the "master" branch. + are not against the "main" branch. """ if os.getenv('TRAVIS'): # Using Travis CI. From 488acd454b3081a5830d6b1a8a41642ddfa144a8 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Wed, 28 Apr 2021 17:08:56 -0400 Subject: [PATCH 7/7] Prepare 0.8.2 release --- docs/conf.py | 2 +- qiskit/providers/aer/VERSION.txt | 2 +- .../notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index ef235021a5..d888e66244 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -46,7 +46,7 @@ # The short X.Y version version = '' # The full version, including alpha/beta/rc tags -release = '0.8.1' +release = '0.8.2' # -- General configuration --------------------------------------------------- diff --git a/qiskit/providers/aer/VERSION.txt b/qiskit/providers/aer/VERSION.txt index 6f4eebdf6f..100435be13 100644 --- a/qiskit/providers/aer/VERSION.txt +++ b/qiskit/providers/aer/VERSION.txt @@ -1 +1 @@ -0.8.1 +0.8.2 diff --git a/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml b/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml index 313eebeca0..6d528ac73d 100644 --- a/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml +++ b/releasenotes/notes/bug-fix-multiplexer-qubits-654bb6f8ece25b02.yaml @@ -1,6 +1,6 @@ --- fixes: - | - Fixes a bug where qiskit-terra assumes that qubits in a multiplexer gate - are first the targets and then the controls of the gate while qiskit-aer - assumes the opposite order. + Fixes a bug with the ``"multiplexer"`` simulator instruction where the + order of target and control qubits was reversed to the order in the + Qiskit instruction.