From bcd20f675a61decc57f89271e83ae541c0cb096d Mon Sep 17 00:00:00 2001 From: Leo Zhao <48052473+LeoZhao-Intel@users.noreply.github.com> Date: Sun, 24 Nov 2019 21:50:00 +0800 Subject: [PATCH] use prefetch to load next mem into cache (#21206) * use prefetch to load next mem into cache test=develop * remove hard code memcpy om pyramid_hash_ff test=develop --- paddle/fluid/operators/pyramid_hash_op.cc | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index bb1abe3a891fc..b47d2191f60fd 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -161,14 +161,21 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { void hash_embedding_ff(const T* hash_id, int len, T* top_pos, const T* weights, int _num_emb, int _rand_len, int _space_len) const { + unsigned int pos1 = XXH32(hash_id, len * sizeof(T), 0) % _space_len; + unsigned int pos2 = XXH32(hash_id, len * sizeof(T), _rand_len) % _space_len; + for (unsigned int j = 0; j != _num_emb; j += _rand_len) { - unsigned int pos = XXH32(hash_id, len * sizeof(T), j) % _space_len; - if (_rand_len == 16) { - memcpy(top_pos + j, const_cast(weights + pos), 16 * sizeof(T)); - } else { - memcpy(top_pos + j, const_cast(weights + pos), - _rand_len * sizeof(T)); + if (j + _rand_len < _num_emb) { + __builtin_prefetch(weights + pos2); + __builtin_prefetch(top_pos + j + _rand_len); } + + unsigned int pos3 = + XXH32(hash_id, len * sizeof(T), j + 2 * _rand_len) % _space_len; + memcpy(top_pos + j, const_cast(weights + pos1), + _rand_len * sizeof(T)); + pos1 = pos2; + pos2 = pos3; } }