Skip to content

Commit

Permalink
use prefetch to load next mem into cache (PaddlePaddle#21206)
Browse files Browse the repository at this point in the history
* use prefetch to load next mem into cache

test=develop

* remove hard-coded memcpy in pyramid_hash_ff

test=develop
  • Loading branch information
LeoZhao-Intel authored and seiriosPlus committed Dec 9, 2019
1 parent c20e9b5 commit bcd20f6
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions paddle/fluid/operators/pyramid_hash_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,21 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
// Fills top_pos[0, _num_emb) with chunks of up to _rand_len elements copied
// out of `weights`.
//
// The chunk that starts at output index j is read from
//   weights + (XXH32(hash_id, len * sizeof(T), j) % _space_len)
// i.e. the output index of the chunk is used as the hash seed.
//
// The source offsets are computed one chunk ahead (pos1 = current chunk,
// pos2 = next chunk, pos3 = the one after), so the next source and
// destination regions can be prefetched while the current chunk is copied.
//
// hash_id     key to hash; `len` elements of T are hashed.
// top_pos     destination; must hold at least _num_emb elements.
// weights     source table. NOTE(review): a read can reach index
//             (_space_len - 1) + (_rand_len - 1), so the table presumably
//             holds at least _space_len + _rand_len - 1 elements -- confirm
//             against the caller.
// _num_emb    number of elements to produce.
// _rand_len   number of elements per chunk (also the seed stride).
// _space_len  modulus applied to the hash to obtain the source offset.
//
// Non-positive _num_emb, _rand_len or _space_len produce no output: a zero
// stride would never terminate and a zero modulus is a division by zero.
void hash_embedding_ff(const T* hash_id, int len, T* top_pos,
                       const T* weights, int _num_emb, int _rand_len,
                       int _space_len) const {
  if (_num_emb <= 0 || _rand_len <= 0 || _space_len <= 0) {
    return;
  }

  const unsigned int num_emb = static_cast<unsigned int>(_num_emb);
  const unsigned int rand_len = static_cast<unsigned int>(_rand_len);
  const unsigned int space_len = static_cast<unsigned int>(_space_len);
  const auto key_bytes = len * sizeof(T);

  // Offsets for the first two chunks (seeds 0 and _rand_len).
  unsigned int pos1 = XXH32(hash_id, key_bytes, 0) % space_len;
  unsigned int pos2 = XXH32(hash_id, key_bytes, rand_len) % space_len;

  // `<` rather than `!=`: when _num_emb is not a multiple of _rand_len the
  // index steps over _num_emb and `!=` would keep writing past top_pos.
  for (unsigned int j = 0; j < num_emb; j += rand_len) {
#if defined(__GNUC__) || defined(__clang__)
    // Prefetch is only a hint; skip it when there is no next chunk.
    if (j + rand_len < num_emb) {
      __builtin_prefetch(weights + pos2);
      __builtin_prefetch(top_pos + j + rand_len);
    }
#endif

    // Offset for the chunk two steps ahead, computed before the copy so the
    // hash for the following iteration is already available.
    const unsigned int pos3 =
        XXH32(hash_id, key_bytes, j + 2 * rand_len) % space_len;

    // Clamp the final chunk so a partial tail never overruns top_pos.
    const unsigned int remaining = num_emb - j;
    const unsigned int chunk = remaining < rand_len ? remaining : rand_len;

    // memcpy takes const void*, so no cast is needed; the former
    // const_cast<float*> did not compile for T other than float.
    memcpy(top_pos + j, weights + pos1, chunk * sizeof(T));

    pos1 = pos2;
    pos2 = pos3;
  }
}

Expand Down

0 comments on commit bcd20f6

Please sign in to comment.