This is the source code of HNZSLP. For the full project, including the NELL-ZS and Wiki-ZS datasets and the processed relation surface names, please refer to Google Drive.
The architecture of our proposed HNZSLP is depicted in the diagram below. It consists of three major parts:
Part 1: Hierarchical N-gram Graph Constructor constructs a hierarchical n-gram graph from each relation surface name; the n-gram graph can be further decomposed into an adjoin graph and a compositional graph to simplify the learning of the relation representation (see the sketch after this list);
Part 2: GramTransformer constructs the relation representation by modeling over the adjoin and compositional graphs;
Part 3: KG Embedding Learning Module combines the embeddings of the head entity and relation to predict the tail entity and update the embeddings.
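To make Part 1 concrete, here is a small hypothetical sketch of how character n-grams of a surface name can be enumerated and linked: adjoin edges connect n-grams that directly follow each other in the string, and compositional edges connect a shorter n-gram to a longer n-gram that contains it. The function name and exact edge definitions are illustrative assumptions, not the repository's actual constructor.

# Hypothetical illustration only (not the repository's exact graph constructor).
def build_hierarchical_ngram_graph(surface_name, max_n=3):
    # Nodes: (start_index, n-gram) pairs for every order 1..max_n.
    nodes = [(i, surface_name[i:i + n])
             for n in range(1, max_n + 1)
             for i in range(len(surface_name) - n + 1)]

    adjoin_edges, compositional_edges = [], []
    for a, (i, g1) in enumerate(nodes):
        for b, (j, g2) in enumerate(nodes):
            if a == b:
                continue
            if j == i + len(g1):                    # g2 starts right where g1 ends
                adjoin_edges.append((a, b))
            if i <= j and j + len(g2) <= i + len(g1) and len(g2) < len(g1):
                compositional_edges.append((b, a))  # g2 is a sub-span of g1
    return nodes, adjoin_edges, compositional_edges


nodes, adjoin_edges, compositional_edges = build_hierarchical_ngram_graph("works_for")

In the full model these two edge sets correspond, conceptually, to the adjoin and compositional masks consumed by the GramTransformer attention shown further below.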
Python 3.6.6
torch: 1.5.1
Linux: CentOS Linux release
For example, enter the directory: LZSL\LZSP_nell_Distmult_13_90
Then run "CUDA_VISIBLE_DEVICES=1 python main.py" in your terminal.
Please use the default parameters.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class Scaled_Dot_Product_Attention(nn.Module):
    '''Scaled Dot-Product Attention over the adjoin and compositional graphs.'''

    def __init__(self):
        super(Scaled_Dot_Product_Attention, self).__init__()

    def forward(self, Q, K, V, adj_mask_matrix, com_mask_matrix, lattice_rel, scale=None):
        '''
        Args:
            Q: [batch_size, len_Q, dim_Q]
            K: [batch_size, len_K, dim_K]
            V: [batch_size, len_V, dim_V]
            adj_mask_matrix: mask of the adjoin graph, [batch_size, len_Q, len_Q]
            com_mask_matrix: mask of the compositional graph, [batch_size, len_Q, len_Q]
            lattice_rel: pair of relation embeddings, one per graph
            scale: optional scaling factor for the attention scores
        Return:
            context vectors produced by the masked self-attention
        '''
        len_Q = Q.size(1)

        # Adjoin-graph branch: add the adjoin relation embedding to Q and K.
        adj_relation = torch.tensor(lattice_rel[0])
        adj_relation = Variable(adj_relation.repeat(1, len_Q, 1)).cuda()
        Adj_Q = adj_relation + Q
        Adj_K = adj_relation + K

        # Compositional-graph branch: add the compositional relation embedding.
        com_relation = torch.tensor(lattice_rel[1])
        com_relation = Variable(com_relation.repeat(1, len_Q, 1)).cuda()
        com_Q = com_relation + Q
        com_K = com_relation + K

        # Attention scores of each branch: [batch_size, len_Q, len_Q]
        attention_adj = torch.matmul(Adj_Q, Adj_K.permute(0, 2, 1))
        attention_com = torch.matmul(com_Q, com_K.permute(0, 2, 1))
        if scale:
            attention_adj = attention_adj * scale
            attention_com = attention_com * scale

        # Mask each branch with its graph structure, then combine the two.
        mask_attention1 = torch.mul(F.softmax(attention_adj, dim=-1), adj_mask_matrix)
        mask_attention2 = torch.mul(F.softmax(attention_com, dim=-1), com_mask_matrix)
        mask_attention = mask_attention1 + mask_attention2

        context = torch.matmul(mask_attention, V)
        return context
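A minimal usage sketch of the module above with dummy inputs (batch size 1, sequence length 6, and hidden size 200 are arbitrary choices for illustration); it only shows how the arguments fit together and assumes a CUDA device, since the forward pass calls .cuda() internally.

import torch

attention = Scaled_Dot_Product_Attention()

batch, seq_len, dim = 1, 6, 200
Q = torch.randn(batch, seq_len, dim).cuda()
K = torch.randn(batch, seq_len, dim).cuda()
V = torch.randn(batch, seq_len, dim).cuda()

# Graph masks: 1.0 where an edge exists in the adjoin / compositional graph, 0.0 otherwise.
adj_mask = torch.randint(0, 2, (batch, seq_len, seq_len)).float().cuda()
com_mask = torch.randint(0, 2, (batch, seq_len, seq_len)).float().cuda()

# One relation embedding per graph, passed as nested lists and broadcast over the sequence inside forward().
lattice_rel = [torch.randn(1, 1, dim).tolist(), torch.randn(1, 1, dim).tolist()]

context = attention(Q, K, V, adj_mask, com_mask, lattice_rel, scale=dim ** -0.5)
print(context.shape)  # torch.Size([1, 6, 200])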
ByT5 (2021), ByT5 (Hugging Face)
# transformers version: 4.7.0
from transformers import T5ForConditionalGeneration, AutoTokenizer

byte_model = T5ForConditionalGeneration.from_pretrained('google/byt5-small').cuda()
tokenizer = AutoTokenizer.from_pretrained('google/byt5-small')

enc = tokenizer(["Life is like a box of chocolates.", "Today is Monday.", "Today is Monday."],
                padding="longest", return_tensors="pt")
# Forward pass; the relation text is fed to both encoder and decoder.
M = byte_model(input_ids=enc["input_ids"].cuda(),
               attention_mask=enc["attention_mask"].cuda(),
               decoder_input_ids=enc["input_ids"].cuda())  # .encoder_last_hidden_state
# Sum over the sequence dimension to get one vector per relation text.
relation_text_embedding = M.logits
r = relation_text_embedding.sum(1)
"""
transformers---version 4.7.0
tf---version 1.6.0
torch version: 1.5.1
"""
CharFormer (2021), Charformer implementation (https://github.com/lucidrains/charformer-pytorch)
# CharFormer baseline: GBST as the character-level tokenizer (excerpt from inside the model class).
import torch
from charformer_pytorch import GBST

self.tokenizer = GBST(
    num_tokens=26,              # only the 26 lowercase letters (a, b, ...) are used
    dim=200,                    # dimension of token and intra-block positional embedding
    max_block_size=4,           # maximum block size
    downsample_factor=1,        # final downsample factor by which the sequence length decreases
    score_consensus_attn=True   # cheap score consensus (attention), as in eq. 5 of the paper
)

# r_idx: [[1, 2, 3, 4, 5], [4, 5, 6, 7, 8], [7, 8, 9, 6, 7], ...]; len(r_idx) == batch size, e.g. 64
ids_r = torch.tensor(r_idx[j]).unsqueeze(0).cuda()
relations_feather = self.tokenizer(ids_r)[0]            # GBST returns (embeddings, mask)
relations_feather = self.postion_embedding(relations_feather)
relations_feather = self.encoder(relations_feather)
relations_feather = self.mean_pooling(relations_feather)
torch: pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
charformer-pytorch: 0.0.4
Python 3.6
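For a quick standalone check of the GBST call outside the model class, the sketch below follows the charformer-pytorch 0.0.4 usage (the dummy ids and sequence length are arbitrary); GBST returns a tuple of embeddings and a mask, which is why the snippet above indexes [0].

import torch
from charformer_pytorch import GBST

tokenizer = GBST(
    num_tokens=26,
    dim=200,
    max_block_size=4,
    downsample_factor=1,
    score_consensus_attn=True,
)

ids = torch.randint(0, 26, (1, 12))  # one relation surface name as letter ids
tokens, mask = tokenizer(ids)        # embeddings and mask
print(tokens.shape)                  # [batch, seq_len / downsample_factor, dim]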
Go to the Wikidata Query Service (https://query.wikidata.org) and enter the following query:
SELECT ?property ?propertyLabel ?propertyDescription (GROUP_CONCAT(DISTINCT(?altLabel); separator = ", ") AS ?altLabel_list) WHERE {
?property a wikibase:Property .
OPTIONAL { ?property skos:altLabel ?altLabel . FILTER (lang(?altLabel) = "en") }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" .}
}
GROUP BY ?property ?propertyLabel ?propertyDescription
LIMIT 5000
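Alternatively, the same query can be sent to the public endpoint programmatically; the sketch below uses requests against https://query.wikidata.org/sparql and flattens the bindings into the simple list of {"property": ..., "propertyLabel": ...} records that the processing script below expects (the User-Agent string is a placeholder; mind the service's rate limits).

import json
import requests

SPARQL_QUERY = """
SELECT ?property ?propertyLabel ?propertyDescription (GROUP_CONCAT(DISTINCT(?altLabel); separator = ", ") AS ?altLabel_list) WHERE {
    ?property a wikibase:Property .
    OPTIONAL { ?property skos:altLabel ?altLabel . FILTER (lang(?altLabel) = "en") }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en" .}
}
GROUP BY ?property ?propertyLabel ?propertyDescription
LIMIT 5000
"""

response = requests.get(
    "https://query.wikidata.org/sparql",
    params={"query": SPARQL_QUERY, "format": "json"},
    headers={"User-Agent": "HNZSLP-data-collection (placeholder contact)"},
)
bindings = response.json()["results"]["bindings"]

# Keep only the plain values, matching the list-of-dicts layout used by the script below.
records = [{key: value["value"] for key, value in row.items()} for row in bindings]
with open("query.json", "w", encoding="utf-8") as fw:
    json.dump(records, fw, ensure_ascii=False)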
import json

# Map each Wikidata property id to its English surface name.
with open("query.json", 'r', encoding="utf-8") as load_f:
    load_dict = json.load(load_f)

ids = []
surface_name = []
for r in load_dict:
    ids.append(r["property"].replace("http://www.wikidata.org/entity/", ""))
    surface_name.append(r["propertyLabel"])
DIC = dict(zip(ids, surface_name))

# Write "property_id<TAB>surface_name" for every relation in the dataset;
# count and print the relations whose surface name was not found.
fw = open("relation_surface_name_id.txt", "w", encoding="utf-8")
i = 0
with open("relation_to_description.txt", "r", encoding="utf-8") as fr:
    for line in fr.readlines():
        r = line.strip().split("\t")[0]
        if r in DIC:
            fw.write(r + "\t" + DIC[r] + "\n")
        else:
            i = i + 1
            print(r)
fw.close()
print(i)
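Each line of the resulting relation_surface_name_id.txt pairs a property id with its surface name, tab-separated, so it can be read back into a dictionary in one line (P31 / "instance of" is just an illustrative Wikidata pair and may or may not occur in the dataset split):

# Read relation_surface_name_id.txt back into {property_id: surface_name}.
with open("relation_surface_name_id.txt", encoding="utf-8") as fr:
    id_to_name = dict(line.rstrip("\n").split("\t") for line in fr)
print(id_to_name.get("P31"))  # e.g. "instance of", if that property is present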
import numpy as np
from gensim.models import KeyedVectors
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer

# Pre-trained word2vec vectors (300-d GoogleNews embeddings).
model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

# Read the texts, one per line.
corpus = []
with open("WSP_Q_set.txt", "r") as fr:
    for line in fr.readlines():
        text_ = line.strip().replace("[", "").replace("]", "")
        corpus.append(text_)

# TF-IDF weights over the whole corpus.
vectorizer = CountVectorizer()
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(corpus))
word = vectorizer.get_feature_names()
weight = tfidf.toarray()

# For every text, sum the word2vec vectors of its words (those with non-zero TF-IDF weight).
R = []
for i in range(len(weight)):
    vector_al = []
    for j in range(len(word)):
        w = word[j]
        w_w = weight[i][j]
        if w_w != 0 and w in model:
            vector_al.append(model[w])
    V = np.sum(np.array(vector_al), axis=0)
    R.append(V)

np.save("WSP_vector.npy", R)
Please cite this paper if you find the paper or code useful.
@article{li2022hierarchical,
title={A Hierarchical N-Gram Framework for Zero-Shot Link Prediction},
author={Li, Mingchen and Chen, Junfan and Mensah, Samuel and Aletras, Nikolaos and Yang, Xiulong and Ye, Yang},
journal={arXiv preprint arXiv:2204.10293},
year={2022}
}