Quick Start
Pre-trained Embedder for Knowledge Discovery
from cogkge import *
# Load the lookup tables (LUTs).
# Initialise CogKGE on device "0" with seed 1, then read the EVENTKG2M
# splits and the node / relation / time lookup tables from "../dataset".
device = init_cogkge(device_id="0", seed=1)
loader = EVENTKG2MLoader(dataset_path="../dataset", download=True)
train_data, valid_data, test_data = loader.load_all_data()
node_lut, relation_lut, time_lut = loader.load_all_lut()

# Run the lookup tables through the dataset processor; every optional
# feature flag (type, time, description, path) is switched off here.
processor = EVENTKG2MProcessor(
    node_lut,
    relation_lut,
    time_lut,
    reprocess=True,
    type=False,
    time=False,
    description=False,
    path=False,
    time_unit="year",
    pretrain_model_name="roberta-base",
    token_len=10,
    path_len=10,
)
node_lut, relation_lut, time_lut = processor.process_lut()
# Load the model.
# BoxE is sized from the lookup tables and uses 50-dimensional embeddings.
model = BoxE(
    entity_dict_len=len(node_lut),
    relation_dict_len=len(relation_lut),
    embedding_dim=50,
)
# Load the predictor.
# It is given the model, the lookup tables and the path of a pre-trained
# checkpoint ("data/BoxE_Model.pkl"); both top-k settings are 10.
# NOTE(review): the class name is spelled `Predictior` here — confirm it
# matches the name actually exported by cogkge before relying on it.
predictor = Predictior(
    model_name="BoxE",
    data_name="EVENTKG2M",
    model=model,
    device=device,
    node_lut=node_lut,
    relation_lut=relation_lut,
    pretrained_model_path="data/BoxE_Model.pkl",
    processed_data_path="data",
    reprocess=False,
    fuzzy_query_top_k=10,
    predict_top_k=10,
)
# --- Keyword (fuzzy) lookups -------------------------------------------

# Fuzzy query for nodes by keyword.
result_node = predictor.fuzzy_query_node_keyword("champion")
print(result_node)

# Fuzzy query for relations by keyword.
result_relation = predictor.fuzzy_query_relation_keyword("instance")
print(result_relation)

# --- Predictions ---------------------------------------------------------

# Query nodes similar to node 0.
similar_node_list = predictor.predict_similar_node(node_id=0)
print(similar_node_list)

# Given a head and a relation, query the tail.
# NOTE(review): the method name is spelled `predcit_tail` (cf. `predict_head`
# below) — confirm against the cogkge predictor API.
tail_list = predictor.predcit_tail(head_id=0, relation_id=0)
print(tail_list)

# Given a tail and a relation, query the head.
head_list = predictor.predict_head(tail_id=0, relation_id=0)
print(head_list)

# Given a head and a tail, query the relation.
relation_list = predictor.predict_relation(head_id=0, tail_id=0)
print(relation_list)

# --- Visualization -------------------------------------------------------

# Dimensionality reduction and visualization of nodes.
visual_list = predictor.show_img(node_id=100, visual_num=1000)
Programming Framework for Training Models
import torch
from torch.utils.data import RandomSampler
from cogkge import *
# Initialise CogKGE on device "0" with seed 1.
device = init_cogkge(device_id="0", seed=1)

# Read the raw EVENTKG2M splits and lookup tables from "../dataset".
loader = EVENTKG2MLoader(dataset_path="../dataset", download=True)
train_data, valid_data, test_data = loader.load_all_data()
node_lut, relation_lut, time_lut = loader.load_all_lut()

# Configure the processor; only the `type` feature flag is enabled.
processor = EVENTKG2MProcessor(
    node_lut,
    relation_lut,
    time_lut,
    reprocess=True,
    type=True,
    time=False,
    description=False,
    path=False,
    time_unit="year",
    pretrain_model_name="roberta-base",
    token_len=10,
    path_len=10,
)

# Process each split (train, valid, test — in that order), then the
# lookup tables.
train_dataset = processor.process(train_data)
valid_dataset = processor.process(valid_data)
test_dataset = processor.process(test_data)
node_lut, relation_lut, time_lut = processor.process_lut()

# One RandomSampler per processed split, created in the same order.
train_sampler, valid_sampler, test_sampler = (
    RandomSampler(split)
    for split in (train_dataset, valid_dataset, test_dataset)
)
# TransE model sized from the lookup tables, with 50-dimensional embeddings.
model = TransE(
    entity_dict_len=len(node_lut),
    relation_dict_len=len(relation_lut),
    embedding_dim=50,
)

# Margin-based loss and an Adam optimizer over all model parameters.
loss = MarginLoss(margin=1.0, C=0)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=0,
)

# Link-prediction metric: raw setting on, filtered setting off.
metric = Link_Prediction(
    link_prediction_raw=True,
    link_prediction_filt=False,
    batch_size=5000000,
    reverse=False,
)

# Halve the learning rate (factor=0.5) when the monitored value stops
# decreasing by more than an absolute threshold of 5 for 3 steps.
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=3,
    threshold_mode='abs',
    threshold=5,
    factor=0.5,
    min_lr=1e-9,
    verbose=True,
)

# Uniform negative sampler built over the training triples.
negative_sampler = UnifNegativeSampler(
    triples=train_dataset,
    entity_dict_len=len(node_lut),
    relation_dict_len=len(relation_lut),
)
# Assemble the trainer from the components built above and start training.
trainer = Trainer(
    # Data and sampling.
    train_dataset=train_dataset,
    valid_dataset=valid_dataset,
    train_sampler=train_sampler,
    valid_sampler=valid_sampler,
    # Model and optimisation components.
    model=model,
    loss=loss,
    optimizer=optimizer,
    negative_sampler=negative_sampler,
    device=device,
    # Output location and lookup tables.
    output_path="../dataset",
    lookuptable_E=node_lut,
    lookuptable_R=relation_lut,
    # Evaluation metric and learning-rate schedule.
    metric=metric,
    lr_scheduler=lr_scheduler,
    # Run configuration.
    log=True,
    trainer_batch_size=100000,
    epoch=3000,
    visualization=1,
    apex=True,
    dataloaderX=True,
    num_workers=4,
    pin_memory=True,
    # Metric / checkpoint cadence and final-model handling.
    metric_step=200,
    save_step=200,
    metric_final_model=True,
    save_final_model=True,
    load_checkpoint=None,
)
trainer.train()
# Evaluate the in-memory model on the test split (no saved model path is
# supplied: trained_model_path=None).
evaluator = Evaluator(
    # Test data and sampling.
    test_dataset=test_dataset,
    test_sampler=test_sampler,
    # Model, device and metric.
    model=model,
    device=device,
    metric=metric,
    # Output location, the other splits, and lookup tables.
    output_path="../dataset",
    train_dataset=train_dataset,
    valid_dataset=valid_dataset,
    lookuptable_E=node_lut,
    lookuptable_R=relation_lut,
    # Run configuration.
    log=True,
    evaluator_batch_size=50000,
    dataloaderX=True,
    num_workers=4,
    pin_memory=True,
    trained_model_path=None,
)
evaluator.evaluate()