Quick Start


Pre-trained Embedder for Knowledge Discovery
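
Load the EventKG2M knowledge graph, build a BoxE embedder, and run fuzzy queries and link-prediction queries against a pretrained checkpoint: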

from cogkge import *
device = init_cogkge(device_id="0", seed=1)

# load data and lookup tables
loader = EVENTKG2MLoader(dataset_path="../dataset", download=True)
train_data, valid_data, test_data = loader.load_all_data()
node_lut, relation_lut, time_lut = loader.load_all_lut()
processor = EVENTKG2MProcessor(node_lut, relation_lut, time_lut,
				reprocess=True,
				type=False, time=False, description=False, path=False,
				time_unit="year",
				pretrain_model_name="roberta-base", token_len=10,
				path_len=10)
node_lut, relation_lut, time_lut = processor.process_lut()
# load model
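# BoxE embeds entities as points and relations as boxes; a triple scores well when
# the (translated) entity points fall inside the relation's boxes (Abboud et al., 2020)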
model = BoxE(entity_dict_len=len(node_lut),
		relation_dict_len=len(relation_lut),
		embedding_dim=50)
# load predictor
predictor = Predictor(model_name="BoxE",
			data_name="EVENTKG2M",
			model=model,
			device=device,
			node_lut=node_lut,
			relation_lut=relation_lut,
			pretrained_model_path="data/BoxE_Model.pkl",
			processed_data_path="data",
			reprocess=False,
			fuzzy_query_top_k=10,
			predict_top_k=10)
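# fuzzy_query_top_k and predict_top_k cap how many candidates the queries below return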
# fuzzy query node
result_node = predictor.fuzzy_query_node_keyword("champion")
print(result_node)
						
# fuzzy query relation
result_relation = predictor.fuzzy_query_relation_keyword("instance")
print(result_relation)
						
# query similar nodes
similar_node_list = predictor.predict_similar_node(node_id=0)
print(similar_node_list)
						
# given head and relation, query tail
tail_list = predictor.predict_tail(head_id=0, relation_id=0)
print(tail_list)
						
# given tail and relation, query head
head_list = predictor.predict_head(tail_id=0, relation_id=0)
print(head_list)
						
# given head and tail, query relation
relation_list = predictor.predict_relation(head_id=0, tail_id=0)
print(relation_list)
						
# dimensionality reduction and visualization of nodes
visual_list = predictor.show_img(node_id=100, visual_num=1000)
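
The query methods above compose naturally. A small convenience wrapper (a sketch, not part of the CogKGE API) can collect all three prediction directions for a single id triple:

# sketch: gather tail, head, and relation predictions for one id triple
def query_all_directions(predictor, head_id, relation_id, tail_id):
	return {
		"tails": predictor.predict_tail(head_id=head_id, relation_id=relation_id),
		"heads": predictor.predict_head(tail_id=tail_id, relation_id=relation_id),
		"relations": predictor.predict_relation(head_id=head_id, tail_id=tail_id),
	}

print(query_all_directions(predictor, head_id=0, relation_id=0, tail_id=0))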
					

Programming Framework for Training Models
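
The full training pipeline: load and process the data, then wire the model, loss, optimizer, metric, scheduler, and negative sampler into a Trainer, and finally evaluate on the test split: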

import torch
from torch.utils.data import RandomSampler
from cogkge import *
device = init_cogkge(device_id="0", seed=1)

loader = EVENTKG2MLoader(dataset_path="../dataset", download=True)
train_data, valid_data, test_data = loader.load_all_data()
node_lut, relation_lut, time_lut = loader.load_all_lut()

processor = EVENTKG2MProcessor(node_lut, relation_lut, time_lut,
				reprocess=True,
				type=True, time=False, description=False, path=False,
				time_unit="year",
				pretrain_model_name="roberta-base", token_len=10,
				path_len=10)
train_dataset = processor.process(train_data)
valid_dataset = processor.process(valid_data)
test_dataset = processor.process(test_data)
node_lut, relation_lut, time_lut = processor.process_lut()
						
train_sampler = RandomSampler(train_dataset)
valid_sampler = RandomSampler(valid_dataset)
test_sampler = RandomSampler(test_dataset)
						
model = TransE(entity_dict_len=len(node_lut),
		relation_dict_len=len(relation_lut),
		embedding_dim=50)
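# TransE scores a triple (h, r, t) by the distance ||h + r - t||; lower means more plausible (Bordes et al., 2013)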
						
loss = MarginLoss(margin=1.0, C=0)
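# margin ranking loss: max(0, margin + d(positive) - d(negative));
# C is assumed to weight an optional regularization term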
						
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0)
						
metric = Link_Prediction(link_prediction_raw=True,
			 link_prediction_filt=False,
			 batch_size=5000000,
			 reverse=False)
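# raw ranking counts every corrupted triple as a competitor; filtered ranking would
# additionally exclude corruptions that are themselves known true triples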
						
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
	optimizer, mode="min", patience=3, threshold_mode="abs", threshold=5,
	factor=0.5, min_lr=1e-9, verbose=True)
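# halve the learning rate after 3 metric checks without sufficient improvement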
						
negative_sampler = UnifNegativeSampler(triples=train_dataset,
					entity_dict_len=len(node_lut),
					relation_dict_len=len(relation_lut))
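# "unif" strategy: corrupt each training triple by replacing its head or tail with a uniformly sampled entity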
						
trainer = Trainer(
	train_dataset=train_dataset,
	valid_dataset=valid_dataset,
	train_sampler=train_sampler,
	valid_sampler=valid_sampler,
	model=model,
	loss=loss,
	optimizer=optimizer,
	negative_sampler=negative_sampler,
	device=device,
	output_path="../dataset",
	lookuptable_E=node_lut,
	lookuptable_R=relation_lut,
	metric=metric,
	lr_scheduler=lr_scheduler,
	log=True,
	trainer_batch_size=100000,
	epoch=3000,
	visualization=1,
	apex=True,
	dataloaderX=True,
	num_workers=4,
	pin_memory=True,
	metric_step=200,
	save_step=200,
	metric_final_model=True,
	save_final_model=True,
	load_checkpoint=None)
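# apex=True enables mixed-precision training via NVIDIA Apex; load_checkpoint is
# assumed to take a checkpoint path for resuming an interrupted run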
trainer.train()
						
evaluator = Evaluator(
	test_dataset=test_dataset,
	test_sampler=test_sampler,
	model=model,
	device=device,
	metric=metric,
	output_path="../dataset",
	train_dataset=train_dataset,
	valid_dataset=valid_dataset,
	lookuptable_E=node_lut,
	lookuptable_R=relation_lut,
	log=True,
	evaluator_batch_size=50000,
	dataloaderX=True,
	num_workers=4,
	pin_memory=True,
	trained_model_path=None)
evaluator.evaluate()
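
Once training finishes, the saved weights can be reused for knowledge discovery with the Predictor from the first example. A minimal sketch, assuming the trainer saved a checkpoint named TransE_Model.pkl under the output path (the actual file name and location depend on your run):

predictor = Predictor(model_name="TransE",
			data_name="EVENTKG2M",
			model=model,
			device=device,
			node_lut=node_lut,
			relation_lut=relation_lut,
			pretrained_model_path="../dataset/TransE_Model.pkl",  # assumed checkpoint name and location
			processed_data_path="data",
			reprocess=False,
			fuzzy_query_top_k=10,
			predict_top_k=10)
print(predictor.predict_tail(head_id=0, relation_id=0))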