From 73879b3b2a498408f48e86d3d0a2127fad04274d Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 27 Oct 2020 20:23:12 +0100 Subject: [PATCH 1/7] Switch to src/ layout and make explicit imports --- {codes => src/query2box}/dataloader.py | 0 {codes => src/query2box}/model.py | 3 ++- {codes => src/query2box}/run.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) rename {codes => src/query2box}/dataloader.py (100%) rename {codes => src/query2box}/model.py (98%) rename {codes => src/query2box}/run.py (99%) diff --git a/codes/dataloader.py b/src/query2box/dataloader.py similarity index 100% rename from codes/dataloader.py rename to src/query2box/dataloader.py diff --git a/codes/model.py b/src/query2box/model.py similarity index 98% rename from codes/model.py rename to src/query2box/model.py index f44d1a5..6d4bb40 100644 --- a/codes/model.py +++ b/src/query2box/model.py @@ -10,10 +10,11 @@ import torch.nn as nn import torch.nn.functional as F from torch.utils.data import DataLoader -from dataloader import * +from query2box.dataloader import * import random import pickle import math + def Identity(x): return x diff --git a/codes/run.py b/src/query2box/run.py similarity index 99% rename from codes/run.py rename to src/query2box/run.py index f293b5a..118f6c0 100644 --- a/codes/run.py +++ b/src/query2box/run.py @@ -15,8 +15,8 @@ from torch.utils.data import DataLoader -from model import Query2box -from dataloader import * +from query2box.model import Query2box +from query2box.dataloader import * from tensorboardX import SummaryWriter import time import pickle From aa99db7ed1dd491126cb54cb7eb92664b227b245 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 27 Oct 2020 20:33:18 +0100 Subject: [PATCH 2/7] Add setup configuration and update README with installation instructions --- README.md | 17 +++++++++++++---- setup.cfg | 43 +++++++++++++++++++++++++++++++++++++++++++ setup.py | 8 ++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) create 
mode 100644 setup.cfg create mode 100644 setup.py diff --git a/README.md b/README.md index c983f16..274f7d3 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,21 @@ Official implementation of [Query2box: Reasoning over Knowledge Graphs in Vector [Hongyu Ren*](http://hyren.me), [Weihua Hu*](http://web.stanford.edu/~weihuahu/), [Jure Leskovec](https://cs.stanford.edu/people/jure/index.html), ICLR 2020. -## Requirements -``` -torch==1.2.0 -tensorboadX==1.6 +## Installation + +To install in development mode, clone from GitHub +with the following: + +```bash +git clone https://github.com/hyren/query2box +cd query2box +pip install --editable . ``` +`--editable` means that the code is symlinked into your +Python's `site-packages` so it doesn't need to be reinstalled +every time the code is changed. + ## Run To reproduce the results on FB15k, FB15k-237 and NELL, the hyperparameters are set in `example.sh`. ``` diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..29b5f87 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,43 @@ +########################## +# Setup.py Configuration # +########################## +[metadata] +name = query2box +version = 1.0.0 +long_description = file: README.md +long_description_content_type = text/markdown + +# URLs associated with the project +url = https://github.com/hyren/query2box +download_url = https://github.com/hyren/query2box/releases +project_urls = + Bug Tracker = https://github.com/hyren/query2box/issues + Source Code = https://github.com/hyren/query2box + +# Author information +author = Hongyu Ren +# author_email = ... +maintainer = Hongyu Ren +# maintainer_email = ... 
+ +# License Information +license = MIT +license_file = LICENSE + +[options] +install_requires = + torch==1.2.0 + tensorboardX==1.6 + +# Random options +zip_safe = false +include_package_data = True +python_requires = >=3.5 + +# Where is my code +packages = find: +package_dir = + = src + +[options.packages.find] +where = src diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a78fbfd --- /dev/null +++ b/setup.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +"""Setup module.""" + +import setuptools + +if __name__ == '__main__': + setuptools.setup() From f23ed3bfc96db588252abbabe783859d50d9fc2b Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 27 Oct 2020 20:33:36 +0100 Subject: [PATCH 3/7] Use __main__ for command line usage --- example.sh | 6 +++--- src/query2box/__main__.py | 4 ++++ src/query2box/run.py | 9 ++++++--- 3 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 src/query2box/__main__.py diff --git a/example.sh b/example.sh index af5e3dd..81c288a 100755 --- a/example.sh +++ b/example.sh @@ -1,18 +1,18 @@ #!/bin/bash - CUDA_VISIBLE_DEVICES=0 python3.5 -u codes/run.py --do_train --cuda --do_valid --do_test \ + CUDA_VISIBLE_DEVICES=0 python3.5 -m query2box -u --do_train --cuda --do_valid --do_test \ --data_path data/FB15k --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ --geo box --task 1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 --offset_deepsets inductive --center_deepsets eleattention \ --print_on_screen - CUDA_VISIBLE_DEVICES=1 python3.5 -u codes/run.py --do_train --cuda --do_valid --do_test \ + CUDA_VISIBLE_DEVICES=1 python3.5 -m query2box -u --do_train --cuda --do_valid --do_test \ --data_path data/FB15k-237 --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ --geo box --task 1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 
--offset_deepsets inductive --center_deepsets eleattention \ --print_on_screen - CUDA_VISIBLE_DEVICES=2 python3.5 -u codes/run.py --do_train --cuda --do_valid --do_test \ + CUDA_VISIBLE_DEVICES=2 python3.5 -m query2box -u --do_train --cuda --do_valid --do_test \ --data_path data/NELL --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ --geo box --task 1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 --offset_deepsets inductive --center_deepsets eleattention \ diff --git a/src/query2box/__main__.py b/src/query2box/__main__.py new file mode 100644 index 0000000..df412a6 --- /dev/null +++ b/src/query2box/__main__.py @@ -0,0 +1,4 @@ +from query2box.run import main + +if __name__ == '__main__': + main() diff --git a/src/query2box/run.py b/src/query2box/run.py index 118f6c0..0097783 100644 --- a/src/query2box/run.py +++ b/src/query2box/run.py @@ -172,8 +172,11 @@ def log_metrics(mode, step, metrics): ''' for metric in metrics: logging.info('%s %s at step %d: %f' % (mode, metric, step, metrics[metric])) - -def main(args): + +def main(): + main_helper(parse_args()) + +def main_helper(args): set_global_seed(args.seed) args.test_batch_size = 1 assert args.bn in ['no', 'before', 'after'] @@ -933,4 +936,4 @@ def evaluate_train(): if __name__ == '__main__': - main(parse_args()) \ No newline at end of file + main() \ No newline at end of file From 405d0eaf146e2c061a9fad0b3d2916d930908512 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 13:07:20 +0100 Subject: [PATCH 4/7] Update .gitignore --- .gitignore | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4983e60..a59e560 100755 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,107 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python 
+build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.DS_Store + logs -codes/__pycache__ \ No newline at end of file From 78b97cbdcd0048d4d794a2005438e9e1e2b3721b Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 13:07:44 +0100 Subject: [PATCH 5/7] Add CLI --- README.md | 13 +++++++++++++ example.sh | 6 +++--- setup.cfg | 11 +++++++---- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 274f7d3..e946b4b 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,19 @@ pip install --editable . Python's `site-packages` so it doesn't need to be reinstalled every time the code is changed. +## Command Line Interface + +The `query2box` command line interface is installed automatically. 
It can be used +as follows: + +```bash +$ CUDA_VISIBLE_DEVICES=0 query2box --do_train --cuda --do_valid --do_test \ + --data_path data/FB15k --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ + -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ + --geo box --task 1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 --offset_deepsets inductive \ + --center_deepsets eleattention --print_on_screen +``` + ## Run To reproduce the results on FB15k, FB15k-237 and NELL, the hyperparameters are set in `example.sh`. ``` diff --git a/example.sh b/example.sh index 81c288a..2a4a5b5 100755 --- a/example.sh +++ b/example.sh @@ -1,18 +1,18 @@ #!/bin/bash - CUDA_VISIBLE_DEVICES=0 python3.5 -m query2box -u --do_train --cuda --do_valid --do_test \ + CUDA_VISIBLE_DEVICES=0 query2box --do_train --cuda --do_valid --do_test \ --data_path data/FB15k --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ --geo box --task 1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 --offset_deepsets inductive --center_deepsets eleattention \ --print_on_screen - CUDA_VISIBLE_DEVICES=1 python3.5 -m query2box -u --do_train --cuda --do_valid --do_test \ + CUDA_VISIBLE_DEVICES=1 query2box --do_train --cuda --do_valid --do_test \ --data_path data/FB15k-237 --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ --geo box --task 1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 --offset_deepsets inductive --center_deepsets eleattention \ --print_on_screen - CUDA_VISIBLE_DEVICES=2 python3.5 -m query2box -u --do_train --cuda --do_valid --do_test \ + CUDA_VISIBLE_DEVICES=2 query2box --do_train --cuda --do_valid --do_test \ --data_path data/NELL --model BoxTransE -n 128 -b 512 -d 400 -g 24 -a 1.0 \ -lr 0.0001 --max_steps 300000 --cpu_num 1 --test_batch_size 16 --center_reg 0.02 \ --geo box --task
1c.2c.3c.2i.3i.ic.ci.2u.uc --stepsforpath 300000 --offset_deepsets inductive --center_deepsets eleattention \ diff --git a/setup.cfg b/setup.cfg index 29b5f87..e6728ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,8 +17,6 @@ project_urls = # Author information author = Hongyu Ren # author_email = ... -maintainer = Hongyu Ren -# maintainer_email = ... # License Information license = MIT @@ -26,8 +24,9 @@ license_file = LICENSE [options] install_requires = - torch==1.2.0 - tensorboardX==1.6 + torch + tensorboardX + tqdm # Random options zip_safe = false @@ -41,3 +40,7 @@ package_dir = [options.packages.find] where = src + +[options.entry_points] +console_scripts = + query2box = query2box.run:main From 206b203666ed9bd8b23a502ec5e7dd38f04ad17c Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 13:08:17 +0100 Subject: [PATCH 6/7] Use tqdm for training --- src/query2box/run.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/query2box/run.py b/src/query2box/run.py index 0097783..3a1a80c 100644 --- a/src/query2box/run.py +++ b/src/query2box/run.py @@ -13,6 +13,7 @@ import numpy as np import torch +from tqdm import tqdm, trange from torch.utils.data import DataLoader from query2box.model import Query2box @@ -828,7 +829,7 @@ def evaluate_train(): else: begin_pq_step = args.max_steps - args.stepsforpath #Training Loop - for step in range(init_step, args.max_steps): + for step in trange(init_step, args.max_steps, desc='Training', unit='step', unit_scale=True): # print ("begining training step", step) # if step == 100: # exit(-1) @@ -904,7 +905,7 @@ def evaluate_train(): training_logs = [] if args.do_valid and step % args.valid_steps == 0: - logging.info('Evaluating on Valid Dataset...') + tqdm.write('Evaluating on Valid Dataset...') evaluate_val() save_variable_list = { From ab7e89d35c291041e89240e0de6b022f2fd8d573 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 23 Dec 2020 13:08:24 +0100 Subject: [PATCH 7/7] Make 
sure CUDA possible --- src/query2box/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query2box/run.py b/src/query2box/run.py index 3a1a80c..51c7a8f 100644 --- a/src/query2box/run.py +++ b/src/query2box/run.py @@ -504,7 +504,7 @@ def main_helper(args): num_params += np.prod(param.size()) logging.info('Parameter Number: %d' % num_params) - if args.cuda: + if args.cuda and torch.cuda.is_available(): query2box = query2box.cuda() if args.do_train: