Skip to content

Commit

Permalink
Added code for testing model
Browse files Browse the repository at this point in the history
Added model testing code in testing_suite.py and added models in
product_roulette.py
  • Loading branch information
bezoar17 committed Feb 12, 2017
1 parent da202d3 commit 2d8f9c8
Show file tree
Hide file tree
Showing 3 changed files with 1,061 additions and 41 deletions.
147 changes: 106 additions & 41 deletions populate_db.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import sqlite3
import random
import logging
import csv

def create_db():
db=sqlite3.connect('example.db')
# Module-level lookup tables shared by the populate_* functions.

# product_name -> product_id as stored in product_info_table
productids = {}
# user_email -> user_id as given in db/db_userinfo.csv
useremailtoid = {}
# CSV user_id -> user_id assigned by user_info_table in train.db
train_userids_back = {}
# CSV user_id -> user_id assigned by user_info_table in test.db
test_userids_back = {}

def create_db(db_name):
# Open (creating it if needed) the SQLite file '<db_name>.db', drop the
# existing tables, re-create them empty, then commit and close.
# db_name: file name without the '.db' suffix, e.g. 'train' or 'test'.

db=sqlite3.connect(db_name+'.db')
cursor=db.cursor()
#drop previous tables
cursor.execute('''DROP TABLE IF EXISTS user_info_table''')
# NOTE(review): the next line is a collapsed diff hunk marker, not code; the
# statements it hides (presumably the remaining DROP/CREATE TABLE calls) are
# not visible here -- confirm against the full file.
Expand All @@ -16,50 +23,108 @@ def create_db():
# user_inputs_table holds one row per (user, product) input value.
cursor.execute('''CREATE TABLE user_inputs_table(s_no INTEGER PRIMARY KEY, user_id INTEGER, product_id INTEGER, input_val INTEGER)''')
db.commit()
db.close()

def populate_products():
# Load the product catalogue from db/db_products.csv, insert it into
# product_info_table of both train.db and test.db, and fill the module-level
# productids dict (product_name -> product_id).

# get products in the product_values list
global productids
reader = csv.DictReader(open('db/db_products.csv'))
product_values=list()
for row in reader:
# one (product_name, persona, trending) tuple per CSV row
product_values.append((row['product_name'],row['persona'],row['trending']))

# NOTE(review): this is a flattened diff. The 'def populate_val' header, the
# 'example.db' connect and the hard-coded product list below look like the
# removed side of the change (superseded by the CSV read above) -- confirm
# against the committed file before relying on them.
def populate_val():
db=sqlite3.connect('example.db')
db=sqlite3.connect('train.db')
cursor=db.cursor()
#put in values
product_values = [('Github', 'Developer', 1),
('Bitbucket', 'Developer', 1),
('Perforce', 'Developer', 0),
('Sublime Text', 'Developer', 1),
('Atom', 'Developer', 0),
('Vim', 'Developer', 1),
('Xcode', 'Developer', 0),
('RStudio', 'Developer', 0),
('Cloud9', 'Developer', 0),
('Koding', 'Developer', 1),
('Unity', 'Developer', 0),
('ImpactJS', 'Developer', 1),
('FMOD', 'Developer', 0),
('Sidekiq Pro', 'Developer', 1),
('jQuery', 'Developer', 1),
('Underscore.js', 'Developer', 0),
('OpenGrok', 'Developer', 1),
('Source Insight', 'Developer', 0),
('FileZilla', 'Developer', 0),
('MySQL', 'Developer', 1),
('PostgreSQL', 'Developer', 0),
('Amazon RDS', 'Developer', 0),
('MongoDB', 'Developer', 1),
('Neo4j', 'Developer', 0),
('Apache Maven', 'Developer', 1),
('CMake', 'Developer', 0),
('Apache Ant', 'Developer', 1),
('Microsoft Azure', 'Developer', 1),
('Amazon EC2', 'Developer', 1),
('DigitalOcean', 'Developer', 0),
('WordPress', 'Developer', 0),
('Medium', 'Developer', 1),
('Bootstrap', 'Developer', 0),
('Redis', 'Developer', 1),]
# insert the catalogue into train.db
cursor.executemany('INSERT INTO product_info_table(product_name,persona,trending) VALUES (?,?,?)', product_values)
db.commit()
db.close()

# insert the same catalogue into test.db
db=sqlite3.connect('test.db')
cursor=db.cursor()
cursor.executemany('INSERT INTO product_info_table(product_name,persona,trending) VALUES (?,?,?)', product_values)
db.commit()
# Record the ids assigned in test.db; i is a (product_id, product_name) row.
# NOTE(review): the same mapping is later used for train.db rows too, which
# presumably relies on both databases assigning identical ids -- confirm.
cursor.execute('SELECT product_id,product_name FROM product_info_table')
for i in cursor.fetchall():
productids[i[1]]=i[0]
db.commit()
db.close()

def populate_trts(train_count=15):
    """Split the CSV users, and their inputs, between train.db and test.db.

    Reads ``db/db_userinfo.csv`` (columns ``user_id``, ``user_email``,
    ``persona``), shuffles the users, and inserts the first *train_count*
    of them into ``user_info_table`` of ``train.db`` and the rest into the
    same table of ``test.db``.  Each row of ``db/db_userinputs.csv``
    (columns ``user_id``, ``product_name``, ``user_input``) is then written
    to ``user_inputs_table`` of whichever database holds its user, with the
    CSV user id and the product name translated to database ids.  Input
    rows whose user id does not appear in the user CSV are skipped.

    Both databases must already contain ``user_info_table`` and
    ``user_inputs_table``, and the module-level ``productids`` dict must
    already map every product name used in the inputs CSV.

    Side effects: fills the module-level ``useremailtoid``,
    ``train_userids_back`` and ``test_userids_back`` dicts.

    Args:
        train_count: number of shuffled users that go to the training
            database.  The default of 15 is the split size this function
            has always used.

    Raises:
        ValueError: if *train_count* is negative.
        KeyError: if an input row names a product missing from
            ``productids``.
    """
    global productids, useremailtoid, train_userids_back, test_userids_back

    # A negative count would silently slice from the end of the list.
    if train_count < 0:
        raise ValueError('train_count must be non-negative')

    # newline='' is what the csv module asks for when reading files.
    with open('db/db_userinfo.csv', newline='') as handle:
        all_users = [(row['user_id'], row['user_email'], row['persona'])
                     for row in csv.DictReader(handle)]
    for csv_id, email, _persona in all_users:
        useremailtoid[email] = csv_id

    # Comment out the shuffle for a fixed split: the first train_count users
    # of the CSV go to training and the remainder to test.
    random.shuffle(all_users)
    train_users = all_users[:train_count]
    test_users = all_users[train_count:]
    # Sets give constant-time membership tests while routing the inputs.
    train_userids = {user[0] for user in train_users}
    test_userids = {user[0] for user in test_users}

    insert_user = 'INSERT INTO user_info_table(email_id,persona) VALUES (?,?)'
    _insert_rows('train.db', insert_user, [user[1:] for user in train_users])
    _insert_rows('test.db', insert_user, [user[1:] for user in test_users])

    # The databases assign their own user ids, so read them back and key
    # them by the CSV id that the inputs file refers to.
    _load_db_user_ids('train.db', useremailtoid, train_userids_back)
    _load_db_user_ids('test.db', useremailtoid, test_userids_back)

    train_userinputs = []
    test_userinputs = []
    with open('db/db_userinputs.csv', newline='') as handle:
        for row in csv.DictReader(handle):
            csv_id = row['user_id']
            if csv_id in train_userids:
                train_userinputs.append((train_userids_back[csv_id],
                                         productids[row['product_name']],
                                         row['user_input']))
            elif csv_id in test_userids:
                test_userinputs.append((test_userids_back[csv_id],
                                        productids[row['product_name']],
                                        row['user_input']))

    # push the datasets
    insert_input = ('INSERT INTO user_inputs_table'
                    '(user_id,product_id,input_val) VALUES (?,?,?)')
    _insert_rows('train.db', insert_input, train_userinputs)
    _insert_rows('test.db', insert_input, test_userinputs)


def _insert_rows(db_path, sql, rows):
    """Run *sql* once per entry of *rows* in *db_path*, commit, and close."""
    db = sqlite3.connect(db_path)
    try:
        db.executemany(sql, rows)
        db.commit()
    finally:
        # Close even when the insert fails so the file handle is not leaked.
        db.close()


def _load_db_user_ids(db_path, email_to_csv_id, target):
    """Fill *target* with CSV user id -> user id assigned in *db_path*."""
    db = sqlite3.connect(db_path)
    try:
        rows = db.execute('SELECT user_id,email_id FROM user_info_table').fetchall()
    finally:
        db.close()
    for db_id, email in rows:
        target[email_to_csv_id[email]] = db_id

def start():
    """Recreate train.db and test.db, then load products, users and inputs."""
    for db_name in ('train', 'test'):
        create_db(db_name)
    populate_products()
    populate_trts()

if __name__ == '__main__':
    # start() creates both databases and populates them. create_db needs a
    # db_name argument, so it must not be called bare from here.
    start()

Loading

0 comments on commit 2d8f9c8

Please sign in to comment.