Skip to content

Commit

Permalink
initial location and gold standard import utilities
Browse files Browse the repository at this point in the history
Import the gold standard and location with sample CSV with headers

#14

#12
  • Loading branch information
Sourbiebie committed Jan 11, 2022
1 parent 2639224 commit 8204c8a
Show file tree
Hide file tree
Showing 4 changed files with 321 additions and 0 deletions.
249 changes: 249 additions & 0 deletions back-end/www/models/model_operations/location_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
"""Functions to operate the location table."""

import datetime
import random

from sqlalchemy.sql.expression import null, true
from sqlalchemy import func
from models.model import db
from models.model import Location
from models.model import Answer

DEBUG = False


def dbprint(*values: object):
    """
    Emit a debug trace on stdout when the module-level DEBUG flag is set.

    Parameters
    ----------
    *values : object
        Items to trace. They are printed together as one tuple, not as
        separate space-joined arguments.
    """
    if not DEBUG:
        return
    print(values)


def create_location(factory_id):
    """
    Insert a new row into the location table and commit it.

    Parameters
    ----------
    factory_id : str
        ID (uuid) provided by importing from the factory table.

    Returns
    -------
    location : Location
        The location object that was added and committed.
    """
    new_location = Location(factory_id=factory_id)

    session = db.session
    session.add(new_location)
    session.commit()

    return new_location


def get_location_by_id(location_id):
    """
    Look up a single location by its primary id.

    Parameters
    ----------
    location_id : int
        ID of the location.

    Returns
    -------
    location : Location
        The first location whose id matches, or None when there is no match.
    """
    matches = Location.query.filter_by(id=location_id)
    return matches.first()


def get_location_by_factory_id(factory_id):
    """
    Look up a single location by the factory id it was created from.

    Parameters
    ----------
    factory_id : str
        ID (uuid) of the factory.

    Returns
    -------
    location : Location
        The first location whose factory_id matches, or None when there is
        no match.
    """
    matches = Location.query.filter_by(factory_id=factory_id)
    return matches.first()


def set_location_done(location_id, is_done):
    """
    Mark a location as done (stamping done_at with the current time) or
    clear the mark again.

    Parameters
    ----------
    location_id : int
        ID of the location.
    is_done : bool
        True stores the current local time in done_at; False resets done_at
        to None.

    Returns
    -------
    location : Location
        The updated location object.

    Raises
    ------
    exception : Exception
        - When is_done is not a bool.
        - When no location is found.
    """
    # Validate the flag before touching the database.
    if not isinstance(is_done, bool):
        raise Exception("is_done shall be bool")

    target = Location.query.filter_by(id=location_id).first()
    if target is None:
        raise Exception("No location found in the database to update.")

    target.done_at = datetime.datetime.now() if is_done else None
    db.session.commit()
    return target


def remove_location(location_id):
    """
    Delete a location from the database and commit the deletion.

    Parameters
    ----------
    location_id : int
        ID of the location.

    Raises
    ------
    exception : Exception
        When no location is found.
    """
    doomed = Location.query.filter_by(id=location_id).first()
    if doomed is None:
        raise Exception("No location found in the database to delete.")

    session = db.session
    session.delete(doomed)
    session.commit()


def get_locations(size, gold_standard_size):
    """
    Get a randomly chosen, shuffled batch of locations.

    The batch mixes ``gold_standard_size`` locations that have at least one
    gold standard answer with ``size - gold_standard_size`` locations that
    have none.

    Parameters
    ----------
    size : int
        Total number of locations to be returned.
    gold_standard_size : int
        Within size, the number of locations which have gold standard
        answers.

    Returns
    -------
    locations : list of Location, or None
        The retrieved location objects in random order. None is returned
        when size is 0 (kept for backward compatibility with callers).

    Raises
    ------
    exception : Exception
        When either
        - size or gold_standard_size is not an integer, or is < 0
        - gold_standard_size exceeds size
        - fewer than gold_standard_size locations have gold standards
        - fewer than size locations can be found in total
    """
    if not isinstance(gold_standard_size, int):
        raise Exception("The gold_standard_size shall be an integer")
    if not isinstance(size, int):
        raise Exception("The size shall be an integer")
    if size < 0:
        raise Exception("The size must be greater or equal to 0.")
    if gold_standard_size < 0:
        raise Exception("The gold_standard_size must be greater or equal to 0.")
    if gold_standard_size > size:
        raise Exception("The gold standard size cannot exceed size.")

    if size == 0:
        return None

    # Collect the ids of locations which have gold answers.
    # distinct(column) is only rendered as DISTINCT ON by the PostgreSQL
    # dialect, so de-duplicate in Python as well to keep the count below
    # correct on every backend.
    gold_answers = Answer.query.filter(Answer.is_gold_standard)
    gold_location_ids = list({
        answer.location_id
        for answer in gold_answers.distinct(Answer.location_id).all()
    })

    if len(gold_location_ids) < gold_standard_size:
        err_rstring = "Cannot find expected(enough) amount of locations which have gold standards :{}. {} are found.".format(gold_standard_size, len(gold_location_ids))
        raise Exception(err_rstring)

    # Start from an empty list so either half may be absent without
    # breaking the concatenation (size == gold_standard_size used to fail).
    location_list = []

    # Randomly pick locations which have been provided gold answers.
    if gold_standard_size > 0:
        sel_gold_location_list = (
            Location.query.filter(Location.id.in_(gold_location_ids))
            .order_by(func.random())
            .limit(gold_standard_size)
            .all()
        )
        dbprint("sel_gold_location_list : ", sel_gold_location_list)
        location_list.extend(sel_gold_location_list)

    # Randomly pick locations which don't have gold answers.
    non_gold_size = size - gold_standard_size
    if non_gold_size > 0:
        sel_non_gold_location_list = (
            Location.query.filter(Location.id.not_in(gold_location_ids))
            .order_by(func.random())
            .limit(non_gold_size)
            .all()
        )
        dbprint("sel_non_gold_location_list : ", sel_non_gold_location_list)
        location_list.extend(sel_non_gold_location_list)

    if len(location_list) < size:
        raise Exception("Cannot find expected amount of locations", size)

    # Mix gold and non-gold locations so their position gives nothing away.
    random.shuffle(location_list)

    return location_list


def get_location_is_done_count():
    """
    Count the locations which have been labeled done.

    Returns
    -------
    count : int
        The number of locations whose done_at is set.
    """
    # Only rows with a done_at value count as done.
    has_done_date = Location.done_at.isnot(None)
    return Location.query.filter(has_done_date).count()


def get_location_count():
    """
    Count every location stored in the db.

    Returns
    -------
    count : int
        The total number of locations.
    """
    return Location.query.count()
1 change: 1 addition & 0 deletions back-end/www/util/api.factory_100.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
deleted_at,id,display_number,lat,lng,landcode,towncode,townname,sectcode,sectname,name,factory_type,before_release,source,cet_review_status,cet_report_status,building_status,usage_status,highlight_category,cet_reviewer,status_time,created_at,updated_at
1 change: 1 addition & 0 deletions back-end/www/util/gold_answers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
factory_ID,year_new,year_old,land_usage,expansion
70 changes: 70 additions & 0 deletions back-end/www/util/import_gold_standards_from_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
The script loads the CSV and imports the data into the answer table as gold
standards.

Config
------
CSV_FILE_NAME : The CSV file to be imported, resolved against the current
    working directory. The first row is treated as a header and skipped.
    In every following row column 0 is the factory ID and columns 1-4 must
    be integers (year_new, year_old, land_usage, expansion in the sample
    gold_answers.csv).
CFG_NAME : The config to load; can be Development, Staging, Testing.

Output
------
The number of answers inserted by this run and the total gold standard
count after the import.
"""
CSV_FILE_NAME = "gold_answers.csv"
CFG_NAME = "config.config.DevelopmentConfig"

import sys
import os
# Make the parent directory importable before the project imports below.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import csv
from models.model import db
from models.model_operations import location_operations
from models.model_operations import answer_operations
from models.model_operations import user_operations
from config.config import Config
from flask import Flask
from controllers import root

# init db
app = Flask(__name__)
app.register_blueprint(root.bp)
app.config.from_object(CFG_NAME)
db.init_app(app)
app.app_context().push()

# If need to re-create the tables:
#db.drop_all()
#db.create_all()

admin_id = 0  # NOTE(review): not used anywhere in this script
ans_count = 0
# NOTE(review): a user is created on every run; confirm that re-running the
# import with the same client id "1117" is acceptable.
u1 = user_operations.create_user("1117")

# open file for reading; newline="" is what the csv module expects
with open(CSV_FILE_NAME, newline="") as csvDataFile:

    # read file as csv file
    csvReader = csv.reader(csvDataFile)

    # Skip the first row of the field name (tolerate a completely empty file)
    next(csvReader, None)

    # for every row, look up the location by factory id (column 0) and
    # insert an answer for it
    for row in csvReader:
        # csv.reader yields [] for a blank line and never None, so test for
        # emptiness instead; rows without a factory id are skipped.
        if not row or not row[0]:
            continue
        location = location_operations.get_location_by_factory_id(row[0])
        if location is not None:
            print("location_id is: {}".format(row[0]))
            # NOTE(review): the argument order (row[2] before row[1]) is kept
            # from the original; verify it against create_answer's signature.
            answer_operations.create_answer(u1.id, location.id, int(row[2]), int(row[1]), "", int(row[3]), int(row[4]), 0)
            ans_count += 1
        else:
            print("Cannot insert {}".format(row[0]))


print("Insert {} gold standards. ".format(ans_count))
total_ans_count = answer_operations.get_gold_answer_count()
print("Total gold standard cout is : {} ".format(total_ans_count))

db.session.remove()
db.session.close()

1 comment on commit 8204c8a

@Sourbiebie
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I linked to the wrong issues. It should be
#13

#10

Please sign in to comment.