forked from doolin/patentprocessor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
consolidate.py
executable file
·70 lines (57 loc) · 2.69 KB
/
consolidate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import sys
sys.path.append( './lib/' )
import SQLite
import datetime
import shutil
t1 = datetime.datetime.now()
print "Start", t1
##Create invpat
ip = SQLite.SQLite(db = 'invpat.sqlite3', tbl = 'invpat')
ip.c.execute("DROP TABLE IF EXISTS invpat")
ip.c.execute("""CREATE TABLE invpat(Firstname TEXT, Middlename TEXT, Lastname TEXT, Street TEXT,
City TEXT, State TEXT, Country TEXT, Zipcode TEXT, Latitude REAL,
Longitude REAL, InvSeq INT, Patent TEXT, AppYear TEXT, ApplyYear TEXT, GYear INT,
AppDate TEXT, Assignee TEXT, AsgNum INT, Class TEXT, Coauthor TEXT, Invnum TEXT,
Invnum_N TEXT, Unique_Record_ID TEXT);""")
##From inventor.sqlite3: Firstname, Lastname, Street, City, State, Country, Zipcode, Latitude, Longitude, InvSeq
ip.attach('inventor.sqlite3')
ip.c.execute("""INSERT INTO invpat (Firstname, Lastname, Street, City, State, Country, Zipcode, Latitude, Longitude, Patent, InvSeq)
SELECT Firstname, Lastname, Street, NCity, NState, NCountry, NZipcode, NLat, NLong, Patent, InvSeq FROM db.inventor_1""")
ip.detach()
##From patent.sqlite3: Patent, AppYear, GYear, AppDate
ip.attach('patent.sqlite3')
ip.merge(key = ['AppYear', 'GYear', 'AppDate'], on= ['Patent'], tableFrom = 'patent', db = 'db')
ip.detach()
##From assignee.sqlite3: Assignee, AsgNum
ip.attach('assignee.sqlite3')
ip.merge(key = [['Assignee', 'assigneeAsc'], 'AsgNum'], on = ['Patent'], tableFrom = 'assignee_1', db = 'db')
ip.detach()
##From class: class
ip.attach('class_1.sqlite3')
ip.merge(key = [['Class', 'ClassSub']], on = ['Patent'], tableFrom = 'class_1', db = 'db')
ip.detach()
ip.commit()
##Generate invnum
ip.c.execute("UPDATE invpat SET invnum = patent || '-' || invseq")
ip.c.execute("UPDATE invpat SET Invnum_N = Invnum")
ip.c.execute("UPDATE invpat SET Unique_Record_ID = Invnum")
ip.c.execute("UPDATE invpat SET Middlename = Firstname")
ip.c.execute("UPDATE invpat SET ApplyYear = AppYear")
ip.commit()
##Index invpat
ip.c.execute("CREATE INDEX asg on invpat (Assignee);")
ip.c.execute("CREATE INDEX asg2 on invpat (AsgNum);")
ip.c.execute("CREATE INDEX gyr on invpat (Gyear);")
ip.c.execute("CREATE INDEX iNidx ON invpat (Invnum_N);")
ip.c.execute("CREATE INDEX locc on invpat (City);")
ip.c.execute("CREATE INDEX loccs on invpat (City, State);")
ip.c.execute("CREATE INDEX locs on invpat (State);")
ip.c.execute("CREATE INDEX pdx ON invpat (Patent);")
ip.c.execute("CREATE INDEX pidx ON invpat (Patent, InvSeq);")
ip.commit()
ip.close()
print "Finish", datetime.datetime.now()-t1
#tables = ["assignee", "citation", "class", "inventor", "patent", "patdesc", "lawyer", "sciref", "usreldoc"]
#for table in tables:
# filename = table + ".sqlite3"
# shutil.move(filename,flder)