-
Notifications
You must be signed in to change notification settings - Fork 0
/
convertmysqldump.py
117 lines (99 loc) · 4.5 KB
/
convertmysqldump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Converts tables defined as MyISAM into InnoDB in a
MySQLdump file.
This requires a script, rather than simply using a command
line tool like sed or awk. This is because both these tools
operate on a stream and line basis, and have no knowledge
of lines which have gone before. Plus, of course, awk
and sed don't exist in the Microsoft world.
The reason we can't use (say) a very simple sed script
is that although we may wish to convert our own tables from
MyISAM to InnoDB there are some tables in the default
MySQL schema which we should leave well alone.
"""
import logging
import argparse
import os.path
import sys
import re
def ProcessArguments(argv=None):
    """
    Parse the command line and return the resulting argparse namespace.

    argv is the list of argument strings to parse. When it is None (the
    default) argparse falls back to sys.argv[1:], which is what the script
    entry point relies on.

    The namespace carries: verbose and force (booleans), encoding (string,
    default 'utf-8'), and the positional input and output file names.

    Raises SystemExit, exactly as argparse raised it: with a non-zero code
    after a usage error, or with code 0 after --help. The exit code is
    deliberately passed through so the calling shell can see a failure.
    """
    parser = argparse.ArgumentParser(description='Convert MySQLdump MyISAM tables to InnoDB')
    parser.add_argument('--verbose', help='Verbose output', action='store_true')
    parser.add_argument('--force', help='Overwrite existing output file', action='store_true')
    parser.add_argument('--encoding', help='Character encoding scheme', default='utf-8')
    parser.add_argument('input', help='Input MySQLdump file')
    parser.add_argument('output', help='Output MySQLdump file')
    try:
        return parser.parse_args(argv)
    except SystemExit as exc:
        # argparse signals both "--help was shown" (code 0) and "bad usage"
        # (non-zero code) with SystemExit. Only the latter is an error.
        if exc.code not in (0, None):
            print('Error processing command line arguments', file=sys.stderr)
        # Re-raise rather than calling sys.exit() so the original exit
        # status is not replaced by a misleading 0.
        raise
def SetupLogging(verbose):
    """
    Configure the root logger for the script.

    A truthy verbose selects DEBUG output; otherwise INFO is used. Each
    record is formatted as timestamp, padded level name and message.
    Kept as its own function purely to keep the entry point tidy.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format='%(asctime)s:%(levelname)-7s: %(message)s',
    )
def ProcessFiles(infile, outfile, excluded):
    """
    Copy a MySQL dump from infile to outfile, rewriting MyISAM to InnoDB.

    The dump is streamed one line at a time because it may be too large to
    hold comfortably in memory.

    infile   -- iterable of text lines (e.g. a file opened for reading).
    outfile  -- object with a write() method that receives every line.
    excluded -- iterable of database names that must NOT be converted.
                Names are compared case-insensitively.

    A line is rewritten only while the most recent CREATE DATABASE statement
    named a database that is not excluded. Lines seen before the first
    CREATE DATABASE statement are always copied unchanged. A summary of the
    line counts is logged at the end. Returns None.
    """
    database_re = re.compile(r'(^CREATE DATABASE.*`)(\w+)(`.*$)', re.IGNORECASE)
    table_re = re.compile(r'(^CREATE TABLE.*`)(\w+)(`.*$)', re.IGNORECASE)
    # NOTE(review): this matches the word anywhere on a line, so row data or
    # comments mentioning MyISAM in a non-excluded database are rewritten
    # too. Confirm whether it should be narrowed to the ENGINE= clause.
    myisam_re = re.compile(r'MyISAM', re.IGNORECASE)

    # Normalise once so the membership test below is case-insensitive no
    # matter how the caller spelled the names.
    excluded_names = {name.upper() for name in excluded}

    database = None     # Name of the most recently discovered DB
    table = None        # Name of the most recently discovered table
    isexcluded = True   # Treat everything as excluded until a DB is seen
    preserved = 0       # Number of lines preserved
    changed = 0         # Number of lines changed
    total = 0           # Number of lines read; stays 0 for empty input

    for total, line in enumerate(infile, start=1):
        founddb = database_re.search(line)
        if founddb:
            database = founddb.group(2)
            # A table name from the previous database would be misleading
            # in the log messages below.
            table = None
            isexcluded = database.upper() in excluded_names
            logging.info('Line {:,}: Found database {:} (excluded = {})'.format(
                total, database, 'Yes' if isexcluded else 'No'))

        foundtable = table_re.search(line)
        if foundtable:
            table = foundtable.group(2)
            logging.info('Line {:,}: Found table {:}'.format(total, table))

        if not isexcluded and myisam_re.search(line):
            newline = myisam_re.sub('InnoDB', line)
            logging.info('Line {:,}: Changing {}.{} `{}` -> `{}`'.format(
                total, database, table, line.rstrip(), newline.rstrip()))
            outfile.write(newline)
            changed += 1
        else:
            outfile.write(line)
            preserved += 1

    logging.info('Read {:,} lines. {:,} lines changed, {:,} lines preserved'.format(
        total, changed, preserved))
if __name__ == '__main__':
    # Script entry point: parse arguments, configure logging, then stream
    # the input dump to the output file, converting MyISAM to InnoDB.
    args = ProcessArguments()
    SetupLogging(args.verbose)
    logging.debug('Converting {} to {}. Forcible = {}'.format(
        args.input, args.output, 'Yes' if args.force else 'No'))
    excluded = ['mysql']    # Default list of databases which we don't change

    output_exists = os.path.exists(args.output)
    if output_exists and not args.force:
        logging.error('File {} already exists. (Use the --force option, Luke)'.format(
            args.output))
        sys.exit(1)     # Report the failure to the calling shell

    try:
        # Opening the output in 'w' mode truncates it, so writing over the
        # input would destroy the dump before a single line had been read.
        if output_exists and os.path.samefile(args.input, args.output):
            logging.error('Input and output are the same file ({}); refusing to overwrite the input'.format(
                args.input))
            sys.exit(1)

        # newline='' switches off newline translation in both directions so
        # the line endings of the dump are copied through untouched.
        with open(args.input, encoding=args.encoding, newline='') as infile, \
                open(args.output, 'w', encoding=args.encoding, newline='') as outfile:
            logging.debug('Opened {} for reading'.format(args.input))
            logging.debug('Opened {} for writing'.format(args.output))
            ProcessFiles(infile, outfile, [e.upper() for e in excluded])
    except OSError as exc:
        # Missing input, unwritable output, full disk and so on: log a
        # readable message instead of a traceback and exit non-zero.
        logging.error('Unable to convert {} to {}: {}'.format(
            args.input, args.output, exc))
        sys.exit(1)