1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516 | # Author: limodou (limodou@gmail.com)
#
# This tool is used for dump and reload data from and into database
# You can see the help info through:
#
# python db_dump.py -h
#
# For now, it only support .py format, so the output result will
# be saved as python source code, and you can import it.
#
# Version 2.2 2007-11-01
# * improve postgresql sequence field process, thanks oyvind.saltvik@gmail.com and Matthew Wensing
# * add errorquit option in command line
#
# Version 2.1 2007-09-18
# * add Time type support
#
# Version 2.0 2007-09-11
# * refact, and add aoto reset postgres sequence, thanks Eric SIMORRE
#
# Version 1.9 2007-09-02 (Merge from RichardH)
# * Adds try-except to catch the changes in db.backend refactoring in
# svn version. So db_dump.py can support old version except trunk.
#
# Version 1.8 2007-08-30
# * Fix backend.quote_name to backend.DatabaseOperations().quote_name
# Thanks to richardh
#
# Version 1.7 2007-05-28
# * keep up with the change of GenericRel, so you can use db_dump.py
# in trunk and version before 0.97
#
# Version 1.6 2007-04-09
# * Add float support
#
# Version 1.5 2007-02-08
# * If the filename is not exists, then skip it
#
# Version 1.4 2007-01-21
# * support mysql
#
# Version 1.3 2007-01-20
# * change the output format of data file, and improve the process
# effective of dumpping and loading
#
# Version 1.2 2007-01-20
# * change dumpdb to use model info but not cursor.description,
# because some database backend does not support cursor.description
#
# Version 1.1 2007-01-19
# * if no arguments after db_dump.py, then it'll show help infomation
#
# Version 1.0 2007-01-18
#
import os, sys
import datetime
import decimal
from optparse import OptionParser
quote_flag = None
def _get_table_order(app_labels):
from django.db.models import get_app, get_apps, get_models
from django.db.models import ForeignKey, OneToOneField
if not app_labels:
app_list = get_apps()
else:
app_list = [get_app(app_label) for app_label in app_labels]
models = {}
for app in app_list:
for model in get_models(app):
models[model._meta.db_table] = model
s = []
rules = []
def order(s, rule):
a, b = rule
try:
i = s.index(a)
try:
j = s.index(b)
if j<i:
del s[i]
s.insert(j, a)
except:
s.append(b)
except:
s.append(a)
try:
j = s.index(b)
del s[j]
s.append(b)
except:
s.append(b)
for i, table in enumerate(models.keys()[:]):
for field in models[table]._meta.fields:
if isinstance(field, (ForeignKey, OneToOneField)):
tname = field.rel.to._meta.db_table
if not models.has_key(tname) or tname == table:
continue
rules.append((tname, table))
order(s, (tname, table))
n = []
for k, v in models.items():
if s.count(k) == 0:
n.append(k)
return [models[k] for k in s+n]
def _find_key(d, key):
if not d:
return None
for k, v in d.items()[:]:
if k == key:
return d
else:
result = _find_key(v, key)
if result is not None:
return result
def loaddb(app_labels, format, options):
from django.db import connection, transaction
if options.verbose:
print "Begin to load data for %s format...\n" % format
models = _get_table_order(app_labels)
cursor = connection.cursor()
errornum = 0
if not options.remain and not options.stdout:
m = models[:]
m.reverse()
for model in m:
cursor.execute('DELETE FROM %s WHERE 1=1;' % quote_name(model._meta.db_table))
for table, fields in get_model_many2many_stru(model):
cursor.execute('DELETE FROM %s WHERE 1=1;' % quote_name(table))
success = True
for model in models:
try:
load_model(cursor, model, format, options)
setSequence(cursor, model)
for table, fields in get_model_many2many_stru(model):
load_model(cursor, (table, fields), format, options)
setSequence(cursor, model)
except Exception, e:
import traceback
traceback.print_exc()
sys.stderr.write("Problem loading %s format '%s' : %s\n" % (format, model, str(e)))
success = False
errornum += 1
if options.errorquit:
transaction.rollback_unless_managed()
print "Error found! The database has been rollbacked!"
raise
if success:
transaction.commit_unless_managed()
else:
transaction.rollback_unless_managed()
if errornum:
print "There are %d errors found! The database has been rollbacked!" % errornum
else:
print "Successful!"
def load_model(cursor, model, format, options):
datadir, verbose, stdout = options.datadir, options.verbose, options.stdout
sql = 'INSERT INTO %s (%s) VALUES (%s);'
if isinstance(model, (tuple, list)):
filename = os.path.join(datadir, model[0] + '.%s' % format)
fields, default = model[1], {}
else:
opts = model._meta
filename = os.path.join(datadir, opts.db_table + '.%s' % format)
fields, default = get_model_stru(model)
if verbose:
print '..Dealing %s for %s format...\n' % (filename, format)
if not os.path.exists(filename):
if verbose:
print '..%s does not exists, so Skip it..\n' % filename
return
try:
objs = {}
if format == 'py':
s = []
f = file(filename, 'rb')
for line in f:
varname = line.split('=')[0]
if varname.strip() != 'records':
s.append(line)
else:
d = {}
exec ''.join(s) in d
objs['table'] = d.get('table', '')
objs['fields'] = d.get('fields', [])
objs['default'] = d.get('default', {})
objs['records'] = f
break
else:
# f = file(filename, 'rb')
# objs = f.read()
# records = objs['records']
# f.close()
raise 'Not support this format %s' % format
fs = objs['fields']
table = objs['table']
default.update(objs.get('default', {}))
count = 0
for row in objs["records"]:
if row.strip() == ']':
break
row = eval(row)
d = dict(zip(fs, row))
sql_fields = []
sql_values = []
for fd in fields:
v = None
if d.has_key(fd):
v = d[fd]
else:
if default.get(fd, None) is not None:
kind, value = default[fd]
if not kind or kind == 'value':
v = value
elif kind == 'reference':
try:
v = d[value]
except KeyError:
sys.stderr.write("Referenced field [%s] does not exist\n" % value)
raise
elif kind == 'date':
if not value or value == 'now':
v = datetime.date.today().strftime('%Y-%m-%d')
else:
v = value
#add time support
elif kind == 'time':
if not value or value == 'now':
v = datetime.datetime.now().strftime('%H:%M:%S')
else:
v = value
elif kind == 'datetime':
if not value or value == 'now':
v = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
else:
v = value
else:
raise Exception, "Cann't support this default type [%s]\n" % kind
if v is not None:
sql_fields.append(fd)
sql_values.append(v)
e_sql = sql % (quote_name(table),
','.join(map(quote_name, sql_fields)), ','.join(['%s'] * len(sql_fields)))
if stdout:
print e_sql, sql_values, '\n'
else:
try:
cursor.execute(e_sql, sql_values)
count += 1
except:
sys.stderr.write("Error sql: %s %s\n" % (e_sql, sql_values))
raise
if verbose:
print '(Total %d records)\n' % count
except Exception, e:
import traceback
traceback.print_exc()
sys.stderr.write("Problem loading %s format '%s' : %s\n" %
(format, filename, str(e)))
raise
def get_model_stru(model):
from django.db.models.fields import DateField, DateTimeField, TimeField, IntegerField
fields = []
default = {}
for f in model._meta.fields:
fields.append(f.column)
v = f.get_default()
if v is not None:
default[f.column] = ('value', v)
if isinstance(f, (DateTimeField, DateField, TimeField)):
if f.auto_now or f.auto_now_add:
v = datetime.datetime.now()
default[f.column] = ('value', f.get_db_prep_save(v))
# Need to fix sqlite defaulting None values to ''
if isinstance(f, IntegerField):
default[f.column] = ('value', None)
return fields, default
def get_model_many2many_stru(model):
try:
from django.db.models import GenericRel
except:
from django.contrib.contenttypes.generic import GenericRel
opts = model._meta
for f in opts.many_to_many:
fields = []
if not isinstance(f.rel, GenericRel):
fields.append('id')
fields.append(f.m2m_column_name())
fields.append(f.m2m_reverse_name())
yield f.m2m_db_table(), fields
def dumpdb(app_labels, format, options):
from django.db.models import get_app, get_apps, get_models
datadir, verbose, stdout = options.datadir, options.verbose, options.stdout
if verbose:
print "Begin to dump data for %s format...\n" % format
if len(app_labels) == 0:
app_list = get_apps()
else:
app_list = [get_app(app_label) for app_label in app_labels]
if not os.path.exists(datadir):
os.makedirs(datadir)
errornum = 0
for app in app_list:
for model in get_models(app):
try:
write_result(dump_model(model), format, options)
for result in dump_many2many(model):
write_result(result, format, options)
except Exception, e:
import traceback
traceback.print_exc()
sys.stderr.write("Unable to dump database: %s\n" % e)
errornum += 1
if options.errorquit:
raise
if errornum:
print "There are %d errors found!" % errornum
else:
print "Successful!"
def dump_model(model):
from django.db import connection
opts = model._meta
cursor = connection.cursor()
fields, default = get_model_stru(model)
cursor.execute('select %s from %s' %
(','.join(map(quote_name, fields)), quote_name(opts.db_table)))
return call_cursor(opts.db_table, fields, cursor)
def call_cursor(table, fields, cursor):
yield table
yield fields
while 1:
rows = cursor.fetchmany(100)
if rows:
for row in rows:
yield _pre_data(row)
else:
break
def _pre_data(row):
row = list(row)
for i, fd in enumerate(row):
if isinstance(fd, datetime.datetime):
row[i] = row[i].strftime('%Y-%m-%d %H:%M:%S') # + '.' + str(row[i].microsecond).rstrip('0')
elif isinstance(fd, datetime.date):
row[i] = row[i].strftime('%Y-%m-%d')
elif isinstance(fd, datetime.time):
row[i] = row[i].strftime('%H:%M:%S')
elif isinstance(fd, decimal.Decimal):
row[i] = row[i].__float__()
return row
def dump_many2many(model):
from django.db import connection
cursor = connection.cursor()
for table, fields in get_model_many2many_stru(model):
cursor.execute('select %s from %s' %
(','.join(map(quote_name, fields)), quote_name(table)))
yield call_cursor(table, fields, cursor)
def write_result(result, format, options):
table = result.next()
fields = result.next()
filename = os.path.join(options.datadir, table + '.%s' % format)
if options.verbose:
print '..Dumping %s ...\n' % filename
if not options.stdout:
f = file(filename, 'wb')
else:
f = sys.stdout
print >>f, 'table = %r' % table
print >>f, 'fields = %r' % fields
print >>f, '#default item format: "fieldname":("type", "value")'
print >>f, 'default = {}'
print >>f, 'records = ['
i = 0
for t in result:
print >>f, repr(t)
i += 1
print >>f, ']'
if options.verbose:
print '(Total %d records)\n' % i
if not options.stdout:
f.close()
def quote_name(s):
from django.db import backend
if quote_flag == 'old':
return backend.quote_name(s)
else:
return backend.DatabaseOperations().quote_name(s)
#thanks for Matthew Wensin
def setSequence(cursor, model):
from django.conf import settings
from django.db.models import AutoField
# postgresql: reset sequence
if settings.DATABASE_ENGINE in ('postgresql_psycopg2', 'postgresql'):
autofields = [field for field in model._meta.fields if isinstance(field, AutoField)]
for f in autofields:
seq = quote_name('%s_%s_seq' % (model._meta.db_table, f.name))
cursor.execute("SELECT nextval('%s');" % seq)
nb = cursor.fetchall()[0][0]
if nb:
cursor.execute('ALTER SEQUENCE %s RESTART WITH %d;' % (seq, nb))
def get_usage():
usage = """
%prog [options] action [applist]:
action: dump load
"""
return usage
def execute_from_command_line(argv=None):
# Use sys.argv if we've not passed in a custom argv
if argv is None:
argv = sys.argv
# Parse the command-line arguments. optparse handles the dirty work.
parser = OptionParser(usage=get_usage())
parser.add_option('--settings',
help='Python path to settings module, e.g. "myproject.settings.main". If this isn\'t provided, the DJANGO_SETTINGS_MODULE environment variable will be used.')
parser.add_option('-d', '--dir', help='Output/Input directory.', default="datadir", dest="datadir")
# parser.add_option('-f', '--format', help='Data format(json, xml, python).', type="choice",
# choices=['json', 'xml', 'python'], default='json')
parser.add_option('-v', '--verbose', help='Verbose mode', action='store_true')
parser.add_option('-s', '--stdout', help='Output the data to stdout', action='store_true')
parser.add_option('-r', '--remain', help='Remain the records of the tables, default will delete all the records. Only used for loading.', action='store_true')
parser.add_option('-e', '--errorquit', help='If there are errors occured, then exit the program.', action='store_true')
options, args = parser.parse_args(argv[1:])
if len(args) == 0:
parser.print_help()
sys.exit(0)
action = args[0]
apps = args[1:]
if options.settings:
os.environ['DJANGO_SETTINGS_MODULE'] = options.settings
else:
from django.core.management import setup_environ
try:
import settings
except ImportError:
print "You don't appear to have a settings file in this directory!"
print "Please run this from inside a project directory"
sys.exit()
setup_environ(settings)
global quote_flag
import django.db
try:
# Earlier Django versions.
django.db.backend.quote_name
quote_flag = 'old'
except AttributeError:
# Django after backend refactoring.
quote_flag = 'new'
if action == 'dump':
dumpdb(apps, 'py', options)
elif action == 'load':
loaddb(apps, 'py', options)
else:
parser.print_help()
if __name__ == '__main__':
execute_from_command_line()
|
Comments
I used this script to move my project from sqlite to MySQL.
My project was using sqlite. I ran 'python db_dump.py dump'; it didn't tell me much but I could see it created a new folder full of my data. I then modified settings.py to have details for my fresh MySQL database/user, ran './manage.py syncdb', then re-ran db_dump.py in 'load' mode. It perfectly copied all of my data into the new MySQL database and I'm now running at a much zippier speed.
Until now I wasn't aware of a simple way to move database platforms with Django. Good work, limodou!
#
You can add -v option to see the tedious output, and if you want to test the output data, you can add -s option. And after
dumporloadcommand, you can specify the apps, just like:user books, if there are more than one app, you should seperate them with blank. I'm very glad that you like it.#
Man what a useful script!
Thanks for this limodou!
#
this is one seriously cool script.
I was going to write a simple db dump & import tool for my dj projects extracting just some tables given the db but this takes the idea one step forward. Two questions I did have:
1) I tried "python db_dump.py --settings=/path/to/settings/ dump ". I could only get the script to work directly in the settings dir. Do you have any idea why?
2) I have lots of text with single quotes in it. In the stored version. How do you handle this case when storing the data in a list?
Hello there! It's certainly a hot today.
Would the result be stored as ...
[' Hello there! It's certainly a hot today.']
this will (should) fail for me.
#
for
--settings, it should be a module format but not a real path. So you can set PYTHONPATH to your project first, then you can just dump without--settingsparameter.because I using repr(r) to dump the data, so it'll convert to python string representation, and for "'" it'll convert to "'", so I think it no error for db_dump.py. And you can try to dump the data with -s parameter, it'll output the dump result to screen. Or directly dump the data to directory, and open the data file to see if it's correct.
#
Hi limodou, thanks for replying.
Beaut, I'll try that. I've seen something similar in code so I'll try it. I was using a 'real path'. Hence the problem.
I dumped the data as you suggest to stdout and the "'" & '"' are both covered. I wonder if you can mix a single quote and a double quote into the string? I think I'll check this. One thing you can do with django is write test cases so I might add some tests to check these boundary cases. Let you know when I'm done.
#
#
'I don't know how to add test case in django now'
It's pretty easy. You can use 'doctests' or 'unittests' ~ http://www.djangoproject.com/documentation/testing
#
See also the dbpickle.py script.
#
thanks. I'v seen it. And the design is different.
#
It seems that boolean fields dumped incorrectly. When dumping from mysql and loading into postgres database I saw an error:
#
I didn't test it in postgres, and how to deal with boolean field in postgres?
#
I edit file datadir/auth_user.py by hand and set fields corresponding to is_staff, is_active and is_superuser to True instead of 1. Then load to postgres with no errors.
Dumping from postgres gives True values in dump file.
#
I simply use
rows = cursor.fetchmany(100)to get the result, and directly dump the result by repr(). So maybe the boolean type processing is different between postgres and mysql or sqlite3.#
Version 1.6 does not work after django changeset 5172. Look at: http://code.djangoproject.com/changeset/5172 (Backwards incompatible change)
Exception Value: cannot import name GenericRel
;)
#
Regarding django changeset 5172, I think that this should fix the problem:
#
Yes, look at: http://code.djangoproject.com/wiki/BackwardsIncompatibleChanges#Genericrelationshavemoved
I made:
My diff: http://pylucid.net/trac/changeset/1000
#
Thanks, I'v changed it now.
#
Hi,
I have problem loading data to postgres. Data are imported succesfully, but no pkey_seq is set. It will cause "duplicate primary key" exception when adding new item.
I think the problem is following: When pk is specified in insert, sequence is not incremented. The solution could be to setval('seq', MAX(pk)+1) after inserts (only if database postgres ?).
What do you think?
#
I'm familiar with postgres, so could write a patch for that? I'll merge it.
#
If you are following the Django svn version, lines 332 and 367 of db_dump.py will need amending twice in each line replacing:
backend.quote_name
with:
backend.DatabaseOperations().quote_name
#
DatebaseOperations() insertion also required on lines 124, 126, 235, 236.
#
Thanks, I'v changed it.
#
limodou - thanks for the script! Here's a patch for the change lenducha suggested... the current method using count() doesn't work reliably because if a record in the middle of a sequence has been erased, a simple count will be off...
#
thanks thn, and I updated the source code. But there is somewhat different from your patch.
#
Why wouldn't you just use mysqldump?
#
First, it's not limited in data dump, you can use it to migrate data from one database to another type database.
Second, you can use it to resolve model structure change.
#
Looking quite tasty, I'll try this. But, what is the difference between 'db_dump' and already existing 'dumpdata' and 'loaddata' ?
#
I'm using django 0.96 and I was migrating from sqlite to MySQL. I ran into a problem loading into MySQL with this snippet due to a many-to-many relationship between my models. (Incidentally, Django 0.96 didn't handle this correctly for MySQL and I had to use a variant on this script).
The problem was that db_dump tried to populate the many to many table before both of the referenced tables were created. This caused a foreign key error since the referenced rows in one of the tables hadn't been created yet.
The patch below fixed this for me - basically delays many to many table loading until after the referenced tables are loaded:
#
ManyToManyFields with a "through" arg cause an IntegrityError on load, because db_dump tries to dump or load the table twice: once for the model with the ManyToManyField, once for the through Model. The following patch fixes it for me.
Index: db_dump.py
--- db_dump.py (revision 375) +++ db_dump.py (working copy) @@ -310,12 +310,16 @@
fields = []
if not isinstance(f.rel, GenericRel):
fields.append('id')
fields.append(f.m2m_column_name())
fields.append(f.m2m_reverse_name())
yield f.m2m_db_table(), fields
if this m2m field has a "through" model, the table is created by that model,
so we don't need to create it here
if not f.rel.through:
fields = []
if not isinstance(f.rel, GenericRel):
fields.append('id')
fields.append(f.m2m_column_name())
fields.append(f.m2m_reverse_name())
yield f.m2m_db_table(), fields
def dumpdb(app_labels, format, options):
#
oops, well the markdown tags don't seem to be working, but you can also find that patch here: http://code.google.com/p/db-dump/issues/detail?id=1
#
This appears not to work if you use inherited models. E.g. the approved Django way of doing user-profiles is now to derive an object from auth.User.
But if you do this, the code (v2.2) tries to insert all the fields into the child class, and not put anything into the parent class.
Anyone else come acrorss this?
#
I think the fix to work with inherited models properly is to just change line 292 above from:
to:
This works in my simple test, and appears to make sense, but I don't know enough about Django internals to be sure this is safe.
#