InnoDB tables within MySQL have no ability to defer reference checking until after a transaction is complete. This prevents most dumpdata/loaddata cycles unless the dump order falls so that referenced models are dumped before models that depend on them.
This code uses Ofer Faigon's topological sort to sort the models so that any models with a ForeignKey relationship are dumped after the models they reference.
class Entry(models.Model):
txt = ....
class Comment(models.Model):
entry = models.ForeignKey(Entry)
This code will ensure that Entry always gets dumped before Comment.
Fixtures are an important part of the django Unit Testing framework so I really needed to be able to test my more complicated models.
Caveats
-
You use this snippet to dump the data and the built in manage.py loaddata to load the fixture output by this program. A similar solution could be applied to the XML processing on the loaddata side but this sufficed for my situations.
-
This code does not handle Circular or self-references. The loaddata for those needs to be much smarter.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | #!/usr/bin/env python
# Author: <[email protected]>
#
# Purpose: Given a set of classes, sort them such that ones that have
# ForeignKey relationships with later keys are show up after
# the classes they depend on
#
# Created: 12/27/07
#
# Modified: 3/20/08
#
# Graham King added the abilility to walk other ManyToMany
# relationships as well as handling fixtures such as content types
from django.core.management import setup_environ
import settings
setup_environ(settings)
import sys
from django.db import models
from django.core.management.base import CommandError
from django.core import serializers
# Original topological sort code written by Ofer Faigon
# (www.bitformation.com) and used with permission
def foreign_key_sort(items):
"""Perform topological sort.
items is a list of django classes
Returns a list of the items in one of the possible orders, or None
if partial_order contains a loop.
"""
def add_node(graph, node):
"""Add a node to the graph if not already exists."""
if not graph.has_key(node):
graph[node] = [0] # 0 = number of arcs coming into this node.
def add_arc(graph, fromnode, tonode):
"""Add an arc to a graph. Can create multiple arcs.
The end nodes must already exist."""
graph[fromnode].append(tonode)
# Update the count of incoming arcs in tonode.
graph[tonode][0] = graph[tonode][0] + 1
# step 1 - create a directed graph with an arc a->b for each input
# pair (a,b).
# The graph is represented by a dictionary. The dictionary contains
# a pair item:list for each node in the graph. /item/ is the value
# of the node. /list/'s 1st item is the count of incoming arcs, and
# the rest are the destinations of the outgoing arcs. For example:
# {'a':[0,'b','c'], 'b':[1], 'c':[1]}
# represents the graph: c <-- a --> b
# The graph may contain loops and multiple arcs.
# Note that our representation does not contain reference loops to
# cause GC problems even when the represented graph contains loops,
# because we keep the node names rather than references to the nodes.
graph = {}
# iterate once for the nodes
for v in items:
add_node(graph, v)
# iterate again to pull out the dependency information
for a in items:
rel_lst = related_field_filter(a._meta.fields) # Add foreign keys
rel_lst.extend( a._meta.many_to_many ) # Add many to many
for b in rel_lst:
# print "adding arc %s <- %s" % (b.rel.to, a)
add_arc(graph, b.rel.to, a)
# Step 2 - find all roots (nodes with zero incoming arcs).
roots = [node for (node,nodeinfo) in graph.items() if nodeinfo[0] == 0]
# step 3 - repeatedly emit a root and remove it from the graph. Removing
# a node may convert some of the node's direct children into roots.
# Whenever that happens, we append the new roots to the list of
# current roots.
sorted = []
while len(roots) != 0:
# If len(roots) is always 1 when we get here, it means that
# the input describes a complete ordering and there is only
# one possible output.
# When len(roots) > 1, we can choose any root to send to the
# output; this freedom represents the multiple complete orderings
# that satisfy the input restrictions. We arbitrarily take one of
# the roots using pop(). Note that for the algorithm to be efficient,
# this operation must be done in O(1) time.
root = roots.pop()
sorted.append(root)
for child in graph[root][1:]:
graph[child][0] = graph[child][0] - 1
if graph[child][0] == 0:
roots.append(child)
del graph[root]
if len(graph.items()) != 0:
# There is a loop in the input.
raise CircularReferenceException, "Circular Dependency Detected in Input"
return sorted
# Problem:
#
class CircularReferenceException(Exception):
pass
def isclass(obj):
if str(obj).find("<class") == 0:
return True
return False
def find_classes(module):
classes = []
for k,obj in module.__dict__.iteritems():
if isclass(obj):
# print k, "is a class!"
classes.append(obj)
return classes
def model_filter(lst):
""" Given a list of classes, Filter out everything that's not an instance of models.Model """
return filter(lambda x: issubclass(x, models.Model), lst)
def related_field_filter(lst):
""" given a list of fields, return the ones that are related """
ret = []
for f in lst:
s = str(f)
if s.find('django.db.models.fields.related') >= 0:
ret.append(f)
return ret
def main():
from optparse import OptionParser, make_option
from django.db.models import get_app, get_apps, get_models
option_list = (make_option('--format', default='json', dest='format',
help='Specifies the output serialization format for fixtures.'),
make_option('--indent', default=None, dest='indent', type='int',
help='Specifies the indent level to use when pretty-printing output'),
make_option('--traceback', default=False, dest='traceback', type='int',
help='Specifies the indent level to use when pretty-printing output'),
)
help = 'Output the contents of the database as a fixture of the given format.'
args = '[appname ...]'
parser = OptionParser(option_list=option_list)
(options,app_labels) = parser.parse_args()
if len(app_labels) == 0:
app_list = get_apps()
else:
app_list = [get_app(app_label) for app_label in app_labels]
# Check that the serialization format exists; this is a shortcut to
# avoid collating all the objects and _then_ failing.
if options.format not in serializers.get_public_serializer_formats():
raise CommandError("Unknown serialization format: %s" % options.format)
try:
serializers.get_serializer(options.format)
except KeyError:
raise CommandError("Unknown serialization format: %s" % options.format)
objects = []
models = []
for app in app_list:
models.extend( get_models(app) )
models = foreign_key_sort(models)
for model in models:
objects.extend(model._default_manager.all())
try:
print serializers.serialize(options.format, objects, indent=options.indent)
except Exception, e:
if options.traceback:
raise
raise CommandError("Unable to serialize database: %s" % e)
if __name__=='__main__':
main()
|
More like this
- Template tag - list punctuation for a list of items by shapiromatron 8 months, 1 week ago
- JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 8 months, 2 weeks ago
- Serializer factory with Django Rest Framework by julio 1 year, 3 months ago
- Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 4 months ago
- Help text hyperlinks by sa2812 1 year, 4 months ago
Comments
Please login first before commenting.