Use this script to import the MaxMind GeoIP Lite CSV datasets into your database. The import needs at least 200MB of RAM, and the resulting database will be ~400MB. Place the script in the same directory as the models, and make sure to set DEBUG=False
to prevent running out of memory during the import.
# Copyright (c) 2007, Justin Bronn
# All rights reserved.
#
# Released under New BSD License
#
"""
These scripts are used to import the MaxMind(R) GeoIP Lite CSV files.
In order to save memory during import ensure DEBUG=False in your settings.
"""
from models import Country, CountryBlock, Location, LocationBlock
from django.contrib.gis.geos import Point
from csv import reader
import sys
def country_import(csv_file):
    """
    Import a MaxMind GeoIP Country CSV file into the database.

    ``csv_file`` is the path of the CSV file to read.  Its first row is
    treated as a header and skipped.  Every remaining row must hold exactly
    six columns, unpacked as ``startip, endip, ipfrom, ipto, country,
    country_name``; a row of any other width raises ``ValueError``.

    For each row the matching ``Country`` (looked up by name and code) is
    fetched, or created the first time it is seen, and one ``CountryBlock``
    pointing at it is saved.  Newly created countries are reported on
    stdout.  Returns ``None``.
    """
    # The context manager closes the file even when a row fails part-way
    # through the import, not only after a fully successful run.
    with open(csv_file) as fh:
        table = reader(fh)
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(table, None)
        for startip, endip, ipfrom, ipto, country, country_name in table:
            cntry, created = Country.objects.get_or_create(name=country_name, code=country)
            if created:
                # Single-argument parenthesised form prints identically
                # whether ``print`` is a statement or a function.
                print('Created: %s' % cntry)
            block = CountryBlock(ipto=ipto, ipfrom=ipfrom, startip=startip, endip=endip, country=cntry)
            block.save()
# Location rows whose id is below this value carry the points for the
# countries themselves (the original script's "first 244 entries").
_COUNTRY_LOCID_LIMIT = 244

# How often (in saved rows) a progress line is printed.
_PROGRESS_EVERY = 10000


def _import_locations(loc_csv):
    """Save one ``Location`` per row of ``loc_csv``, updating country points."""
    with open(loc_csv) as fh:
        table = reader(fh)
        next(table, None)  # discard the header row (tolerates an empty file)

        # Caching the countries table in memory
        countries = dict((m.code, m) for m in Country.objects.all())

        for count, row in enumerate(table, 1):
            locid, cntry, reg, cty, postal, lat, lon, dma, area = row
            pnt = Point(float(lon), float(lat))

            if int(locid) < _COUNTRY_LOCID_LIMIT:
                # This row describes a country: update its point, creating
                # the country when it does not exist yet.
                try:
                    country = Country.objects.get(code=cntry)
                except Country.DoesNotExist:
                    # Only "not found" means "create"; any other failure
                    # (database error, Ctrl-C, ...) now propagates instead
                    # of being silently turned into a new Country.
                    country = Country(code=cntry, point=pnt)
                else:
                    country.point = pnt
                country.save()
                countries[cntry] = country  # updating the country dictionary
            else:
                country = countries[cntry]

            # region and city: bytes that are not valid UTF-8 are dropped
            region = reg.decode('UTF-8', 'ignore')
            city = cty.decode('UTF-8', 'ignore')

            # Constructing the Location
            loc = Location(locid=locid, country=country, region=region, city=city,
                           postalcode=postal, point=pnt, dmacode=dma, areacode=area)
            loc.save()

            if count % _PROGRESS_EVERY == 0:
                print('Saved %d Locations so far ...' % count)


def _import_location_blocks(block_csv):
    """Save one ``LocationBlock`` per row of ``block_csv``."""
    with open(block_csv) as fh:
        table = reader(fh)
        next(table, None)  # discard the header row (tolerates an empty file)

        # This will take a little bit... and ~200+MB of RAM
        # (original author's estimate).  Written without a newline so that
        # 'DONE.' lands on the same line once the cache is built.
        sys.stdout.write('Caching Location table... ')
        sys.stdout.flush()
        locations = dict((m.locid, m) for m in Location.objects.all())
        print('DONE.')

        for count, (ipfrom, ipto, locid) in enumerate(table, 1):
            # Pulling the location from the cached table instead of
            # issuing a Location.objects.get() per row.
            loc = locations[int(locid)]
            loc_block = LocationBlock(location=loc, ipfrom=ipfrom, ipto=ipto)
            loc_block.save()

            if count % _PROGRESS_EVERY == 0:
                print('Saved %d Location Blocks so far ...' % count)


def location_import(loc_csv, block_csv):
    """
    Import the MaxMind GeoIP City location and location-block CSV files.

    ``loc_csv`` is the path of the Location CSV file.  When truthy, its
    header row is skipped and every remaining row (nine columns, unpacked as
    ``locid, cntry, reg, cty, postal, lat, lon, dma, area``) is saved as a
    ``Location``.  Rows with ``locid`` below 244 additionally set the point
    of the matching ``Country``, creating that country if it is missing.
    Rows with a higher ``locid`` must reference a country code that is
    already known, otherwise ``KeyError`` is raised.

    ``block_csv`` is the path of the Location IP block CSV file.  When
    truthy, its header row is skipped and every remaining row (three
    columns, unpacked as ``ipfrom, ipto, locid``) is saved as a
    ``LocationBlock`` attached to the ``Location`` with that ``locid``;
    an unknown ``locid`` raises ``KeyError``.

    Either argument may be falsy (e.g. ``None``) to skip that step.  The
    locations are imported first so that the blocks can refer to them.
    Progress is printed to stdout every 10000 saved rows.  Returns ``None``.
    """
    # Each step runs in its own helper so its in-memory lookup table is a
    # local there and goes out of scope as soon as the step finishes.
    if loc_csv:
        # First, importing from the Location CSV file.
        _import_locations(loc_csv)
    if block_csv:
        # Second, importing from the Location IP block CSV file
        _import_location_blocks(block_csv)
|
More like this
- Template tag - list punctuation for a list of items by shapiromatron 8 months ago
- JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 8 months, 1 week ago
- Serializer factory with Django Rest Framework by julio 1 year, 3 months ago
- Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 3 months ago
- Help text hyperlinks by sa2812 1 year, 4 months ago
Comments
If you are using Python < 2.5, csv.reader() will throw an error if any of the cells contain \r characters. To get around this, change this line:
to this:
#
the backslash before the r didn't show up in my previous comment, but it's a return - \r
#
The latest versions of CSV files include, on the first line, a Copyright statement.
The script above should be changed as follows:
Replace all occurrences of:
with:
Enjoy!
#
Please login first before commenting.