1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Copyright (c) 2007, Justin Bronn
# All rights reserved.
#
# Released under New BSD License
#
"""
These scripts are used to import the MaxMind(R) GeoIP Lite CSV files.
To save memory during the import, ensure DEBUG=False in your settings
(with DEBUG=True, Django keeps a record of every SQL query it executes).
"""
from models import Country, CountryBlock, Location, LocationBlock
from django.contrib.gis.geos import Point
from csv import reader
import sys
def country_import(csv_file):
    """
    Import the GeoIP country CSV file located at `csv_file`.

    The first row is treated as a header and discarded.  Every following
    row must contain exactly six columns, in this order: startip, endip,
    ipfrom, ipto, country code, country name.  For each row the matching
    Country is fetched -- or created, looked up by both name and code --
    and a CountryBlock tying that Country to the IP range is saved.

    Returns None.  Raises IOError when the file cannot be opened,
    StopIteration when the file has no rows at all, and ValueError when
    a row does not have exactly six columns.
    """
    fh = open(csv_file)
    try:
        table = reader(fh)
        table.next()  # discard the header row
        for startip, endip, ipfrom, ipto, country, country_name in table:
            cntry, created = Country.objects.get_or_create(name=country_name, code=country)
            if created:
                print('Created: %s' % cntry)
            block = CountryBlock(ipto=ipto, ipfrom=ipfrom, startip=startip,
                                 endip=endip, country=cntry)
            block.save()
    finally:
        # Release the file handle even when a malformed row or a database
        # error aborts the import part-way through.
        fh.close()
def location_import(loc_csv, block_csv):
    """
    Import the GeoIP location CSV and/or the location IP-block CSV.

    `loc_csv` is the path of the Location file and `block_csv` the path of
    the Location IP block file.  Either may be empty or None to skip that
    phase; when both are given, locations are imported first so that the
    blocks imported afterwards can refer to them.

    Returns None.  Raises IOError when a given file cannot be opened,
    ValueError when a row has the wrong number of columns or a
    non-numeric latitude/longitude/locid, and KeyError when a row refers
    to a country code or location id that is not in the database.
    """
    if loc_csv:
        _import_locations(loc_csv)
    if block_csv:
        _import_location_blocks(block_csv)


def _import_locations(loc_csv):
    """Save one Location per row of `loc_csv`, updating Country points on the way."""
    fh = open(loc_csv)
    try:
        table = reader(fh)
        table.next()  # discard the header row
        # Caching the countries table in memory.  Being local to this helper,
        # the cache is released as soon as the location phase returns.
        countries = dict((m.code, m) for m in Country.objects.all())
        i = 0
        for locid, cntry, reg, cty, postal, lat, lon, dma, area in table:
            pnt = Point(float(lon), float(lat))  # x is longitude, y is latitude
            # The points for the countries are in the first 244 entries --
            # pulling these out and updating the points.
            # NOTE(review): `< 244` excludes locid 244 itself; confirm against
            # the data whether that row is a country entry too.
            if int(locid) < 244:
                try:
                    country = Country.objects.get(code=cntry)
                    country.point = pnt
                except Country.DoesNotExist:
                    # Only a missing row means "create it"; any other failure
                    # (database error, duplicate rows, Ctrl-C) must propagate
                    # instead of silently adding another Country.
                    country = Country(code=cntry, point=pnt)
                country.save()
                countries[cntry] = country  # updating the country dictionary
            else:
                country = countries[cntry]
            # Region and city: decode to unicode, dropping any bytes that are
            # not valid UTF-8.
            # NOTE(review): confirm the CSV really is UTF-8 -- with 'ignore',
            # non-ASCII characters of a differently encoded file vanish silently.
            region = reg.decode('UTF-8', 'ignore')
            city = cty.decode('UTF-8', 'ignore')
            # Constructing the Location
            loc = Location(locid=locid, country=country, region=region, city=city,
                           postalcode=postal, point=pnt, dmacode=dma, areacode=area)
            loc.save()
            i += 1
            if i % 10000 == 0:
                print('Saved %d Locations so far ...' % i)
    finally:
        # Close the file even when the import aborts part-way through.
        fh.close()


def _import_location_blocks(block_csv):
    """Save one LocationBlock per row of `block_csv`, linked to its existing Location."""
    fh = open(block_csv)
    try:
        table = reader(fh)
        table.next()  # discard the header row
        # This will take a little bit... and ~200+MB of RAM
        sys.stdout.write('Caching Location table... ')
        sys.stdout.flush()  # show the message before the slow query starts
        locations = dict((m.locid, m) for m in Location.objects.all())
        print('DONE.')
        i = 0
        for ipfrom, ipto, locid in table:
            # Pulling the location from our cached table (less expensive than
            # Location.objects.get()).
            loc = locations[int(locid)]
            loc_block = LocationBlock(location=loc, ipfrom=ipfrom, ipto=ipto)
            loc_block.save()
            i += 1
            if i % 10000 == 0:
                print('Saved %d Location Blocks so far ...' % i)
    finally:
        # Close the file even when the import aborts part-way through.
        fh.close()
|
Comments
If you are using Python < 2.5, csv.reader() will throw an error if any of the cells contain \r (carriage return) characters. To get around this, change this line:
to this:
#
The backslash before the "r" did not show up in my previous comment as posted; the character meant is a carriage return (\r).
#