""" Retrieve a list of information about countries, pulled from GeoNames. Example entry: {u'Area(in sq km)': u'33843', u'Capital': u'Chi\u015fin\u0103u', u'Continent': u'EU', u'Country': u'Moldova', u'CurrencyCode': u'MDL', u'CurrencyName': u'Leu', u'EquivalentFipsCode': u'', u'ISO': u'MD', u'ISO-Numeric': u'498', u'ISO3': u'MDA', u'Languages': u'mo,ro,ru,gag,tr', u'Phone': u'373', u'Population': u'4324000', u'Postal Code Format': u'MD-####', u'Postal Code Regex': u'^(?:MD)*(\\d{4})$', u'fips': u'MD', u'geonameid': u'617790', u'neighbours': u'RO,UA', u'tld': u'.md'} """ import urllib, codecs COUNTRY_INFO_URL = "http://download.geonames.org/export/dump/countryInfo.txt" def get_geonames_country_data(): "Returns a list of dictionaries, each representing a country" udata = urllib.urlopen(COUNTRY_INFO_URL).read().decode('utf8') # Strip the BOM if udata[0] == codecs.BOM_UTF8.decode('utf8'): udata = udata[1:] # Ignore blank lines lines = [l for l in udata.split('\n') if l] # Find the line with the headers (starts #ISO) header_line = [l for l in lines if l.startswith('#ISO')][0] headers = header_line[1:].split('\t') # Now get all the countries country_lines = [l for l in lines if not l.startswith('#')] countries = [] for line in country_lines: countries.append(dict(zip(headers, line.split('\t')))) return countries