Convert a pandas dataframe to geojson
Views: 1030
Wrote on April 12, 2020, 2:34 a.m.
For a long time I struggled to convert plain spreadsheet data with coordinate columns into spatial feature data that a web application can use. I tried many approaches but had a hard time configuring the environment on different platforms. It's true that you can use ArcGIS or QGIS to convert the data easily, but an automated approach in Python is often the better option when you need to bulk-process a large amount of data.
Now, pandas and pyproj are the only third-party packages you need (the json module used below ships with Python).
Define the coordinate projection of input data
import functools
import json

import pandas as pd
from pyproj import CRS, Transformer
# EPSG code (digits only) of the projection the input coordinates use.
projection_code = "2232"
# Load the sheet and keep only its first five rows.
df = pd.read_csv('./data_security.csv').head(5)
Define a function for converting coordinates
@functools.lru_cache(maxsize=16)
def _transformer_for(epsg_code):
    """Return a transformer from ``EPSG:<epsg_code>`` to WGS84 (EPSG:4326).

    Building CRS and Transformer objects is expensive, so the result is cached
    per EPSG code and reused across rows instead of being rebuilt every call.
    """
    proj_in = CRS("EPSG:{}".format(epsg_code))
    proj_out = CRS("EPSG:4326")
    # always_xy=True fixes the axis order to (x, y) in and (lon, lat) out,
    # regardless of the axis order each CRS definition declares.
    return Transformer.from_crs(proj_in, proj_out, always_xy=True)


def convertXY(row, latlng=None):
    """Convert one row's projected coordinates to WGS84 latitude/longitude.

    row    : mapping (e.g. a dataframe row) with 'X_COORD' and 'Y_COORD'
             entries expressed in the CRS named by the module-level
             ``projection_code``
    latlng : "lat" to return only the latitude, "lng" to return only the
             longitude; any other value returns the ``(lat, lng)`` pair
    """
    transformer = _transformer_for(projection_code)
    lng, lat = transformer.transform(row['X_COORD'], row['Y_COORD'])
    if latlng == "lat":
        return lat
    if latlng == "lng":
        return lng
    return lat, lng
Check the number of rows and get the field names
# Report how many rows the working frame holds.
print('We have {} rows'.format(df.shape[0]))
# Notebook-style bare expression: evaluates to the column names as text;
# nothing is printed when this runs as a plain script.
repr(df.columns.tolist())
Turn the data into a GeoJSON-formatted Python dictionary
def _json_safe(value):
    """Return ``value`` in a form ``json.dumps`` can emit as valid JSON.

    Missing scalars (None, NaN, NaT, pd.NA) become None, and NumPy scalars are
    unwrapped to the equivalent built-in Python object. Anything else is
    returned unchanged.
    """
    if pd.api.types.is_scalar(value):
        if pd.isna(value):
            # NaN would be written as the bare token NaN, which is not JSON.
            return None
        if hasattr(value, 'item'):
            # NumPy scalars (e.g. numpy.int64) are rejected by json.dumps.
            return value.item()
    return value


def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """
    Build a GeoJSON FeatureCollection dictionary with one Point feature per row.

    df : the dataframe to convert to geojson
    properties : a list of columns in the dataframe to turn into geojson feature properties
    lat : the name of the column in the dataframe that contains latitude data
    lon : the name of the column in the dataframe that contains longitude data

    Returns a dict of the form {'type': 'FeatureCollection', 'features': [...]}.
    Missing property values are stored as None (JSON null). A row whose
    latitude or longitude is missing gets 'geometry': None, the GeoJSON
    representation of an unlocated feature.
    """
    features = []
    # loop through each row in the dataframe and convert each row to geojson format
    for _, row in df.iterrows():
        # GeoJSON positions are ordered [longitude, latitude]
        x, y = _json_safe(row[lon]), _json_safe(row[lat])
        if x is None or y is None:
            geometry = None
        else:
            geometry = {'type': 'Point', 'coordinates': [x, y]}
        # one feature (aka, converted dataframe row) per row, carrying the requested columns
        features.append({
            'type': 'Feature',
            'properties': {prop: _json_safe(row[prop]) for prop in properties},
            'geometry': geometry,
        })
    return {'type': 'FeatureCollection', 'features': features}
Choose the columns for GeoJSON
# Columns copied into each feature's "properties".
useful_columns = ['MONTH', 'YEAR', 'District', 'Source', 'LOCID', 'GPM_RATE', 'Date']

# df_to_geojson reads 'latitude' / 'longitude' columns by default. When the
# sheet does not already provide them, derive them from the projected
# X_COORD / Y_COORD values with convertXY.
if 'latitude' not in df.columns or 'longitude' not in df.columns:
    lat_lng_pairs = [convertXY(row) for _, row in df.iterrows()]
    # assign() returns a new frame, so a sliced frame is never written into.
    df = df.assign(
        latitude=[pair[0] for pair in lat_lng_pairs],
        longitude=[pair[1] for pair in lat_lng_pairs],
    )

geojson_dict = df_to_geojson(df, properties=useful_columns)
# Serialise with the standard-library json module (imported at the top of the file).
geojson_str = json.dumps(geojson_dict, indent=2)
# Notebook-style bare expression: shows the text in an interactive session,
# does nothing when run as a plain script.
geojson_str
Output the data in .js format
# Write the GeoJSON text as a JavaScript assignment to a `dataset` variable.
output_filename = './dataset.js'
with open(output_filename, mode='w') as output_file:
    print('var dataset = {};'.format(geojson_str), file=output_file, end='')
# Report the number of features in the collection that was just written.
print('{} geotagged features saved to file'.format(len(geojson_dict['features'])))
Output the data in GeoJSON format
# Write the GeoJSON text, unwrapped, to a .geojson file.
output_filename = './dataset.geojson'
with open(output_filename, mode='w') as output_file:
    output_file.writelines([geojson_str])
# Report the number of features in the collection that was just written.
print('{} geotagged features saved to file'.format(len(geojson_dict['features'])))