Nothing new here. I just wrote a little bit of code to geocode a large file of addresses. The R code is as follows.
###########################################
# check and regeocoding NETS address
# using Google Map API
###########################################
# Batch-geocode every row of addresses.csv with gGeoCode() and write the
# results to GeoResult.csv: one output row per candidate match returned for an
# address, or a single row with NA coordinates when the lookup did not succeed.
setwd("C:/Geocoding")
source("C:/Geocoding/GIS_Google_Map_API.R") # code listed in the previous blog

# Read every column as text: ZIP and Plus4 would otherwise be parsed as
# numbers and lose their leading zeros (e.g. "02134" -> 2134).
address <- read.csv("addresses.csv", header = TRUE, colClasses = "character")

# Return one field of a single-row data frame as a string; a missing column,
# an NA, or an empty value all become "".
get_field <- function(addr_item, name) {
  value <- addr_item[[name]]
  if (is.null(value) || length(value) == 0 || is.na(value[1])) {
    return("")
  }
  as.character(value[1])
}

# Assemble "Addr, City, State, ZIP-Plus4" from one row, skipping blank parts so
# the query never carries dangling separators. Plus4 is only appended when a
# ZIP is present, because a bare "-1234" is not a usable postal code.
build_full_addr <- function(addr_item) {
  zip <- get_field(addr_item, "ZIP")
  plus4 <- get_field(addr_item, "Plus4")
  if (nzchar(zip) && nzchar(plus4)) {
    zip <- paste0(zip, "-", plus4)
  }
  parts <- c(
    get_field(addr_item, "Addr"),
    get_field(addr_item, "City"),
    get_field(addr_item, "State"),
    zip
  )
  paste(parts[nzchar(parts)], collapse = ", ")
}

result_cols <- c("item", "full_addr", "lat", "lng", "formatted_address",
                 "location_type", "status")

# Collect per-address results in a preallocated list and bind once at the end
# instead of growing a matrix with rbind() on every iteration.
n_addr <- nrow(address)
result_rows <- vector("list", n_addr)

for (i in seq_len(n_addr)) {
  full_addr <- build_full_addr(address[i, , drop = FALSE])

  # A failed request (network error, unparsable response) must not abort the
  # whole batch: record it as a failed row and carry on with the next address.
  georesult <- tryCatch(
    gGeoCode(full_addr),
    error = function(e) {
      warning("Geocoding failed for row ", i, ": ", conditionMessage(e),
              call. = FALSE)
      list(status = "REQUEST_FAILED")
    }
  )

  # Normalise status to a single string so the comparison below is always a
  # length-one test, even when the response carried no status node.
  status <- as.character(unlist(georesult$status))
  status <- if (length(status) > 0) status[1] else NA_character_
  n_match <- length(georesult$lat)

  if (identical(status, "OK") && n_match > 0) {
    # One row per candidate; the scalar columns are repeated for each match.
    result_rows[[i]] <- cbind(
      rep(i, n_match),
      rep(full_addr, n_match),
      georesult$lat,
      georesult$lng,
      georesult$formatted_address,
      georesult$location_type,
      rep(status, n_match),
      deparse.level = 0
    )
  } else {
    result_rows[[i]] <- matrix(c(i, full_addr, NA, NA, NA, NA, status),
                               nrow = 1)
  }

  Sys.sleep(0.5) # pause between requests so Google does not reject them as too many queries at a time
}

# Seed the bind with an empty 7-column matrix so an empty input file still
# yields a well-formed (header-only) result.
empty_result <- matrix(character(0), nrow = 0, ncol = length(result_cols))
GeoResult <- do.call(rbind, c(list(empty_result), result_rows))
colnames(GeoResult) <- result_cols

# Row names are left at their default, so the leading unnamed CSV column now
# holds a running row number.
write.csv(GeoResult, "GeoResult.csv")
Showing posts with label GIS. Show all posts
Showing posts with label GIS. Show all posts
Friday, March 15, 2013
Geocoding using Google Map API via R with examples
Although I've taken an ArcGIS course before, I still like to use R as much as possible for daily tasks (the reason is obvious...). Recently a task involving geocoding and location-based analysis came up and caught my attention. Basically, the task is to geocode business addresses and to check their accuracy.
Most people around me use ArcGIS or other expensive tools such as TeleAtlas to do the geocoding. As I am not a core GIS person, I would rather do the task my usual way -- in R. The code below is what I learned from the blog post "Calling Google Maps API from R", and I find that it works pretty well. So I borrowed it for future reference and added some modifications and examples based on my work experience.
For more information about Google Map API, we can refer to google's official documents: https://developers.google.com/maps/documentation/geocoding/
#####################################################################
# R code to call Google Map API
# Source codes: http://svnwang.blogspot.com/
# Reference: http://statisfaction.wordpress.com/2011/10/05/calling-google-maps-api-from-r/
# Reference: http://statisfaction.wordpress.com/2011/10/05/calling-google-maps-api-from-r/
#####################################################################
library(XML) # use install.packages("XML") if you haven't install this XML library before
# Evaluate an XPath expression against a parsed XML document and collect the
# text value of every matching node.
#   doc:  parsed XML document, handed straight to getNodeSet()
#   path: XPath expression selecting the nodes of interest
# Returns the xmlValue() of each match, simplified by sapply().
getDocNodeVal <- function(doc, path)
{
  matched_nodes <- getNodeSet(doc, path)
  sapply(matched_nodes, xmlValue)
}
# Geocode one free-text address through the Google Maps geocoding XML endpoint.
#
#   str: the address to look up (a single string).
#
# Returns a list whose elements are read from the XML response:
#   lat, lng           coordinates, one entry per <result> node
#   formatted_address  the address text reported for each result
#   location_type      the geometry/location_type reported for each result
#   status             the response status (e.g. "OK")
# The per-result elements are empty when the response has no <result> nodes.
#
# NOTE(review): Google's current Geocoding API documentation describes HTTPS
# requests carrying an API key; confirm that this keyless HTTP endpoint still
# answers before relying on it.
gGeoCode <- function(str)
{
  library(XML)
  # Percent-encode the address so spaces, commas, '#', '&' and similar
  # characters cannot truncate or corrupt the query string, and join with
  # paste0() so no stray space is inserted after "address=".
  encoded_addr <- utils::URLencode(as.character(str), reserved = TRUE,
                                   repeated = TRUE)
  u <- paste0('http://maps.google.com/maps/api/geocode/xml?sensor=false&address=',
              encoded_addr)
  doc <- xmlTreeParse(u, useInternalNodes = TRUE)
  lat <- getDocNodeVal(doc, "/GeocodeResponse/result/geometry/location/lat")
  lng <- getDocNodeVal(doc, "/GeocodeResponse/result/geometry/location/lng")
  formatted_address <- getDocNodeVal(doc, "/GeocodeResponse/result/formatted_address")
  location_type <- getDocNodeVal(doc, "/GeocodeResponse/result/geometry/location_type")
  status <- getDocNodeVal(doc, "/GeocodeResponse/status")
  list(lat = lat,                             # latitude
       lng = lng,                             # longitude
       formatted_address = formatted_address, # full addresses suggested by google
       location_type = location_type,         # geocoding accuracy
       status = status                        # status of geocoding: OK or zero_results
  )
}
# Example 1: a complete, correct street address.
str1 <- "11 Wall St, New York, NY"
gGeoCode(str1)

# Example 2: the address is too general (no street number),
# but Google can still map it by approximation.
str2 <- "Wall St, New York, NY"
gGeoCode(str2)
# Comments:
# As the result shows, several geocode / matched-address pairs come back.
# In practice you have to pick the one that best fits your need.

# Example 3: the address is inaccurate, but Google can guess what was meant.
str3 <- "11 Wall Rd, New York" # actually, it should be "Wall St"
gGeoCode(str3)

# Example 4: the address is too far from any real one, so it cannot be geocoded.
str4 <- "1021 Watl P1lz"
gGeoCode(str4)
# Evaluate an XPath expression against a parsed XML document and collect the
# text value of every matching node.
#   doc:  parsed XML document, handed straight to getNodeSet()
#   path: XPath expression selecting the nodes of interest
# Returns the xmlValue() of each match, simplified by sapply().
getDocNodeVal <- function(doc, path)
{
  matched_nodes <- getNodeSet(doc, path)
  sapply(matched_nodes, xmlValue)
}
# Geocode one free-text address through the Google Maps geocoding XML endpoint.
#
#   str: the address to look up (a single string).
#
# Returns a list whose elements are read from the XML response:
#   lat, lng           coordinates, one entry per <result> node
#   formatted_address  the address text reported for each result
#   location_type      the geometry/location_type reported for each result
#   status             the response status (e.g. "OK")
# The per-result elements are empty when the response has no <result> nodes.
#
# NOTE(review): Google's current Geocoding API documentation describes HTTPS
# requests carrying an API key; confirm that this keyless HTTP endpoint still
# answers before relying on it.
gGeoCode <- function(str)
{
  library(XML)
  # Percent-encode the address so spaces, commas, '#', '&' and similar
  # characters cannot truncate or corrupt the query string, and join with
  # paste0() so no stray space is inserted after "address=".
  encoded_addr <- utils::URLencode(as.character(str), reserved = TRUE,
                                   repeated = TRUE)
  u <- paste0('http://maps.google.com/maps/api/geocode/xml?sensor=false&address=',
              encoded_addr)
  doc <- xmlTreeParse(u, useInternalNodes = TRUE)
  lat <- getDocNodeVal(doc, "/GeocodeResponse/result/geometry/location/lat")
  lng <- getDocNodeVal(doc, "/GeocodeResponse/result/geometry/location/lng")
  formatted_address <- getDocNodeVal(doc, "/GeocodeResponse/result/formatted_address")
  location_type <- getDocNodeVal(doc, "/GeocodeResponse/result/geometry/location_type")
  status <- getDocNodeVal(doc, "/GeocodeResponse/status")
  list(lat = lat,                             # latitude
       lng = lng,                             # longitude
       formatted_address = formatted_address, # full addresses suggested by google
       location_type = location_type,         # geocoding accuracy
       status = status                        # status of geocoding: OK or zero_results
  )
}
# Example 1: a complete, correct street address.
str1 <- "11 Wall St, New York, NY"
gGeoCode(str1)

# Example 2: the address is too general (no street number),
# but Google can still map it by approximation.
str2 <- "Wall St, New York, NY"
gGeoCode(str2)
# Comments:
# As the result shows, several geocode / matched-address pairs come back.
# In practice you have to pick the one that best fits your need.

# Example 3: the address is inaccurate, but Google can guess what was meant.
str3 <- "11 Wall Rd, New York" # actually, it should be "Wall St"
gGeoCode(str3)

# Example 4: the address is too far from any real one, so it cannot be geocoded.
str4 <- "1021 Watl P1lz"
gGeoCode(str4)
Subscribe to:
Posts (Atom)