Skip to content

Commit 4aad418

Browse files
authored
Update 4-AI_image_multimodal_location_GPT.R
Adding Azure Cognitive Services as an alternative to Google Cloud Vision to identify landmarks
1 parent 8b58043 commit 4aad418

File tree

1 file changed

+72
-28
lines changed

1 file changed

+72
-28
lines changed

4-AI_image_multimodal_location_GPT.R

Lines changed: 72 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#
44
# After that, run the below commands in R:
55
# remotes::install_github("rstudio/tensorflow")
6+
# remotes::install_github("Azure/AzureCognitive")
67
#
78
# More information about the prediction models used, their parameters and expected results can also be found here:
89
# https://keras.io/api/applications/
@@ -45,6 +46,7 @@ if (!require("dplyr")) { install.packages("dplyr") }
4546
if (!require("imager")) { install.packages("imager") }
4647
if (!require("magick")) { install.packages("magick") }
4748
if (!require("AzureVision")) { install.packages("AzureVision") }
49+
if (!require("AzureCognitive")) { install.packages("AzureCognitive") }
4850
if (!require("googleAuthR")) { install.packages("googleAuthR") }
4951
if (!require("googleCloudVisionR")) { install.packages("googleCloudVisionR") }
5052
if (!require("ggmap")) { install.packages("ggmap") }
@@ -58,25 +60,28 @@ if (!require("purrr")) { install.packages("purrr") }
5860
# Set the API keys
5961
# Sys.setenv(AZURE_COMPUTER_VISION_KEY = azure_api_key)
6062
# Sys.setenv("GCV_AUTH_FILE" = "/fullpath/to/auth.json")
61-
62-
# Google Maps API
63-
google_api_key <- readLines("C:/XXXXX/Google_API_key.txt", warn=FALSE)
64-
register_google(key = google_api_key)
65-
# Bing Maps API
66-
bing_api_key <- readLines("C:/XXXXX/Bing_API_key.txt", warn=FALSE)
67-
# Google Cloud API
63+
64+
# Google Cloud API information
6865
options(googleAuthR.client_id = "XXXXX.apps.googleusercontent.com")
6966
options(googleAuthR.client_secret = "XXXXX")
7067
options(googleAuthR.scopes.selected = c("https://www.googleapis.com/auth/cloud-platform"))
71-
# Azure Cloud API
68+
# Google Maps API key
69+
google_api_key <- readLines("C:/XXXXX/Google_API_key.txt", warn=FALSE)
70+
register_google(key = google_api_key)
71+
# Bing Maps API key
72+
bing_api_key <- readLines("C:/XXXXX/Bing_API_key.txt", warn=FALSE)
73+
# Azure API key
7274
azure_api_key <- readLines("C:/XXXXX/Azure_API_key.txt", warn=FALSE)
75+
# Azure Computer Vision API information
76+
cognitiveservicesURL <-"https://XXXXX.cognitiveservices.azure.com/"
7377
vis <- computervision_endpoint(
74-
url="https://XXXXX.cognitiveservices.azure.com/",
78+
url=cognitiveservicesURL,
7579
key=azure_api_key
7680
)
7781

78-
# Authenticate with the Google Cloud Vision API
82+
# Authenticate with the Google Cloud Vision API (ONLY NEEDED FOR THE FIRST TIME OR IF TOKEN IS LOST)
7983
if (gar_has_token()!=TRUE) {
84+
# This code runs automatically if no Google auth token is found. However, the token may exist but be expired; in that case, run the line below manually
8085
gar_auth(email = "XXXXX")
8186
}
8287

@@ -428,7 +433,7 @@ concat <- rbind(concat,resultsAzure)
428433
concat <- rbind(concat,resultsGoogle)
429434
############################### END Cloud ML Models
430435

431-
# Raw data from results of all models
436+
# Raw data from results of all 15 models + 2 APIs
432437
# print(concat)
433438

434439
# Create a dataframe with summarized results across all models
@@ -484,21 +489,59 @@ date <- format(exif_data$CreateDate, format = "%d/%B/%Y")
484489
landmarks$source <- "Google Cloud Vision"
485490
}
486491

487-
# Add Google Maps Places API landmarks to the previous Google Vision results
488-
place_types <- c("accounting","airport","amusement_park","aquarium","art_gallery","atm","bakery","bank","bar","beauty_salon","bicycle_store","book_store","bowling_alley","bus_station","cafe","campground","car_dealer","car_rental","car_repair","car_wash","casino","cemetery","church","city_hall","clothing_store","convenience_store","courthouse","dentist","department_store","doctor","drugstore","electrician","electronics_store","embassy","fire_station","florist","funeral_home","furniture_store","gas_station","gym","hair_care","hardware_store","hindu_temple","home_goods_store","hospital","insurance_agency","jewelry_store","laundry","lawyer","library","light_rail_station","liquor_store","local_government_office","locksmith","lodging","meal_delivery","meal_takeaway","mosque","movie_rental","movie_theater","moving_company","museum","night_club","painter","park","pet_store","pharmacy","physiotherapist","plumber","police","post_office","primary_school","real_estate_agency","restaurant","roofing_contractor","rv_park","school","secondary_school","shoe_store","shopping_mall","spa","stadium","storage","store","subway_station","supermarket","synagogue","taxi_stand","tourist_attraction","train_station","transit_station","travel_agency","university","veterinary_care","zoo","landmark","place_of_worship","town_square")
492+
# Standalone Azure Computer Vision endpoint (must provide subscription key and cognitive services name)
493+
endp <- cognitive_endpoint(cognitiveservicesURL,
494+
service_type="ComputerVision", key=azure_api_key)
495+
496+
# Read the local image file as raw bytes, truncating to 4 MB (if needed), which is Azure's maximum image size limit
497+
if (file.size(image_path)>4150000){
498+
raw_vector <- readBin(image_path, "raw", 4150000)
499+
} else {
500+
raw_vector <- readBin(image_path, "raw", file.info(image_path)$size)
501+
}
489502

490-
# Check if any word from the list is in the text
491-
result <- sapply(place_types, grepl, hashwords, fixed = TRUE)
503+
# call the cognitive endpoint to analyze the image for landmarks
504+
landmarkAzure <- call_cognitive_endpoint(endp, operation = "analyze",
505+
body = raw_vector,
506+
encode = "raw",
507+
options = list(details = "landmarks"),
508+
http_verb="POST")
492509

493-
# Print which words from the Google Places API were found in the hashtags based on the image
494-
found_words <- place_types[result]
495-
tourism_tags <- c("castle","monastery","bridge","palace","statue","bell_cote")
496-
if (length(tourism_tags[sapply(tourism_tags, grepl, hashwords, fixed = TRUE)])>0) {
497-
found_words <- c(found_words, "tourist_attraction")
498-
}
510+
if (length(landmarkAzure$categories[[1]]$detail$landmarks)>0){
511+
landmarkAzure <- dplyr::bind_rows(lapply(landmarkAzure$categories, as.data.frame.list))
512+
landmarkAzure <- landmarkAzure %>% select(detail.landmarks.name, detail.landmarks.confidence)
499513

500-
url <- ""
501-
url <- paste0("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=",
514+
# Keep only the 3 columns that are of interest
515+
landmarkAzure <- landmarkAzure[, grepl("landmarks.name|confidence", colnames(landmarkAzure))]
516+
517+
# Rename, reorder and add blank columns to keep the df consistent with other solutions
518+
colnames(landmarkAzure) <- c("description", "score")
519+
landmarkAzure$latitude <- ""
520+
landmarkAzure$longitude <- ""
521+
landmarkAzure$source <- "Azure Vision API"
522+
landmarkAzure <- landmarkAzure %>% select(description, latitude, longitude, source, score)
523+
}
524+
525+
# Add Azure Vision landmarks to the previous Google Vision results, if any result was found
526+
if(exists("landmarkAzure") && all(c("description", "score") %in% names(landmarkAzure))){ # Azure Vision found landmarks
527+
landmarks <- rbind(landmarks, landmarkAzure, fill=TRUE)
528+
}
529+
530+
# Add Google Maps Places API landmarks to the previous Google Vision results
531+
place_types <- c("accounting","airport","amusement_park","aquarium","art_gallery","atm","bakery","bank","bar","beauty_salon","bicycle_store","book_store","bowling_alley","bus_station","cafe","campground","car_dealer","car_rental","car_repair","car_wash","casino","cemetery","church","city_hall","clothing_store","convenience_store","courthouse","dentist","department_store","doctor","drugstore","electrician","electronics_store","embassy","fire_station","florist","funeral_home","furniture_store","gas_station","gym","hair_care","hardware_store","hindu_temple","home_goods_store","hospital","insurance_agency","jewelry_store","laundry","lawyer","library","light_rail_station","liquor_store","local_government_office","locksmith","lodging","meal_delivery","meal_takeaway","mosque","movie_rental","movie_theater","moving_company","museum","night_club","painter","park","pet_store","pharmacy","physiotherapist","plumber","police","post_office","primary_school","real_estate_agency","restaurant","roofing_contractor","rv_park","school","secondary_school","shoe_store","shopping_mall","spa","stadium","storage","store","subway_station","supermarket","synagogue","taxi_stand","tourist_attraction","train_station","transit_station","travel_agency","university","veterinary_care","zoo","landmark","place_of_worship","town_square")
532+
533+
# Check if any word from the list is in the text
534+
result <- sapply(place_types, grepl, hashwords, fixed = TRUE)
535+
536+
# Print which words from the Google Places API were found in the hashtags based on the image
537+
found_words <- place_types[result]
538+
tourism_tags <- c("castle","monastery","bridge","palace","statue","bell_cote")
539+
if (length(tourism_tags[sapply(tourism_tags, grepl, hashwords, fixed = TRUE)])>0) {
540+
found_words <- c(found_words, "tourist_attraction")
541+
}
542+
543+
url <- ""
544+
url <- paste0("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=",
502545
exif_data$GPSLatitude, ",", exif_data$GPSLongitude,
503546
"&types=point_of_interest&rankby=distance&key=",google_api_key)
504547

@@ -679,13 +722,14 @@ if (lon!="") {
679722

680723
# Extract the text from the image using Azure Computer Vision API (OCR)
681724
if (file.size(image_path)>4150000){
682-
con <- file(image_path, "rb")
683-
raw_vector <- readBin(con, what = raw(), n = 4150000)
684725
text <- read_text(vis, raw_vector, detect_orientation = TRUE, language = "en")
685-
close(con)
686726
} else {
687727
text <- read_text(vis, image_path, detect_orientation = TRUE, language = "en")
688-
# text <- analyze(vis, image_path, domain = "landmarks", feature_types = "description")$description$captions$text
728+
}
729+
730+
# If no text was found using OCR, try to describe the image using Azure AI:
731+
if (length(text)==0){
732+
text <- analyze(vis, image_path, domain = "landmarks", feature_types = "description")$description$captions$text
689733
}
690734

691735
# Define a string for the Bing Chat prompt, that will generate the text for the social media post. Feel free to change this to your liking
@@ -709,6 +753,6 @@ browseURL(url)
709753
# browseURL("https://www.bing.com/search?showconv=1&sendquery=1&q=Hello%20Bing")
710754

711755
# Show main results in R Console, which could be used on prompt for Bing Chat
712-
cat(" Hashtags: ", hashtags, "\n", "GPS Coordin.: ", lat, ",", lon, "\n", "Landmark Name:", name, "\n", "Landm. Source:", source, "\n", "Text Found: ", paste(text, collapse = ", "), "\n", "Full address: ", address, "\n", "City: ", city, "\n", "Country: ", country, "\n", "Camera : ", exif_data$Make, exif_data$Model, "\n", "Date : ", date, "\n")
756+
cat(" Hashtags: ", hashtags, "\n", "GPS Coordin.: ", lat, ",", lon, "\n", "Landmark Name:", name, "\n", "Landm. Source:", source, "\n", "Text/Descript:", paste(text, collapse = ", "), "\n", "Full address: ", address, "\n", "City: ", city, "\n", "Country: ", country, "\n", "Camera : ", exif_data$Make, exif_data$Model, "\n", "Date : ", date, "\n")
713757

714758

0 commit comments

Comments
 (0)