Skip to content

Commit ccdbbdc

Browse files
authored
Update 4-AI_image_multimodal_location_GPT.R
Removing redundancies in the creation of raw_vector and adding the image description generated by Azure Vision
1 parent 4aad418 commit ccdbbdc

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

4-AI_image_multimodal_location_GPT.R

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ if (info$height > info$width) {
140140
}
141141
plot(img)
142142

143+
# Read the local image file as raw bytes, truncating to about 4 MB (if needed), which is Azure's maximum file size
144+
if (file.size(image_path)>4150000){
145+
raw_vector <- readBin(image_path, "raw", 4150000)
146+
} else {
147+
raw_vector <- readBin(image_path, "raw", file.info(image_path)$size)
148+
}
149+
143150
# Resizing image based on model requirements
144151
image_224 <- image_load(image_path, target_size = c(224, 224))
145152
image_299 <- image_load(image_path, target_size = c(299, 299))
@@ -405,12 +412,8 @@ concat <- rbind(concat,filtered_tags)
405412

406413
############################### Cloud ML Models (Require API keys)
407414
# Get tags from Azure Vision
408-
# img <- image_read(image_path) # needed?
409-
if (file.size(image_path)>4150000){
410-
con <- file(image_path, "rb")
411-
raw_vector <- readBin(con, what = raw(), n = 4150000)
415+
if (file.size(image_path)>4150000){ # needed?
412416
resultsAzure <- analyze(vis, raw_vector, domain = "landmarks", feature_types = "tags")$tags
413-
close(con)
414417
} else {
415418
resultsAzure <- analyze(vis, image_path, domain = "landmarks", feature_types = "tags")$tags
416419
}
@@ -493,13 +496,6 @@ date <- format(exif_data$CreateDate, format = "%d/%B/%Y")
493496
endp <- cognitive_endpoint(cognitiveservicesURL,
494497
service_type="ComputerVision", key=azure_api_key)
495498

496-
# Read the local image file as raw bytes, truncating to about 4 MB (if needed), which is Azure's maximum file size
497-
if (file.size(image_path)>4150000){
498-
raw_vector <- readBin(image_path, "raw", 4150000)
499-
} else {
500-
raw_vector <- readBin(image_path, "raw", file.info(image_path)$size)
501-
}
502-
503499
# call the cognitive endpoint to analyze the image for landmarks
504500
landmarkAzure <- call_cognitive_endpoint(endp, operation = "analyze",
505501
body = raw_vector,
@@ -721,16 +717,14 @@ if (lon!="") {
721717
}
722718

723719
# Extract the text from the image using Azure Computer Vision API (OCR)
724-
if (file.size(image_path)>4150000){
725-
text <- read_text(vis, raw_vector, detect_orientation = TRUE, language = "en")
720+
if (file.size(image_path)>4150000){ # needed?
721+
textOCR <- read_text(vis, raw_vector, detect_orientation = TRUE, language = "en")
726722
} else {
727-
text <- read_text(vis, image_path, detect_orientation = TRUE, language = "en")
723+
textOCR <- read_text(vis, image_path, detect_orientation = TRUE, language = "en")
728724
}
729725

730-
# If no text was found using OCR, try to describe the image using Azure AI:
731-
if (length(text)==0){
732-
text <- analyze(vis, image_path, domain = "landmarks", feature_types = "description")$description$captions$text
733-
}
726+
# Describe the image using Azure AI:
727+
text <- analyze(vis, image_path, domain = "landmarks", feature_types = "description")$description$captions$text
734728

735729
# Define a string for the Bing Chat prompt that will generate the text for the social media post. Feel free to change this to your liking.
736730
str <- ""
@@ -753,6 +747,5 @@ browseURL(url)
753747
# browseURL("https://www.bing.com/search?showconv=1&sendquery=1&q=Hello%20Bing")
754748

755749
# Show the main results in the R console, which could be used in the prompt for Bing Chat
756-
cat(" Hashtags: ", hashtags, "\n", "GPS Coordin.: ", lat, ",", lon, "\n", "Landmark Name:", name, "\n", "Landm. Source:", source, "\n", "Text/Descript:", paste(text, collapse = ", "), "\n", "Full address: ", address, "\n", "City: ", city, "\n", "Country: ", country, "\n", "Camera : ", exif_data$Make, exif_data$Model, "\n", "Date : ", date, "\n")
757-
750+
cat(" Hashtags: ", hashtags, "\n", "GPS Coordin.: ", lat, ",", lon, "\n", "Landmark Name:", name, "\n", "Landm. Source:", source, "\n", "Text OCR :", paste(textOCR, collapse = ", "), "\n", "Img. Descript:", paste(text, collapse = ", "), "\n", "Full address: ", address, "\n", "City: ", city, "\n", "Country: ", country, "\n", "Camera : ", exif_data$Make, exif_data$Model, "\n", "Date : ", date, "\n")
758751

0 commit comments

Comments
 (0)