Skip to content

Commit 9765d89

Browse files
Merge pull request #13 from Azure-Samples/changjian-wang/add-conversational-field-extraction
This pull request introduces a new `ConversationalFieldExtraction` module to support conversational field extraction workflows, including project structure, service interface, implementation skeletons, and integration tests. It also introduces a new model for handling pretranscribed data and updates the solution and test project references accordingly. **Conversational Field Extraction Module:** * Added new `ConversationalFieldExtraction` project, including its project file and integration into the solution (`AzureAiContentUnderstandingDotNet.sln`, `ConversationalFieldExtraction/ConversationalFieldExtraction.csproj`). [[1]](diffhunk://#diff-d53bb6c2b8eaef1281deb9ec4535623dad3dbc79afc4e06c239f9ff69d371372R26-R27) [[2]](diffhunk://#diff-add9b4fca3ac2bec65c35659855664088d93e698920bf63e42a77e9d38a6b0a8R1-R19) * Implemented the `IConversationalFieldExtractionService` interface, defining methods for creating analyzers from templates, extracting fields from conversations, and cleaning up analyzers (`ConversationalFieldExtraction/Interfaces/IConversationalFieldExtractionService.cs`). * Added a program entry point that demonstrates the conversational field extraction workflow, including configuration validation and service usage (`ConversationalFieldExtraction/Program.cs`). * Added an integration test for conversational field extraction, covering analyzer creation, field extraction, and cleanup (`AzureAiContentUnderstanding.Tests/ConversationalFieldExtractionIntegrationTest.cs`). **Shared Models and References:** * Introduced a new `PretranscribedData` model with segment support for handling pretranscribed conversation data (`ContentUnderstanding.Common/Models/PretranscribedData.cs`). * Updated test project and solution to reference the new `ConversationalFieldExtraction` project (`AzureAiContentUnderstanding.Tests/AzureAiContentUnderstanding.Tests.csproj`, `AzureAiContentUnderstandingDotNet.sln`). 
[[1]](diffhunk://#diff-71c43240298612aad459837d79160f1625ae8cbd476417d9bebae4de8ef6fcbfR26) [[2]](diffhunk://#diff-d53bb6c2b8eaef1281deb9ec4535623dad3dbc79afc4e06c239f9ff69d371372R68-R71) **Minor Fixes:** * Renamed a variable for clarity in the field extraction integration test (`AzureAiContentUnderstanding.Tests/FieldExtractionIntegrationTest.cs`).
2 parents 2d4a437 + 11ea8d1 commit 9765d89

10 files changed

+460
-5
lines changed

AzureAiContentUnderstanding.Tests/AzureAiContentUnderstanding.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
<ProjectReference Include="..\Classifier\Classifier.csproj" />
2424
<ProjectReference Include="..\ContentExtraction\ContentExtraction.csproj" />
2525
<ProjectReference Include="..\ContentUnderstanding.Common\ContentUnderstanding.Common.csproj" />
26+
<ProjectReference Include="..\ConversationalFieldExtraction\ConversationalFieldExtraction.csproj" />
2627
<ProjectReference Include="..\FieldExtractionProMode\FieldExtractionProMode.csproj" />
2728
<ProjectReference Include="..\FieldExtraction\FieldExtraction.csproj" />
2829
<ProjectReference Include="..\Management\Management.csproj" />
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
using ContentUnderstanding.Common;
2+
using ContentUnderstanding.Common.Extensions;
3+
using ConversationalFieldExtraction.Interfaces;
4+
using ConversationalFieldExtraction.Services;
5+
using Microsoft.Extensions.Configuration;
6+
using Microsoft.Extensions.DependencyInjection;
7+
using Microsoft.Extensions.Hosting;
8+
using System.Text.Json;
9+
10+
namespace AzureAiContentUnderstanding.Tests
11+
{
12+
/// <summary>
/// Integration test for the conversational field extraction sample. For each pretranscribed
/// sample file it creates an analyzer from a template, extracts fields with it, and deletes it.
/// </summary>
/// <remarks>
/// NOTE(review): the doubled "Conversational" prefix in the class name looks unintentional.
/// The name is kept as-is so existing test filters and references keep working.
/// </remarks>
public class ConversationalConversationalFieldExtractionIntegrationTest
{
    private readonly IConversationalFieldExtractionService service;

    /// <summary>
    /// Builds a host with the same registrations as the sample program and resolves the service under test.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// Thrown by the service-configuration callback when <c>AZURE_CU_CONFIG:Endpoint</c> or
    /// <c>AZURE_CU_CONFIG:ApiVersion</c> is missing or blank.
    /// </exception>
    public ConversationalConversationalFieldExtractionIntegrationTest()
    {
        var host = Host.CreateDefaultBuilder()
            .ConfigureServices((context, services) =>
            {
                if (string.IsNullOrWhiteSpace(context.Configuration.GetValue<string>("AZURE_CU_CONFIG:Endpoint")))
                {
                    throw new ArgumentException("Endpoint must be provided in appsettings.json.");
                }
                if (string.IsNullOrWhiteSpace(context.Configuration.GetValue<string>("AZURE_CU_CONFIG:ApiVersion")))
                {
                    throw new ArgumentException("API version must be provided in appsettings.json.");
                }
                services.AddConfigurations(opts =>
                {
                    context.Configuration.GetSection("AZURE_CU_CONFIG").Bind(opts);
                    // This header is used for sample usage telemetry, please comment out this line if you want to opt out.
                    opts.UserAgent = "azure-ai-content-understanding-dotnet/conversational_field_extraction";
                });
                services.AddTokenProvider();
                services.AddHttpClient<AzureContentUnderstandingClient>();
                services.AddSingleton<IConversationalFieldExtractionService, ConversationalFieldExtractionService>();
            })
            .Build();

        // GetRequiredService fails immediately with a descriptive exception if the registration
        // is missing, instead of deferring to a null dereference inside the test.
        service = host.Services.GetRequiredService<IConversationalFieldExtractionService>();
    }

    /// <summary>
    /// Runs the create / extract / delete workflow for every sample and fails if any step throws.
    /// </summary>
    [Fact(DisplayName = "Conversational Field Extraction Integration Test")]
    [Trait("Category", "Integration")]
    public async Task RunAsync()
    {
        var extractionTemplates = new Dictionary<string, (string, string)>
        {
            { "call_recording_pretranscribe_batch", ("./analyzer_templates/call_recording_analytics_text.json", "./data/batch_pretranscribed.json") },
            { "call_recording_pretranscribe_fast", ("./analyzer_templates/call_recording_analytics_text.json", "./data/fast_pretranscribed.json") },
            { "call_recording_pretranscribe_cu", ("./analyzer_templates/call_recording_analytics_text.json", "./data/cu_pretranscribed.json") }
        };
        var analyzerId = $"conversational-field-extraction-sample-{Guid.NewGuid()}";

        Exception? serviceException = null;

        foreach (var (analyzerTemplatePath, analyzerSampleFilePath) in extractionTemplates.Values)
        {
            try
            {
                // Create the analyzer from the template
                await CreateAnalyzerFromTemplateAsync(analyzerId, analyzerTemplatePath);

                // Extract fields using the created analyzer
                await ExtractFieldsWithAnalyzerAsync(analyzerId, analyzerSampleFilePath);
            }
            catch (Exception ex)
            {
                serviceException = ex;
            }

            // Always attempt cleanup, even after a failure, so a failed run does not leave the
            // analyzer behind. A cleanup error never replaces the first failure.
            try
            {
                await service.DeleteAnalyzerAsync(analyzerId);
            }
            catch (Exception ex)
            {
                serviceException ??= ex;
            }

            if (serviceException is not null)
            {
                break;
            }
        }

        // Assert that no exceptions were thrown during the test.
        Assert.Null(serviceException);
    }

    /// <summary>
    /// Creates the analyzer through the service and validates the shape of the creation result.
    /// </summary>
    /// <param name="analyzerId">Identifier of the analyzer to create.</param>
    /// <param name="analyzerTemplatePath">Path of the analyzer template file.</param>
    private async Task CreateAnalyzerFromTemplateAsync(string analyzerId, string analyzerTemplatePath)
    {
        using var resultJson = await service.CreateAnalyzerFromTemplateAsync(analyzerId, analyzerTemplatePath);
        var result = AssertResultWithoutWarnings(resultJson);

        Assert.True(result.TryGetProperty("status", out JsonElement status));
        Assert.Equal("ready", status.ToString());
        Assert.True(result.TryGetProperty("mode", out JsonElement mode));
        Assert.Equal("standard", mode.ToString());
        Assert.True(result.TryGetProperty("fieldSchema", out JsonElement fieldSchema));
        Assert.True(fieldSchema.TryGetProperty("fields", out JsonElement fields));
        Assert.False(string.IsNullOrWhiteSpace(fields.GetRawText()));
    }

    /// <summary>
    /// Runs field extraction through the service and validates the shape of the analysis result.
    /// </summary>
    /// <param name="analyzerId">Identifier of the analyzer to use.</param>
    /// <param name="analyzerSampleFilePath">Path of the pretranscribed sample file.</param>
    private async Task ExtractFieldsWithAnalyzerAsync(string analyzerId, string analyzerSampleFilePath)
    {
        using var resultJson = await service.ExtractFieldsWithAnalyzerAsync(analyzerId, analyzerSampleFilePath);
        var result = AssertResultWithoutWarnings(resultJson);

        Assert.True(result.TryGetProperty("contents", out JsonElement contents));
        // Guard the index below: fail with a clear message instead of an indexing exception.
        Assert.Equal(JsonValueKind.Array, contents.ValueKind);
        Assert.True(contents.GetArrayLength() > 0, "The contents array should not be empty");

        var firstContent = contents[0];
        Assert.True(firstContent.TryGetProperty("markdown", out JsonElement markdown));
        Assert.False(string.IsNullOrWhiteSpace(markdown.GetRawText()));
        Assert.True(firstContent.TryGetProperty("fields", out JsonElement fields));
        Assert.False(string.IsNullOrWhiteSpace(fields.GetRawText()));
    }

    /// <summary>
    /// Shared assertions: the document is present, has a <c>result</c> object, and that object
    /// carries an empty <c>warnings</c> array.
    /// </summary>
    /// <param name="resultJson">The document returned by the service.</param>
    /// <returns>The <c>result</c> element for further assertions.</returns>
    private static JsonElement AssertResultWithoutWarnings(JsonDocument? resultJson)
    {
        Assert.NotNull(resultJson);
        Assert.True(resultJson.RootElement.TryGetProperty("result", out JsonElement result));
        Assert.True(result.TryGetProperty("warnings", out var warnings));
        Assert.False(warnings.EnumerateArray().Any(), "The warnings array should be empty");
        return result;
    }
}
115+
}

AzureAiContentUnderstanding.Tests/FieldExtractionIntegrationTest.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@ public async Task RunAsync()
9090
{ "marketing_video", ("./analyzer_templates/marketing_video.json", "./data/FlightSimulator.mp4") }
9191
};
9292

93-
string field_extraction_analyzerId = $"field-extraction-sample-{Guid.NewGuid()}";
93+
string analyzerId = $"field-extraction-sample-{Guid.NewGuid()}";
9494

9595
foreach (var item in ExtractionTemplates)
9696
{
9797
var (analyzerTemplatePath, analyzerSampleFilePath) = ExtractionTemplates[item.Key];
98-
JsonDocument resultJson = await service.CreateAndUseAnalyzer(field_extraction_analyzerId, analyzerTemplatePath, analyzerSampleFilePath);
98+
JsonDocument resultJson = await service.CreateAndUseAnalyzer(analyzerId, analyzerTemplatePath, analyzerSampleFilePath);
9999

100100
Assert.NotNull(resultJson);
101101
Assert.True(resultJson.RootElement.TryGetProperty("result", out JsonElement result));

AzureAiContentUnderstandingDotNet.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FieldExtractionProMode", "F
2323
EndProject
2424
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{509D6291-0C37-40FA-9571-D04591158A66}"
2525
EndProject
26+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConversationalFieldExtraction", "ConversationalFieldExtraction\ConversationalFieldExtraction.csproj", "{FDADE6F4-750B-4DBB-2708-F1BFB872B3E4}"
27+
EndProject
2628
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureAiContentUnderstanding.Tests", "AzureAiContentUnderstanding.Tests\AzureAiContentUnderstanding.Tests.csproj", "{9A1205B7-852F-27D9-5441-AC948FF91008}"
2729
EndProject
2830
Global
@@ -63,6 +65,10 @@ Global
6365
{F88600E8-1ECA-60CB-B6C6-93950688E59C}.Debug|Any CPU.Build.0 = Debug|Any CPU
6466
{F88600E8-1ECA-60CB-B6C6-93950688E59C}.Release|Any CPU.ActiveCfg = Release|Any CPU
6567
{F88600E8-1ECA-60CB-B6C6-93950688E59C}.Release|Any CPU.Build.0 = Release|Any CPU
68+
{FDADE6F4-750B-4DBB-2708-F1BFB872B3E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
69+
{FDADE6F4-750B-4DBB-2708-F1BFB872B3E4}.Debug|Any CPU.Build.0 = Debug|Any CPU
70+
{FDADE6F4-750B-4DBB-2708-F1BFB872B3E4}.Release|Any CPU.ActiveCfg = Release|Any CPU
71+
{FDADE6F4-750B-4DBB-2708-F1BFB872B3E4}.Release|Any CPU.Build.0 = Release|Any CPU
6672
{9A1205B7-852F-27D9-5441-AC948FF91008}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
6773
{9A1205B7-852F-27D9-5441-AC948FF91008}.Debug|Any CPU.Build.0 = Debug|Any CPU
6874
{9A1205B7-852F-27D9-5441-AC948FF91008}.Release|Any CPU.ActiveCfg = Release|Any CPU
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace ContentUnderstanding.Common.Models
8+
{
9+
/// <summary>
/// Container for pretranscribed data, expressed as a list of <see cref="Segment"/> entries.
/// </summary>
public class PretranscribedData
{
    /// <summary>
    /// The segments of the transcript. Initialized to an empty list, so it is never null
    /// unless explicitly assigned.
    /// </summary>
    public List<Segment> Segments { get; set; } = new();
}
13+
14+
/// <summary>
/// One entry of a pretranscribed transcript: a start value, an end value and the text between them.
/// </summary>
public class Segment
{
    /// <summary>
    /// Start position of the segment.
    /// NOTE(review): the unit (presumably seconds) is not established here; confirm against the data producer.
    /// </summary>
    public float Start { get; set; }

    /// <summary>
    /// End position of the segment, presumably in the same unit as <see cref="Start"/>.
    /// </summary>
    public float End { get; set; }

    /// <summary>
    /// Text of the segment; may be null.
    /// </summary>
    public string? Text { get; set; }
}
22+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net8.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
<OutputType>Exe</OutputType>
8+
</PropertyGroup>
9+
10+
<ItemGroup>
11+
<PackageReference Include="Microsoft.Extensions.Hosting" Version="9.0.7" />
12+
<PackageReference Include="Microsoft.Extensions.Http" Version="9.0.8" />
13+
</ItemGroup>
14+
15+
<ItemGroup>
16+
<ProjectReference Include="..\ContentUnderstanding.Common\ContentUnderstanding.Common.csproj" />
17+
</ItemGroup>
18+
19+
</Project>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Text.Json;
6+
using System.Threading.Tasks;
7+
8+
namespace ConversationalFieldExtraction.Interfaces
9+
{
10+
/// <summary>
/// Operations of the conversational field extraction sample: create an analyzer from a template,
/// extract fields from conversation data with it, and delete it afterwards.
/// </summary>
public interface IConversationalFieldExtractionService
{
    /// <summary>
    /// Creates a new analyzer from a specified template file and polls for the completion of the creation operation.
    /// This method initiates the analyzer creation process using the Azure Content Understanding service and waits
    /// for the operation to complete before returning. The analyzer can then be used for conversational field extraction.
    /// </summary>
    /// <param name="analyzerId">The unique identifier for the analyzer to be created. This value must not be null or empty.</param>
    /// <param name="analyzerTemplatePath">The file path to the analyzer template. This value must point to a valid template file and must not be null or
    /// empty.</param>
    /// <returns>
    /// A task that represents the asynchronous operation. The task result contains a <see cref="JsonDocument"/>
    /// describing the outcome of the creation operation.
    /// </returns>
    Task<JsonDocument> CreateAnalyzerFromTemplateAsync(string analyzerId, string analyzerTemplatePath);

    /// <summary>
    /// Extracts structured fields from conversation data using a specified analyzer.
    /// This method converts the input JSON file to WebVTT format, validates the conversion,
    /// and then uses the Azure Content Understanding service to analyze the conversation
    /// and extract structured field data based on the analyzer's configuration.
    /// </summary>
    /// <param name="analyzerId">The unique identifier of the analyzer to use for field extraction. Must not be null or empty.</param>
    /// <param name="sampleFilePath">The file path to the JSON conversation data file to be analyzed. Must point to a valid JSON file and must not be null or empty.</param>
    /// <returns>
    /// A task that represents the asynchronous operation. The task result contains a <see cref="JsonDocument"/>
    /// with the extracted field data if successful, or null if the WebVTT conversion fails or is invalid.
    /// </returns>
    /// <exception cref="Exception">Thrown when there is an error during the field extraction process, including issues with file conversion, analyzer communication, or result polling.</exception>
    /// <remarks>
    /// The method performs the following operations:
    /// 1. Converts the input JSON file to WebVTT format for conversation analysis
    /// 2. Validates that the conversion produced valid WebVTT content
    /// 3. Initiates analysis using the specified analyzer through the Azure Content Understanding service
    /// 4. Polls for the completion of the analysis operation
    /// 5. Returns the structured field extraction results as JSON
    /// </remarks>
    Task<JsonDocument?> ExtractFieldsWithAnalyzerAsync(string analyzerId, string sampleFilePath);

    /// <summary>
    /// Cleans up by deleting the specified analyzer.
    /// </summary>
    /// <remarks>Optionally, delete the sample analyzer from your resource. In typical usage scenarios, you would analyze multiple files using the same analyzer.</remarks>
    /// <param name="analyzerId">The unique identifier of the analyzer to delete. Cannot be null or empty.</param>
    /// <returns>A task that represents the asynchronous delete operation.</returns>
    Task DeleteAnalyzerAsync(string analyzerId);
}
54+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
using ContentUnderstanding.Common;
2+
using ContentUnderstanding.Common.Extensions;
3+
using ConversationalFieldExtraction.Interfaces;
4+
using ConversationalFieldExtraction.Services;
5+
using Microsoft.Extensions.Configuration;
6+
using Microsoft.Extensions.DependencyInjection;
7+
using Microsoft.Extensions.Hosting;
8+
9+
namespace ConversationalFieldExtraction
10+
{
11+
/// <summary>
/// Console entry point of the conversational field extraction sample.
/// </summary>
public class Program
{
    /// <summary>
    /// Builds the host, resolves the extraction service and, for each pretranscribed sample,
    /// creates an analyzer from its template, extracts fields with it and deletes the analyzer.
    /// </summary>
    /// <param name="args">Command-line arguments, forwarded to <c>Host.CreateDefaultBuilder</c>.</param>
    /// <returns>A task that completes when every sample has been processed.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown by the service-configuration callback when <c>AZURE_CU_CONFIG:Endpoint</c> or
    /// <c>AZURE_CU_CONFIG:ApiVersion</c> is missing or blank.
    /// </exception>
    public static async Task Main(string[] args)
    {
        // The host is disposable; dispose it when the sample finishes.
        using var host = Host.CreateDefaultBuilder(args)
            .ConfigureServices((context, services) =>
            {
                if (string.IsNullOrWhiteSpace(context.Configuration.GetValue<string>("AZURE_CU_CONFIG:Endpoint")))
                {
                    throw new ArgumentException("Endpoint must be provided in appsettings.json.");
                }

                if (string.IsNullOrWhiteSpace(context.Configuration.GetValue<string>("AZURE_CU_CONFIG:ApiVersion")))
                {
                    throw new ArgumentException("API version must be provided in appsettings.json.");
                }

                services.AddConfigurations(opts =>
                {
                    context.Configuration.GetSection("AZURE_CU_CONFIG").Bind(opts);
                    // This header is used for sample usage telemetry, please comment out this line if you want to opt out.
                    opts.UserAgent = "azure-ai-content-understanding-dotnet/conversational_field_extraction";
                });
                services.AddTokenProvider();
                services.AddHttpClient<AzureContentUnderstandingClient>();
                services.AddSingleton<IConversationalFieldExtractionService, ConversationalFieldExtractionService>();
            })
            .Build();

        // GetRequiredService fails immediately with a descriptive exception if the registration is missing.
        var service = host.Services.GetRequiredService<IConversationalFieldExtractionService>();

        var extractionTemplates = new Dictionary<string, (string, string)>
        {
            { "call_recording_pretranscribe_batch", ("./analyzer_templates/call_recording_analytics_text.json", "./data/batch_pretranscribed.json") },
            { "call_recording_pretranscribe_fast", ("./analyzer_templates/call_recording_analytics_text.json", "./data/fast_pretranscribed.json") },
            { "call_recording_pretranscribe_cu", ("./analyzer_templates/call_recording_analytics_text.json", "./data/cu_pretranscribed.json") }
        };

        var analyzerId = $"conversational-field-extraction-sample-{Guid.NewGuid()}";

        foreach (var (analyzerTemplatePath, analyzerSampleFilePath) in extractionTemplates.Values)
        {
            try
            {
                // Create the analyzer from the template
                using var creationResult = await service.CreateAnalyzerFromTemplateAsync(analyzerId, analyzerTemplatePath);

                // Extract fields using the created analyzer
                using var extractionResult = await service.ExtractFieldsWithAnalyzerAsync(analyzerId, analyzerSampleFilePath);
            }
            catch
            {
                // Best-effort cleanup so a failed run does not leave the analyzer behind;
                // the original failure is rethrown unchanged.
                await TryDeleteAnalyzerAsync(service, analyzerId);
                throw;
            }

            // Clean up the analyzer after use
            await service.DeleteAnalyzerAsync(analyzerId);
        }
    }

    /// <summary>
    /// Deletes the analyzer while another failure is already propagating. A deletion error is
    /// written to standard error instead of being thrown, so it cannot mask the original failure.
    /// </summary>
    /// <param name="service">The service used to delete the analyzer.</param>
    /// <param name="analyzerId">Identifier of the analyzer to delete.</param>
    private static async Task TryDeleteAnalyzerAsync(IConversationalFieldExtractionService service, string analyzerId)
    {
        try
        {
            await service.DeleteAnalyzerAsync(analyzerId);
        }
        catch (Exception cleanupException)
        {
            Console.Error.WriteLine($"Failed to delete analyzer '{analyzerId}' during cleanup: {cleanupException.Message}");
        }
    }
}
67+
}

0 commit comments

Comments
 (0)