Skip to content

Commit 616480a

Browse files
author
Yanan Wang
committed
Add FuzzySharp for NER
1 parent 904b9ad commit 616480a

25 files changed

+13483
-409
lines changed

BotSharp.sln

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ExcelHandle
149149
EndProject
150150
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ImageHandler", "src\Plugins\BotSharp.Plugin.ImageHandler\BotSharp.Plugin.ImageHandler.csproj", "{242F2D93-FCCE-4982-8075-F3052ECCA92C}"
151151
EndProject
152+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.FuzzySharp", "src\Plugins\BotSharp.Plugin.FuzzySharp\BotSharp.Plugin.FuzzySharp.csproj", "{E7C243B9-E751-B3B4-8F16-95C76CA90D31}"
153+
EndProject
152154
Global
153155
GlobalSection(SolutionConfigurationPlatforms) = preSolution
154156
Debug|Any CPU = Debug|Any CPU
@@ -629,6 +631,14 @@ Global
629631
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|Any CPU.Build.0 = Release|Any CPU
630632
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.ActiveCfg = Release|Any CPU
631633
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.Build.0 = Release|Any CPU
634+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
635+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.Build.0 = Debug|Any CPU
636+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.ActiveCfg = Debug|Any CPU
637+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.Build.0 = Debug|Any CPU
638+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.ActiveCfg = Release|Any CPU
639+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.Build.0 = Release|Any CPU
640+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.ActiveCfg = Release|Any CPU
641+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.Build.0 = Release|Any CPU
632642
EndGlobalSection
633643
GlobalSection(SolutionProperties) = preSolution
634644
HideSolutionNode = FALSE
@@ -701,6 +711,7 @@ Global
701711
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
702712
{FC63C875-E880-D8BB-B8B5-978AB7B62983} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
703713
{242F2D93-FCCE-4982-8075-F3052ECCA92C} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
714+
{E7C243B9-E751-B3B4-8F16-95C76CA90D31} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
704715
EndGlobalSection
705716
GlobalSection(ExtensibilityGlobals) = postSolution
706717
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}

src/Infrastructure/BotSharp.Abstraction/FuzzSharp/Arguments/TextAnalysisRequest.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ public class TextAnalysisRequest
1313
/// <summary>
1414
/// Folder path containing CSV files (will read all .csv files from the folder or its 'output' subfolder)
1515
/// </summary>
16-
[JsonPropertyName("vocabulary_folder_path")]
17-
public string? VocabularyFolderPath { get; set; }
16+
[JsonPropertyName("vocabulary_folder_name")]
17+
public string? VocabularyFolderName { get; set; }
1818

1919
/// <summary>
2020
/// Domain term mapping CSV file

src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFramework>net8.0</TargetFramework>
5-
<ImplicitUsings>enable</ImplicitUsings>
4+
<TargetFramework>$(TargetFramework)</TargetFramework>
65
<Nullable>enable</Nullable>
6+
<LangVersion>$(LangVersion)</LangVersion>
7+
<VersionPrefix>$(BotSharpVersion)</VersionPrefix>
8+
<GeneratePackageOnBuild>$(GeneratePackageOnBuild)</GeneratePackageOnBuild>
9+
<GenerateDocumentationFile>$(GenerateDocumentationFile)</GenerateDocumentationFile>
10+
<OutputPath>$(SolutionDir)packages</OutputPath>
711
</PropertyGroup>
812

913
<ItemGroup>

src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/FuzzyMatcher.cs

Lines changed: 0 additions & 257 deletions
This file was deleted.

src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,23 @@ public static class TextConstants
88
/// Includes: parentheses, brackets, braces, punctuation marks, special symbols, etc.
99
/// This ensures "(IH)" is split into "(", "IH", ")"
1010
/// </summary>
11-
public static readonly char[] TokenSeparationChars =
11+
public static readonly char[] SeparatorChars =
1212
{
1313
// Parentheses and brackets
1414
'(', ')', '[', ']', '{', '}',
1515
// Punctuation marks
1616
',', '.', ';', ':', '!', '?',
1717
// Special symbols
18-
'=', '@', '#', '$', '%', '^', '&', '*', '+', '-', '/', '\\', '|', '<', '>', '~', '`'
18+
'=', '@', '#', '$', '%', '^', '&', '*', '+', '-', '\\', '|', '<', '>', '~', '`'
1919
};
2020

2121
/// <summary>
22-
/// Text separators used for tokenization and n-gram processing
23-
/// Includes: equals, colon, semicolon, question mark, exclamation mark, comma, period
22+
/// Whitespace characters used as token separators during tokenization.
23+
/// Includes: space, tab, newline, and carriage return.
2424
/// </summary>
25-
public static readonly char[] SeparatorChars = { '=', ':', ';', '?', '!', ',', '.' };
25+
public static readonly char[] TokenSeparators =
26+
{
27+
' ', '\t', '\n', '\r'
28+
};
2629
}
2730
}

0 commit comments

Comments
 (0)