Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/2.6, added PaddleNLP.Lac #86

Merged
merged 28 commits into from
Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions PaddleSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{026E4A25-9
docs\detection.md = docs\detection.md
docs\ocr.md = docs\ocr.md
docs\paddle2onnx.md = docs\paddle2onnx.md
docs\paddlenlp-lac.md = docs\paddlenlp-lac.md
docs\rotation-detection.md = docs\rotation-detection.md
EndProjectSection
EndProject
Expand All @@ -80,11 +81,17 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.Paddle2Onnx", "src\Sdc
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.Paddle2Onnx.Tests", "tests\Sdcb.Paddle2Onnx.Tests\Sdcb.Paddle2Onnx.Tests.csproj", "{0432D4F5-1F7E-4A6E-A6DC-4A04C0F8E497}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sdcb.PaddleOCR.Models.LocalV4", "src\Sdcb.PaddleOCR.Models.LocalV4\Sdcb.PaddleOCR.Models.LocalV4.csproj", "{604827F0-00CB-48DC-AD4E-06AE386CD96A}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleOCR.Models.LocalV4", "src\Sdcb.PaddleOCR.Models.LocalV4\Sdcb.PaddleOCR.Models.LocalV4.csproj", "{604827F0-00CB-48DC-AD4E-06AE386CD96A}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleOCR.Models.Shared", "src\Sdcb.PaddleOCR.Models.Shared\Sdcb.PaddleOCR.Models.Shared.csproj", "{EC79D45E-85D8-40E2-9F95-78AB40977770}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sdcb.PaddleOCR.Models.Local", "src\Sdcb.PaddleOCR.Models.Local\Sdcb.PaddleOCR.Models.Local.csproj", "{0172BD09-B617-4FF6-8221-883029AD4877}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleOCR.Models.Local", "src\Sdcb.PaddleOCR.Models.Local\Sdcb.PaddleOCR.Models.Local.csproj", "{0172BD09-B617-4FF6-8221-883029AD4877}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleNLP.Lac", "src\Sdcb.PaddleNLP.Lac\Sdcb.PaddleNLP.Lac.csproj", "{5756186D-613D-4656-B21D-822AB4DD9F8F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleNLP.Lac.Tests", "tests\Sdcb.PaddleNLP.Lac.Tests\Sdcb.PaddleNLP.Lac.Tests.csproj", "{19222033-C5E1-4326-A597-75CF2DE4007F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleNLP.Lac.Model", "src\Sdcb.PaddleNLP.Lac.Model\Sdcb.PaddleNLP.Lac.Model.csproj", "{640420BD-CE2D-4108-B82D-8B4544FC2FB0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -148,6 +155,18 @@ Global
{0172BD09-B617-4FF6-8221-883029AD4877}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0172BD09-B617-4FF6-8221-883029AD4877}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0172BD09-B617-4FF6-8221-883029AD4877}.Release|Any CPU.Build.0 = Release|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Release|Any CPU.Build.0 = Release|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Release|Any CPU.Build.0 = Release|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -172,6 +191,9 @@ Global
{604827F0-00CB-48DC-AD4E-06AE386CD96A} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{EC79D45E-85D8-40E2-9F95-78AB40977770} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{0172BD09-B617-4FF6-8221-883029AD4877} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{5756186D-613D-4656-B21D-822AB4DD9F8F} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{19222033-C5E1-4326-A597-75CF2DE4007F} = {CA2A775C-763B-4B69-AC5B-4F90DD668E4A}
{640420BD-CE2D-4108-B82D-8B4544FC2FB0} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {083C9A35-8781-4D12-8146-B08E4A61DA8E}
Expand Down
8 changes: 5 additions & 3 deletions build/00-common.linq
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,17 @@ static ProjectVersion[] Projects = new[]
new ProjectVersion("Sdcb.Onnx", "1.11.22.423"), // 1.11.22.423
new ProjectVersion("Sdcb.Mkldnn", "0.19"), // 0.19
new ProjectVersion("Sdcb.Paddle2Onnx", "1.0.0.2"), // 1.0.0-rc.2
new ProjectVersion("Sdcb.PaddleInference", "2.5.1"),
new ProjectVersion("Sdcb.PaddleInference", "2.6.0-preview.2"),
new ProjectVersion("Sdcb.PaddleOCR", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.Online", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.Shared", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.Local", "2.7.0"),
new ProjectVersion("Sdcb.PaddleOCR.Models.LocalV3", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.LocalV4", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleDetection", "2.3.3"),
new ProjectVersion("Sdcb.RotationDetector", "1.0.3"),
new ProjectVersion("Sdcb.PaddleDetection", "2.3.3"),
new ProjectVersion("Sdcb.RotationDetector", "1.0.3"),
new ProjectVersion("Sdcb.PaddleNLP.Lac", "1.0.0-preview.6"),
new ProjectVersion("Sdcb.PaddleNLP.Lac.Model", "1.0.0"),
};

static async Task DownloadFile(Uri uri, string localFile, CancellationToken cancellationToken = default)
Expand Down
10 changes: 5 additions & 5 deletions build/01-build-native.linq
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ async Task Main()
await SetupAsync(QueryCancelToken);
//await new LinuxNuGetSource().Process(QueryCancelToken);

string mklDnnUrl = "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/mkldnn.zip";
string mklDnnUrl = "https://paddle-inference-lib.bj.bcebos.com/2.6.0/cxx_c/Windows/CPU/x86-64_avx-mkl-vs2019/paddle_inference_c.zip";

await MakeWin64Onnx(mklDnnUrl, QueryCancelToken);
await MakeWin64Mkldnn(mklDnnUrl, QueryCancelToken);
await MakeWin64Paddle2Onnx(mklDnnUrl, QueryCancelToken);

await MakeWin64PaddleMkl("mkl", mklDnnUrl, QueryCancelToken);
await MakeWin64PaddleOpenblas("openblas", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas.zip", QueryCancelToken);
await MakeWin64PaddleOpenblas("openblas-noavx", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas-noavx.zip", QueryCancelToken);
await MakeWin64PaddleMkl("cuda102_cudnn76_tr72_sm61_75", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu102.zip", QueryCancelToken);
await MakeWin64PaddleMkl("cuda118_cudnn86_tr85_sm86_89", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu118.zip", QueryCancelToken);
//await MakeWin64PaddleOpenblas("openblas", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas.zip", QueryCancelToken);
//await MakeWin64PaddleOpenblas("openblas-noavx", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas-noavx.zip", QueryCancelToken);
//await MakeWin64PaddleMkl("cuda102_cudnn76_tr72_sm61_75", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu102.zip", QueryCancelToken);
//await MakeWin64PaddleMkl("cuda118_cudnn86_tr85_sm86_89", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu118.zip", QueryCancelToken);
}

static Task MakeWin64PaddleOpenblas(string ridSuffix, string url, CancellationToken cancellationToken = default)
Expand Down
56 changes: 56 additions & 0 deletions docs/paddlenlp-lac.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Sdcb.PaddleNLP.Lac分词模型

## PaddleNLP Lac模型NuGet包

| 包名 💼 | 版本号 📌 | 描述 📚 |
| ------------------ | ---------------------------------------------------------------------------------------------------------------- | --------- |
| Sdcb.PaddleNLP.Lac | [![NuGet](https://img.shields.io/nuget/v/Sdcb.PaddleNLP.Lac.svg)](https://nuget.org/packages/Sdcb.PaddleNLP.Lac) | 模型自包含 |

# 使用方法及示例

## 需要安装的NuGet包
* Sdcb.PaddleNLP.Lac
* Sdcb.PaddleInference
* Sdcb.PaddleInference.runtime.win64.mkl

## 示例
### 1. 最简单的分词:
```csharp
string input = "我是中国人,我爱我的祖国。";
using ChineseSegmenter segmenter = new();
string[] result = segmenter.Segment(input);
Console.WriteLine(string.Join(",", result)); // 我,是,中国,人,,,我,爱,我的祖国,。
```

### 2. 词性标注:
```csharp
string input = "我爱北京天安门";
using ChineseSegmenter segmenter = new();
WordAndTag[] result = segmenter.Tagging(input);
string labels = string.Join(",", result.Select(x => x.Label));
string words = string.Join(",", result.Select(x => x.Word));
string tags = string.Join(",", result.Select(x => x.Tag));
Console.WriteLine(words); // 我,爱,北京,天安门
Console.WriteLine(labels); // r,v,LOC,LOC
Console.WriteLine(tags); // Pronoun,Verb,LocationName,LocationName
```

### 3. 自定义词库

```csharp
string input = "我爱北京天安门";
using ChineseSegmenter segmenter = new(new ()
{
CustomDictionary = new()
{
{ "北京天安门", WordTag.LocationName },
}
});
WordAndTag[] result = segmenter.Tagging(input);
string labels = string.Join(",", result.Select(x => x.Label));
string words = string.Join(",", result.Select(x => x.Word));
string tags = string.Join(",", result.Select(x => x.Tag));
Console.WriteLine(words); // 我,爱,北京天安门
Console.WriteLine(labels); // r,v,LOC
Console.WriteLine(tags); // Pronoun,Verb,LocationName
```
13 changes: 0 additions & 13 deletions src/Sdcb.PaddleInference/CompilerServices.cs

This file was deleted.

114 changes: 0 additions & 114 deletions src/Sdcb.PaddleInference/Native/PaddleNative.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,108 +16,6 @@ static PaddleNative()
#endif
}

/// <summary>
/// Raw view over an array of C strings: an element count followed by a pointer
/// to the individual string pointers.
/// </summary>
/// <remarks>
/// NOTE(review): presumably mirrors the layout of the native Paddle one-dimensional
/// C-string array — confirm against the Paddle Inference C headers.
/// </remarks>
private unsafe struct PdStringArray
{
    // Neither field is assigned from C# inside this struct, hence the CS0649 suppression;
    // presumably both are filled in by native code — TODO confirm.
#pragma warning disable CS0649
    public nint Size;
    public byte** Data;
#pragma warning restore CS0649

    /// <summary>
    /// Converts each of the <see cref="Size"/> pointers in <see cref="Data"/> to a managed string.
    /// </summary>
    /// <returns>A new array holding one string per element.</returns>
    public readonly string[] ToArray()
    {
        string[] strings = new string[Size];
        for (int index = 0; index < strings.Length; index++)
        {
            IntPtr element = (IntPtr)Data[index];
            strings[index] = element.UTF8PtrToString()!;
        }
        return strings;
    }
}

/// <summary>
/// Holds a pointer to a string array and releases it through
/// <c>PD_OneDimArrayCstrDestroy</c> when disposed.
/// </summary>
public unsafe ref struct PdStringArrayWrapper
{
    /// <summary>
    /// Pointer to the underlying string array. Reset to <see cref="IntPtr.Zero"/> by <see cref="Dispose"/>.
    /// </summary>
    public IntPtr ptr;

    /// <summary>
    /// Converts the array to an array of strings.
    /// </summary>
    /// <remarks>
    /// The pointer is dereferenced as-is, so this must be called while <see cref="ptr"/>
    /// is still valid (i.e. before <see cref="Dispose"/>).
    /// </remarks>
    /// <returns>The array of strings.</returns>
    public readonly unsafe string[] ToArray()
    {
        return ((PdStringArray*)ptr)->ToArray();
    }

    /// <summary>
    /// Destroys the underlying array and clears <see cref="ptr"/>.
    /// Calling it on a default or already-disposed wrapper does nothing.
    /// </summary>
    public void Dispose()
    {
        // Nothing to release: avoid handing a null pointer to the destroy function.
        if (ptr == IntPtr.Zero)
        {
            return;
        }

        PD_OneDimArrayCstrDestroy(ptr);
        ptr = IntPtr.Zero;
    }
}

/// <summary>
/// Raw view over an array of 32-bit integers: an element count followed by a
/// pointer to the elements.
/// </summary>
/// <remarks>
/// NOTE(review): presumably mirrors the layout of the native Paddle one-dimensional
/// int32 array — confirm against the Paddle Inference C headers.
/// </remarks>
private unsafe struct PdIntArray
{
    public nint Size;
    public int* Data;

    /// <summary>
    /// Copies the <see cref="Size"/> elements found at <see cref="Data"/> into a new managed array.
    /// </summary>
    /// <returns>A new array containing the copied elements.</returns>
    public readonly int[] ToArray()
    {
        int[] values = new int[Size];
        int index = 0;
        while (index < values.Length)
        {
            values[index] = Data[index];
            index++;
        }
        return values;
    }

    /// <summary>
    /// Passes the address of this instance to <c>PD_OneDimArrayInt32Destroy</c>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this looks meaningful only when the method is invoked in place through a
    /// pointer to the original allocation; on a by-value copy the address would be that of the
    /// copy — confirm how callers use it.
    /// </remarks>
    public unsafe void Dispose()
    {
        fixed (PdIntArray* self = &this)
        {
            PD_OneDimArrayInt32Destroy((IntPtr)self);
        }
    }
}

/// <summary>
/// Holds a pointer to an integer array and releases it through
/// <c>PD_OneDimArrayInt32Destroy</c> when disposed.
/// </summary>
public ref struct PdIntArrayWrapper
{
    /// <summary>
    /// Pointer to the underlying integer array. Reset to <see cref="IntPtr.Zero"/> by <see cref="Dispose"/>.
    /// </summary>
    public IntPtr ptr;

    /// <summary>
    /// Converts the array to an array of integers.
    /// </summary>
    /// <remarks>
    /// The pointer is dereferenced as-is, so this must be called while <see cref="ptr"/>
    /// is still valid (i.e. before <see cref="Dispose"/>).
    /// </remarks>
    /// <returns>The array of integers.</returns>
    public readonly unsafe int[] ToArray()
    {
        return ((PdIntArray*)ptr)->ToArray();
    }

    /// <summary>
    /// Destroys the underlying array and clears <see cref="ptr"/>.
    /// Calling it on a default or already-disposed wrapper does nothing.
    /// </summary>
    public void Dispose()
    {
        // Nothing to release: avoid handing a null pointer to the destroy function.
        if (ptr == IntPtr.Zero)
        {
            return;
        }

        PD_OneDimArrayInt32Destroy(ptr);
        ptr = IntPtr.Zero;
    }
}

/// <summary>
/// Path of the Paddle Inference C library.
/// </summary>
Expand All @@ -127,16 +25,4 @@ public void Dispose()
#elif NETSTANDARD2_0_OR_GREATER || NET6_0_OR_GREATER || LINQPAD
@"paddle_inference_c";
#endif

/// <summary>
/// Sequential-layout pair of a length and a pointer describing a C string.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct PdCStr
{
    public uint Length;
    public IntPtr Data;

    /// <summary>
    /// Converts the string at <see cref="Data"/> to a managed string using
    /// <see cref="Length"/> minus one as the length argument.
    /// </summary>
    /// <returns>The converted string, or whatever <c>ANSIToString</c> yields for the given pointer.</returns>
    public override readonly string? ToString()
    {
        // NOTE(review): the "- 1" presumably drops a trailing NUL counted in Length — confirm.
        int lengthArgument = (int)Length - 1;
        return Data.ANSIToString(lengthArgument);
    }
}
}
Loading