Skip to content

Commit 5a39cad

Browse files
authored
plt add nlp case, test=model (#2998)
* plt update, test=model * plt update, test=model * plt update, test=model * plt update testing reporter, test=model * plt update testing reporter, test=model * plt update testing reporter, test=model * plt update test reporter, test=model * plt update test reporter, test=model * plt update test reporter, test=model * plt update test reporter, test=model * plt update test reporter, test=model * plt update test reporter, test=model * plt add pr_info * plt fix ocr case, test=model * plt fix ocr case, test=model * plt add nlp case, test=model * plt add nlp case, test=model * plt add nlp case, test=model * plt add nlp case, test=model * plt add nlp case, test=model * plt add more nlp case, test=model * plt add more nlp case, test=model * plt add nlp, test=model * plt fix case, test=model
1 parent 7815d61 commit 5a39cad

File tree

172 files changed

+2261
-18
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

172 files changed

+2261
-18
lines changed

framework/e2e/PaddleLT_new/engine/paddle_train.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ def _get_data_grad(self, data):
119119
"""记录list[inputs...]中的input.grad并生成list[input.grad...]"""
120120
data_grad = []
121121
for i in data:
122-
data_grad.append(i.grad)
122+
if isinstance(i, paddle.Tensor):
123+
data_grad.append(i.grad)
123124
return data_grad
124125

125126
def dy_train(self):

framework/e2e/PaddleLT_new/generator/builder_data.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@
1616
import layercase
1717

1818
if os.environ.get("USE_PADDLE_MODEL", "None") == "PaddleOCR":
19-
import layerModelcase
19+
import layerOCRcase
2020
import PaddleOCR
21+
elif os.environ.get("USE_PADDLE_MODEL", "None") == "PaddleNLP":
22+
import layerNLPcase
23+
import paddlenlp
2124
elif os.environ.get("FRAMEWORK") == "torch":
2225
import torch
2326
import layerTorchcase
@@ -40,16 +43,33 @@ def get_single_data(self):
4043
data = []
4144
# for i in eval(dataname):
4245
for i in getattr(self.layer_module, "create_numpy_inputs")():
43-
if os.environ.get("FRAMEWORK") == "paddle":
44-
if i.dtype == np.int64 or i.dtype == np.int32:
45-
data.append(paddle.to_tensor(i, stop_gradient=True))
46-
else:
47-
data.append(paddle.to_tensor(i, stop_gradient=False))
48-
elif os.environ.get("FRAMEWORK") == "torch":
49-
if i.dtype == np.int64 or i.dtype == np.int32:
50-
data.append(torch.tensor(i, requires_grad=False))
51-
else:
52-
data.append(torch.tensor(i, requires_grad=True))
46+
if isinstance(i, (tuple, list)): # 为了适配list输入的模型子图
47+
tmp = []
48+
for j in i:
49+
if os.environ.get("FRAMEWORK") == "paddle":
50+
if j.dtype == np.int64 or j.dtype == np.int32:
51+
tmp.append(paddle.to_tensor(j, stop_gradient=True))
52+
else:
53+
tmp.append(paddle.to_tensor(j, stop_gradient=False))
54+
elif os.environ.get("FRAMEWORK") == "torch":
55+
if j.dtype == np.int64 or j.dtype == np.int32:
56+
tmp.append(torch.tensor(j, requires_grad=False))
57+
else:
58+
tmp.append(torch.tensor(j, requires_grad=True))
59+
data.append(tmp)
60+
elif isinstance(i, np.ndarray):
61+
if os.environ.get("FRAMEWORK") == "paddle":
62+
if i.dtype == np.int64 or i.dtype == np.int32:
63+
data.append(paddle.to_tensor(i, stop_gradient=True))
64+
else:
65+
data.append(paddle.to_tensor(i, stop_gradient=False))
66+
elif os.environ.get("FRAMEWORK") == "torch":
67+
if i.dtype == np.int64 or i.dtype == np.int32:
68+
data.append(torch.tensor(i, requires_grad=False))
69+
else:
70+
data.append(torch.tensor(i, requires_grad=True))
71+
else:
72+
data.append(i)
5373
else:
5474
data = self.get_single_tensor()
5575

framework/e2e/PaddleLT_new/generator/builder_layer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
import layercase
1515

1616
if os.environ.get("USE_PADDLE_MODEL", "None") == "PaddleOCR":
17-
import layerModelcase
17+
import layerOCRcase
1818
import PaddleOCR
19+
elif os.environ.get("USE_PADDLE_MODEL", "None") == "PaddleNLP":
20+
import layerNLPcase
21+
import paddlenlp
1922
elif os.environ.get("FRAMEWORK") == "torch":
2023
import torch
2124
import layerTorchcase

framework/e2e/PaddleLT_new/layerModelcase/__init__.py renamed to framework/e2e/PaddleLT_new/layerNLPcase/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515
# 获取文件名(不含扩展名)
1616
module_name = os.path.basename(folder)
1717
# 导入模块
18-
__import__('layerModelcase.' + module_name, globals(), locals(), [])
18+
__import__('layerNLPcase.' + module_name, globals(), locals(), [])
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Import every ``*.py`` file in this directory as a submodule of
``layerNLPcase.transformers.layoutlmv2`` when the package is imported."""
import glob
import importlib
import os

# Directory that contains this file.
current_dir = os.path.dirname(__file__)

# Paths of all .py files in this directory; sorted so the import order does
# not depend on the order the filesystem happens to return.
py_files = sorted(glob.glob(os.path.join(current_dir, "*.py")))

# Dynamically import every .py file found above.
for py_file in py_files:
    # File name without the ".py" extension.
    module_name = os.path.splitext(os.path.basename(py_file))[0]
    if module_name == "__init__":
        # The glob also matches this file; importing "<package>.__init__"
        # would execute this script a second time under another module name.
        continue
    importlib.import_module("layerNLPcase.transformers.layoutlmv2." + module_name)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import paddle
2+
import numpy as np
3+
from paddlenlp.transformers import LayoutLMv2Model, LayoutLMv2Tokenizer
4+
5+
def LayerCase():
    """Return a LayoutLMv2Model built via ``from_pretrained('layoutlmv2-base-uncased')``.

    Original note: model-zoo intermediate-state case.
    """
    return LayoutLMv2Model.from_pretrained('layoutlmv2-base-uncased')
9+
10+
def create_inputspec():
    """Return the 4-slot input-spec tuple for this case.

    Slots 0 and 3 carry an ``InputSpec`` of shape ``(-1, 13)``; slots 1 and 2
    are ``None``. The slot layout mirrors the tuples returned by
    ``create_tensor_inputs`` / ``create_numpy_inputs`` in this file, where
    slot 0 is ``input_ids`` and slot 3 is ``token_type_ids``.
    """
    # NOTE(review): both specs are declared float32 although the matching
    # inputs come from tokenizer ids — confirm the intended dtype.
    first_spec = paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False)
    last_spec = paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False)
    return (first_spec, None, None, last_spec)
18+
19+
20+
def create_tensor_inputs():
    """Tokenize a fixed sentence and return the model inputs as paddle tensors.

    Returns:
        A 4-tuple ``(input_ids, None, None, token_type_ids)``. Both tensors
        are created with ``stop_gradient=False`` and carry a leading batch
        dimension of size 1.
    """
    tokenizer = LayoutLMv2Tokenizer.from_pretrained('layoutlmv2-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    # Wrap each id list in an outer list so the tensors are 2-D. This matches
    # create_numpy_inputs (np.array([...])) and the (-1, 13) InputSpec in this
    # file; previously the tensors were built from the flat list and were 1-D.
    inputs = (
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        None,
        None,
        paddle.to_tensor([inputs_dict['token_type_ids']], stop_gradient=False),
    )
    return inputs
30+
31+
32+
def create_numpy_inputs():
    """Tokenize a fixed sentence and return the model inputs as numpy arrays.

    Returns:
        A 4-tuple ``(input_ids, None, None, token_type_ids)``; each array is
        built from the id list wrapped in one outer list.
    """
    tokenizer = LayoutLMv2Tokenizer.from_pretrained('layoutlmv2-base-uncased')
    encoded = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    input_ids = np.array([encoded['input_ids']])
    token_type_ids = np.array([encoded['token_type_ids']])
    return (input_ids, None, None, token_type_ids)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Import every ``*.py`` file in this directory as a submodule of
``layerNLPcase.transformers.layoutxlm`` when the package is imported."""
import glob
import importlib
import os

# Directory that contains this file.
current_dir = os.path.dirname(__file__)

# Paths of all .py files in this directory; sorted so the import order does
# not depend on the order the filesystem happens to return.
py_files = sorted(glob.glob(os.path.join(current_dir, "*.py")))

# Dynamically import every .py file found above.
for py_file in py_files:
    # File name without the ".py" extension.
    module_name = os.path.splitext(os.path.basename(py_file))[0]
    if module_name == "__init__":
        # The glob also matches this file; importing "<package>.__init__"
        # would execute this script a second time under another module name.
        continue
    importlib.import_module("layerNLPcase.transformers.layoutxlm." + module_name)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import paddle
2+
import numpy as np
3+
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer
4+
5+
def LayerCase():
    """Return a LayoutXLMModel built via ``from_pretrained('layoutxlm-base-uncased')``.

    Original note: model-zoo intermediate-state case.
    """
    return LayoutXLMModel.from_pretrained('layoutxlm-base-uncased')
9+
10+
def create_inputspec():
    """Return the 4-slot input-spec tuple for this case.

    Slots 0 and 3 carry an ``InputSpec`` of shape ``(-1, 13)``; slots 1 and 2
    are ``None``. The slot layout mirrors the tuples returned by
    ``create_tensor_inputs`` / ``create_numpy_inputs`` in this file, where
    slot 0 is ``input_ids`` and slot 3 is ``token_type_ids``.
    """
    # NOTE(review): both specs are declared float32 although the matching
    # inputs come from tokenizer ids — confirm the intended dtype.
    first_spec = paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False)
    last_spec = paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False)
    return (first_spec, None, None, last_spec)
18+
19+
20+
def create_tensor_inputs():
    """Tokenize a fixed sentence and return the model inputs as paddle tensors.

    Returns:
        A 4-tuple ``(input_ids, None, None, token_type_ids)``. Both tensors
        are created with ``stop_gradient=False`` and carry a leading batch
        dimension of size 1.
    """
    tokenizer = LayoutXLMTokenizer.from_pretrained('layoutxlm-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    # Wrap each id list in an outer list so the tensors are 2-D. This matches
    # create_numpy_inputs (np.array([...])) and the (-1, 13) InputSpec in this
    # file; previously the tensors were built from the flat list and were 1-D.
    inputs = (
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        None,
        None,
        paddle.to_tensor([inputs_dict['token_type_ids']], stop_gradient=False),
    )
    return inputs
30+
31+
32+
def create_numpy_inputs():
    """Tokenize a fixed sentence and return the model inputs as numpy arrays.

    Returns:
        A 4-tuple ``(input_ids, None, None, token_type_ids)``; each array is
        built from the id list wrapped in one outer list.
    """
    tokenizer = LayoutXLMTokenizer.from_pretrained('layoutxlm-base-uncased')
    encoded = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    input_ids = np.array([encoded['input_ids']])
    token_type_ids = np.array([encoded['token_type_ids']])
    return (input_ids, None, None, token_type_ids)

framework/e2e/PaddleLT_new/layerModelcase/Ocr_cases/backbones/__init__.py renamed to framework/e2e/PaddleLT_new/layerNLPcase/debug/case_bug/transformers/reformer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
# 获取文件名(不含扩展名)
1313
module_name = os.path.basename(py_file)[:-3]
1414
# 导入模块
15-
__import__("layerModelcase.Ocr_cases.backbones." + module_name, globals(), locals(), [])
15+
__import__("layerNLPcase.transformers.reformer." + module_name, globals(), locals(), [])
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import paddle
2+
import numpy as np
3+
from paddlenlp.transformers import ReformerModel, ReformerTokenizer
4+
5+
def LayerCase():
    """Return a ReformerModel built via ``from_pretrained('reformer-crime-and-punishment')``.

    Original note: model-zoo intermediate-state case.
    """
    return ReformerModel.from_pretrained('reformer-crime-and-punishment')
9+
10+
def create_inputspec():
    """Return a 2-tuple of ``InputSpec`` objects, each of shape ``(-1, 15)``."""
    # NOTE(review): both specs are declared float32 although the matching
    # inputs come from tokenizer output — confirm the intended dtype.
    first_spec = paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)
    second_spec = paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)
    return (first_spec, second_spec)
16+
17+
18+
def create_tensor_inputs():
    """Tokenize a fixed sentence and return every tokenizer field as a paddle tensor.

    Returns:
        A tuple with one tensor per entry of the tokenizer output, in the
        output's own order. Each value is wrapped in one outer list before
        conversion and created with ``stop_gradient=False``.
    """
    tokenizer = ReformerTokenizer.from_pretrained('reformer-crime-and-punishment')
    encoded = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    tensors = []
    # Only the values are needed; the field names are not used.
    for value in encoded.values():
        tensors.append(paddle.to_tensor([value], stop_gradient=False))
    return tuple(tensors)
23+
24+
25+
def create_numpy_inputs():
    """Tokenize a fixed sentence and return every tokenizer field as a numpy array.

    Returns:
        A tuple with one array per entry of the tokenizer output, in the
        output's own order. Each value is wrapped in one outer list before
        conversion.
    """
    tokenizer = ReformerTokenizer.from_pretrained('reformer-crime-and-punishment')
    encoded = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    arrays = []
    # Only the values are needed; the field names are not used.
    for value in encoded.values():
        arrays.append(np.array([value]))
    return tuple(arrays)

0 commit comments

Comments
 (0)