Skip to content

Commit dc3ba44

Browse files
author
刘宇
committed
.ignore some file
1 parent 960d173 commit dc3ba44

27 files changed

+144
-7365
lines changed

.gitignore

+3-3
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,6 @@ dmypy.json
138138

139139
# Cython debug symbols
140140
cython_debug/
141-
.txt
142-
.json
143-
.jsonl
141+
data/
142+
datasets/
143+
runs/

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/ERNIE_pretrain/README.md

-7
This file was deleted.

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/bert_pretrain/README.md

-7
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data from url: https://tianchi.aliyun.com/competition/entrance/532044/information

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/models/bert.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import torch
22
import torch.nn as nn
3-
# from pytorch_pretrained_bert import BertModel, BertTokenizer
4-
from pytorch_pretrained import BertModel, BertTokenizer
3+
from transformers import BertModel, BertTokenizer
54

65

76
class Config(object):

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/preprocess.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@ def load_json(path):
88
return data
99

1010

11-
data_dir = '../../../dataset'
12-
# data_dir = 'dataset'
11+
data_dir = os.path.join(os.path.dirname(__file__), "data")
1312
train_set = load_json(os.path.join(data_dir, 'train.json'))
1413
dev_set = load_json(os.path.join(data_dir, 'dev.json'))
1514
test_set = load_json(os.path.join(data_dir, 'test.json'))
1615

17-
saved_path = 'THUCNews/data'
16+
saved_path = os.path.join(os.path.dirname(__file__), "data/process_data")
1817
os.makedirs(saved_path, exist_ok=True)
1918

2019
tags = [
@@ -33,14 +32,18 @@ def make_tag(path):
3332
f.write(tag + '\n')
3433

3534

36-
def make_data(samples, path):
35+
def make_data(samples, path, is_train=True):
3736
out = ''
38-
for pid, sample in samples.items():
39-
for sent in sample['dialogue']:
37+
for pid, sample in samples.items(): # sample is list
38+
for sent in sample:
4039
x = sent['speaker'] + ':' + sent['sentence']
41-
assert sent['dialogue_act'] in tag2id
42-
y = tag2id.get(sent['dialogue_act'])
43-
out += x + '\t' + str(y) + '\n'
40+
if is_train:
41+
assert sent['dialogue_act'] in tag2id
42+
y = tag2id.get(sent['dialogue_act'])
43+
else:
44+
y = ""
45+
out += (x + '\t' + str(y)).strip() + '\n'
46+
print(path)
4447
with open(path, 'w', encoding='utf-8') as f:
4548
f.write(out)
4649
return out
@@ -50,4 +53,4 @@ def make_data(samples, path):
5053

5154
make_data(train_set, os.path.join(saved_path, 'train.txt'))
5255
make_data(dev_set, os.path.join(saved_path, 'dev.txt'))
53-
make_data(test_set, os.path.join(saved_path, 'test.txt'))
56+
make_data(test_set, os.path.join(saved_path, 'test.txt'), is_train=False)

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/pytorch_pretrained/__init__.py

-24
This file was deleted.

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/pytorch_pretrained/__main__.py

-89
This file was deleted.

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/pytorch_pretrained/convert_gpt2_checkpoint_to_pytorch.py

-72
This file was deleted.

项目实战/医疗诊疗对话意图识别挑战赛/BERT-DAC/pytorch_pretrained/convert_openai_checkpoint_to_pytorch.py

-72
This file was deleted.

0 commit comments

Comments
 (0)