From 8337bdc31918aa5c105058800dfddecf8fd6c933 Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Fri, 3 Nov 2023 16:38:28 +0800
Subject: [PATCH 1/3] huggingface using local path

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 nlp/question_answering/bert/pytorch/README.md | 33 ++++++++++++-------
 nlp/question_answering/bert/pytorch/run.sh    |  2 +-
 .../bert/pytorch/run_dist.sh                  |  2 +-
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/nlp/question_answering/bert/pytorch/README.md b/nlp/question_answering/bert/pytorch/README.md
index 5adbdb524..526de8681 100644
--- a/nlp/question_answering/bert/pytorch/README.md
+++ b/nlp/question_answering/bert/pytorch/README.md
@@ -4,32 +4,43 @@
 
 BERT-base SQuAD task Fine-tuning
 
-## Step 1: Installing packages
+## Step 1: Installation
 
-``` shell
-cd  <your_project_path>/nlp/querstion_answering/bert/pytorch
+``` bash
 pip3 install -r requirements.txt
 ```
 
-## Step 2: Training
+## Step 2: Preparing the pretrained model
+```bash
+# Get "bert-base-uncased" from [Huggingface](https://huggingface.co/bert-base-uncased)
 
-### On single GPU
+## Install lfs
+wget https://packagecloud.io/github/git-lfs/packages/el/7/git-lfs-2.13.2-1.el7.x86_64.rpm/download -O lfs.rpm
+rpm -ivh lfs.rpm
 
-``` shell
-bash run.sh
+## Clone from Huggingface, it may take long time for large file
+git lfs install
+git clone https://huggingface.co/bert-base-uncased
 ```
 
-### Multiple GPUs on one machine
+## Step 3: Training
+
+**Make sure you've got "bert-base-uncased" ready in `./bert-base-uncased`**
 
-```shell
+```bash
+# On single GPU
+bash run.sh
+
+# Multiple GPUs on one machine
 bash run_dist.sh
 ```
+
 ## Results on BI-V100
 
 | GPUs | Samples/s | F1     |
 |------|-----------|--------|
-| 1x1  | 128.86    | 87     |
-| 1x8  | 208.6     | 78.69  |
+| BI-V100 x1 | 128.86 | 87 |
+| BI-V100 x8 | 208.6 | 78.69 |
 
 ## Reference
 https://github.com/huggingface/
diff --git a/nlp/question_answering/bert/pytorch/run.sh b/nlp/question_answering/bert/pytorch/run.sh
index 921bd2e31..d64e59c6f 100644
--- a/nlp/question_answering/bert/pytorch/run.sh
+++ b/nlp/question_answering/bert/pytorch/run.sh
@@ -15,7 +15,7 @@
 
 
 python3 run_qa.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path ./bert-base-uncased \
   --dataset_name squad \
   --do_train \
   --do_eval \
diff --git a/nlp/question_answering/bert/pytorch/run_dist.sh b/nlp/question_answering/bert/pytorch/run_dist.sh
index 4bc503363..28a5c9851 100644
--- a/nlp/question_answering/bert/pytorch/run_dist.sh
+++ b/nlp/question_answering/bert/pytorch/run_dist.sh
@@ -15,7 +15,7 @@
 
 python3  -m torch.distributed.launch --nproc_per_node=8 --master_port 12333 \
   run_qa.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path ./bert-base-uncased \
   --dataset_name squad \
   --do_train \
   --do_eval \
-- 
Gitee


From 1da8c0794cdbfff0a9ade90830dfcadb78f70b48 Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Fri, 3 Nov 2023 17:19:17 +0800
Subject: [PATCH 2/3] update bert ner

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 nlp/ner/bert/pytorch/README.md                | 37 +++++++++++++------
 nlp/ner/bert/pytorch/run.sh                   |  2 +-
 nlp/ner/bert/pytorch/run_dist.sh              |  2 +-
 nlp/question_answering/bert/pytorch/README.md |  2 +-
 4 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/nlp/ner/bert/pytorch/README.md b/nlp/ner/bert/pytorch/README.md
index c528ec1b0..0d3d06e12 100644
--- a/nlp/ner/bert/pytorch/README.md
+++ b/nlp/ner/bert/pytorch/README.md
@@ -4,32 +4,45 @@
 
 BERT-base NER task Fine-tuning
 
-## Step 1: Installing packages
+## Step 1: Installation
 
-``` shell
-cd  <your_project_path>/nlp/ner/bert/pytorch
+```bash
 pip3 install -r requirements.txt
 ```
 
-## Step 2: Training
+## Step 2: Preparing the pretrained model
 
-### On single GPU
+```bash
+# Get "bert-base-uncased" from [Huggingface](https://huggingface.co/bert-base-uncased)
 
-``` shell
-bash run.sh
+## Install lfs
+wget https://packagecloud.io/github/git-lfs/packages/el/7/git-lfs-2.13.2-1.el7.x86_64.rpm/download -O lfs.rpm
+rpm -ivh lfs.rpm
+
+## Clone from Huggingface; it may take a long time because of the large files
+git lfs install
+git clone https://huggingface.co/bert-base-uncased
 ```
 
-### Multiple GPUs on one machine
 
-```shell
+## Step 3: Training
+
+**Make sure you've got "bert-base-uncased" ready in `./bert-base-uncased`**
+
+```bash
+# On single GPU
+bash run.sh
+
+# Multiple GPUs on one machine
 bash run_dist.sh
 ```
-## Results on BI-V100
+
+## Results
 
 | GPUs | Samples/s  | Loss |
 |------|------|----|
-| 1x1  | 100 | 0.0696 |
-| 1x8  | 252 | 0.0688 |
+| BI-V100 x1 | 100 | 0.0696 |
+| BI-V100 x8 | 252 | 0.0688 |
 
 ## Reference
 https://github.com/huggingface/
diff --git a/nlp/ner/bert/pytorch/run.sh b/nlp/ner/bert/pytorch/run.sh
index 2dd49117d..2e32b7e79 100755
--- a/nlp/ner/bert/pytorch/run.sh
+++ b/nlp/ner/bert/pytorch/run.sh
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 python3 run_ner.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path ./bert-base-uncased \
   --dataset_name conll2003 \
   --output_dir /tmp/test-ner \
   --do_train \
diff --git a/nlp/ner/bert/pytorch/run_dist.sh b/nlp/ner/bert/pytorch/run_dist.sh
index c55803c15..f80a1ef8f 100755
--- a/nlp/ner/bert/pytorch/run_dist.sh
+++ b/nlp/ner/bert/pytorch/run_dist.sh
@@ -15,7 +15,7 @@
 
 python3 -m torch.distributed.launch --nproc_per_node=8 --master_port 12333  \
   run_ner.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path ./bert-base-uncased \
   --dataset_name conll2003 \
   --output_dir /tmp/test-ner \
   --do_train \
diff --git a/nlp/question_answering/bert/pytorch/README.md b/nlp/question_answering/bert/pytorch/README.md
index 526de8681..9f525da3e 100644
--- a/nlp/question_answering/bert/pytorch/README.md
+++ b/nlp/question_answering/bert/pytorch/README.md
@@ -20,7 +20,7 @@ rpm -ivh lfs.rpm
 
 ## Clone from Huggingface, it may take long time for large file
 git lfs install
-git clone https://huggingface.co/bert-base-uncased
+git clone --depth 1 https://huggingface.co/bert-base-uncased
 ```
 
 ## Step 3: Training
-- 
Gitee


From 72c3d9d38c1a21547b8622f30f62995eeb8e72ac Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Fri, 3 Nov 2023 17:44:21 +0800
Subject: [PATCH 3/3] update bert text classification

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 .../bert/pytorch/README.md                    | 35 +++++++++++++------
 nlp/text_classification/bert/pytorch/train.sh |  2 +-
 .../bert/pytorch/train_dist.sh                |  2 +-
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/nlp/text_classification/bert/pytorch/README.md b/nlp/text_classification/bert/pytorch/README.md
index f1c8e77a6..c80c82085 100644
--- a/nlp/text_classification/bert/pytorch/README.md
+++ b/nlp/text_classification/bert/pytorch/README.md
@@ -4,32 +4,45 @@
 
 BERT-base WNLI task Fine-tuning
 
-## Step 1: Installing packages
+## Step 1: Installation
 
 ``` shell
-cd  <your_project_path>/nlp/text_classification/bert/pytorch
 pip3 install -r requirements.txt
 ```
 
-## Step 2: Training
+## Step 2: Preparing the pretrained model
 
-### On single GPU
+```bash
+# Get "bert-base-cased" from [Huggingface](https://huggingface.co/bert-base-cased)
 
-``` shell
-bash train.sh
+## Install lfs
+wget https://packagecloud.io/github/git-lfs/packages/el/7/git-lfs-2.13.2-1.el7.x86_64.rpm/download -O lfs.rpm
+rpm -ivh lfs.rpm
+
+## Clone from Huggingface; it may take a long time because of the large files
+git lfs install
+git clone https://huggingface.co/bert-base-cased
 ```
 
-### Multiple GPUs on one machine
+## Step 3: Training
+
+**Make sure you've got "bert-base-cased" ready in `./bert-base-cased`**
 
-```shell
+
+```bash
+# On single GPU
+bash train.sh
+
+# Multiple GPUs on one machine
 bash train_dist.sh
 ```
-## Results on BI-V100
+
+## Results
 
 | GPUs | Samples/s | Loss |
 |------|-----------|------|
-| 1x1  | 144.5     | 0.74 |
-| 1x8  | 322.74    | 0.71 |
+| BI-V100 x1 | 144.5 | 0.74 |
+| BI-V100 x8 | 322.74 | 0.71 |
 
 ## Reference
 https://github.com/huggingface/
diff --git a/nlp/text_classification/bert/pytorch/train.sh b/nlp/text_classification/bert/pytorch/train.sh
index 238abee2f..abedf907c 100644
--- a/nlp/text_classification/bert/pytorch/train.sh
+++ b/nlp/text_classification/bert/pytorch/train.sh
@@ -16,7 +16,7 @@
 export TASK_NAME=WNLI
 
 python3 run_glue.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path ./bert-base-cased \
   --task_name $TASK_NAME \
   --do_train \
   --do_eval \
diff --git a/nlp/text_classification/bert/pytorch/train_dist.sh b/nlp/text_classification/bert/pytorch/train_dist.sh
index 458f1cb0a..05794aa34 100644
--- a/nlp/text_classification/bert/pytorch/train_dist.sh
+++ b/nlp/text_classification/bert/pytorch/train_dist.sh
@@ -17,7 +17,7 @@
 export TASK_NAME=WNLI
 python3  -m torch.distributed.launch --nproc_per_node=8 --master_port 12333 \
   run_glue.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path ./bert-base-cased \
   --task_name $TASK_NAME \
   --do_train \
   --do_eval \
-- 
Gitee