Merge remote-tracking branch 'gitee/lyh' into lyh

NetManAIOps · Mar 18, 2024 · 03b5591 · 03b5591
2 parents 9cb0e09 + 7be1ec2
commit 03b5591
Show file tree

Hide file tree

Showing 1,408 changed files with 74,766 additions and 9,799 deletions.
diff --git a/.codespellrc b/.codespellrc
@@ -2,4 +2,4 @@
 skip = *.ipynb
 count =
 quiet-level = 3
-ignore-words-list = nd, ans, ques, rouge, softwares
+ignore-words-list = nd, ans, ques, rouge, softwares, wit
diff --git a/.github/ISSUE_TEMPLATE/1_bug-report.yml b/.github/ISSUE_TEMPLATE/1_bug-report.yml
@@ -6,7 +6,7 @@ body:
   - type: markdown
     attributes:
       value: |
-        For general questions or idea discussions, please post it to our [**Forum**](https://github.com/InternLM/opencompass/discussions).
+        For general questions or idea discussions, please post it to our [**Forum**](https://github.com/open-compass/opencompass/discussions).
         If you have already identified the reason, we strongly appreciate you creating a new PR according to [the tutorial](https://opencompass.readthedocs.io/en/master/community/CONTRIBUTING.html)!
         If you need our help, please fill in the following form to help us to identify the bug.
 
@@ -15,9 +15,9 @@ body:
       label: Prerequisite
       description: Please check the following items before creating a new issue.
       options:
-      - label: I have searched [Issues](https://github.com/InternLM/opencompass/issues/) and [Discussions](https://github.com/InternLM/opencompass/discussions) but cannot get the expected help.
+      - label: I have searched [Issues](https://github.com/open-compass/opencompass/issues/) and [Discussions](https://github.com/open-compass/opencompass/discussions) but cannot get the expected help.
         required: true
-      - label: The bug has not been fixed in the [latest version](https://github.com/InternLM/opencompass).
+      - label: The bug has not been fixed in the [latest version](https://github.com/open-compass/opencompass).
         required: true
 
   - type: dropdown

diff --git a/.github/ISSUE_TEMPLATE/2_feature-request.yml b/.github/ISSUE_TEMPLATE/2_feature-request.yml
@@ -6,7 +6,7 @@ body:
   - type: markdown
     attributes:
       value: |
-        For general questions or idea discussions, please post it to our [**Forum**](https://github.com/InternLM/opencompass/discussions).
+        For general questions or idea discussions, please post it to our [**Forum**](https://github.com/open-compass/opencompass/discussions).
         If you have already implemented the feature, we strongly appreciate you creating a new PR according to [the tutorial](https://opencompass.readthedocs.io/en/master/community/CONTRIBUTING.html)!
 
   - type: textarea

diff --git a/.github/ISSUE_TEMPLATE/3_bug-report_zh.yml b/.github/ISSUE_TEMPLATE/3_bug-report_zh.yml
@@ -7,7 +7,7 @@ body:
     attributes:
       value: |
         我们推荐使用英语模板 Bug report，以便你的问题帮助更多人。
-        如果需要询问一般性的问题或者想法，请在我们的[**论坛**](https://github.com/InternLM/opencompass/discussions)讨论。
+        如果需要询问一般性的问题或者想法，请在我们的[**论坛**](https://github.com/open-compass/opencompass/discussions)讨论。
         如果你已经有了解决方案，我们非常欢迎你直接创建一个新的 PR 来解决这个问题。创建 PR 的流程可以参考[文档](https://opencompass.readthedocs.io/zh_CN/master/community/CONTRIBUTING.html)。
         如果你需要我们的帮助，请填写以下内容帮助我们定位 Bug。
 
@@ -16,9 +16,9 @@ body:
       label: 先决条件
       description: 在创建新问题之前，请检查以下项目。
       options:
-      - label: 我已经搜索过 [问题](https://github.com/InternLM/opencompass/issues/) 和 [讨论](https://github.com/InternLM/opencompass/discussions) 但未得到预期的帮助。
+      - label: 我已经搜索过 [问题](https://github.com/open-compass/opencompass/issues/) 和 [讨论](https://github.com/open-compass/opencompass/discussions) 但未得到预期的帮助。
         required: true
-      - label: 错误在 [最新版本](https://github.com/InternLM/opencompass) 中尚未被修复。
+      - label: 错误在 [最新版本](https://github.com/open-compass/opencompass) 中尚未被修复。
         required: true
 
   - type: dropdown

diff --git a/.github/ISSUE_TEMPLATE/4_feature-request_zh.yml b/.github/ISSUE_TEMPLATE/4_feature-request_zh.yml
@@ -7,7 +7,7 @@ body:
     attributes:
       value: |
         推荐使用英语模板 Feature request，以便你的问题帮助更多人。
-        如果需要询问一般性的问题或者想法，请在我们的[**论坛**](https://github.com/InternLM/opencompass/discussions)讨论。
+        如果需要询问一般性的问题或者想法，请在我们的[**论坛**](https://github.com/open-compass/opencompass/discussions)讨论。
         如果你已经实现了该功能，我们非常欢迎你直接创建一个新的 PR 来解决这个问题。创建 PR 的流程可以参考[文档](https://opencompass.readthedocs.io/zh_CN/master/community/CONTRIBUTING.html)。
 
   - type: textarea

diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
@@ -5,7 +5,7 @@ contact_links:
     url: https://opencompass.readthedocs.io/en/latest/
     about: Check if your question is answered in docs
   - name: 💬 General questions (寻求帮助)
-    url: https://github.com/InternLM/OpenCompass/discussions
+    url: https://github.com/open-compass/opencompass/discussions
     about: Ask general usage questions and discuss with other OpenCompass community members
   - name: 🌐 Explore OpenCompass (官网)
     url: https://opencompass.org.cn/

diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py
@@ -0,0 +1,96 @@
+import csv
+import os
+
+import pytest
+import yaml
+
+# Work directory of the daily regression run; the ``result_scores`` fixture
+# searches it for the result CSV.
+output_path = 'regression_result_daily'
+
+# Models and datasets whose scores are checked. Their cross product
+# parametrizes ``TestChat.test_model_dataset_score``.
+model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
+dataset_list = [
+    'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
+    'openbookqa_fact'
+]
+
+
+@pytest.fixture()
+def baseline_scores(request):
+    """Load the expected scores from ``oc_score_baseline.yaml``.
+
+    The file is resolved relative to the pytest root directory and parsed
+    with the safe YAML loader.
+    """
+    baseline_file = os.path.join(request.config.rootdir,
+                                 '.github/scripts/oc_score_baseline.yaml')
+    with open(baseline_file) as stream:
+        return yaml.safe_load(stream)
+
+
+@pytest.fixture()
+def result_scores():
+    """Parse the single result CSV found under ``output_path``.
+
+    Returns ``None`` when the directory contains no CSV file.
+    """
+    csv_path = find_csv_files(output_path)
+    return None if csv_path is None else read_csv_file(csv_path)
+
+
+@pytest.mark.usefixtures('result_scores')
+@pytest.mark.usefixtures('baseline_scores')
+class TestChat:
+    """Test cases for chat model."""
+
+    @pytest.mark.parametrize('model, dataset', [(p1, p2) for p1 in model_list
+                                                for p2 in dataset_list])
+    def test_model_dataset_score(self, baseline_scores, result_scores, model,
+                                 dataset):
+        """Compare one model/dataset result with its baseline.
+
+        Missing inputs (no result CSV, unknown model, no baseline entry) are
+        reported as explicit assertion failures instead of surfacing as an
+        ``AttributeError`` on ``None``.
+        """
+        assert result_scores is not None, (
+            'no result csv found under ' + output_path)
+        assert model in (baseline_scores or {}), (
+            'no baseline for model ' + model)
+        assert model in result_scores, 'no result for model ' + model
+
+        base_score = baseline_scores[model].get(dataset)
+        assert base_score is not None, (
+            'no baseline for ' + model + ' on ' + dataset)
+        result_score = result_scores[model].get(dataset)
+        assert_score(result_score, base_score)
+
+
+def assert_score(score, baseline):
+    """Assert that ``score`` lies strictly within 3% of ``baseline``.
+
+    Args:
+        score: Measured value, either a number or its string form as read
+            from the CSV. ``None`` and ``'-'`` mean "no value".
+        baseline: Expected numeric value.
+
+    Raises:
+        AssertionError: If the score or baseline is missing, or the score
+            is not strictly between ``baseline * 0.97`` and
+            ``baseline * 1.03``.
+    """
+    if score is None or score == '-':
+        raise AssertionError('value is none')
+    if baseline is None:
+        raise AssertionError('baseline is none')
+
+    lower, upper = baseline * 0.97, baseline * 1.03
+    # f-strings keep the message working for numeric scores as well; the
+    # previous ``score + '...'`` concatenation only accepted strings.
+    if lower < float(score) < upper:
+        print(f'{score} between {lower} and {upper}')
+    else:
+        raise AssertionError(f'{score} not between {lower} and {upper}')
+
+
+def find_csv_files(directory):
+    """Return the path of the only ``.csv`` file below ``directory``.
+
+    Args:
+        directory: Root directory that is walked recursively.
+
+    Returns:
+        The path of the CSV file, or ``None`` when there is none.
+
+    Raises:
+        RuntimeError: If more than one CSV file is found.
+    """
+    csv_files = [
+        os.path.join(root, name)
+        for root, _dirs, names in os.walk(directory)
+        for name in names if name.endswith('.csv')
+    ]
+    if len(csv_files) > 1:
+        # Raising a bare string is itself a TypeError in Python 3, which hid
+        # this message; raise a real exception instead.
+        raise RuntimeError(
+            'have more than 1 result file, please check the result manually')
+    return csv_files[0] if csv_files else None
+
+
+def read_csv_file(file_path):
+    """Read a result CSV into ``{column: {dataset: value}}``.
+
+    The ``version``, ``metric`` and ``mode`` columns are dropped and the
+    ``dataset`` column supplies the inner key; every remaining column becomes
+    an outer key. Values are kept as the strings read from the file.
+    """
+    ignored_columns = ('version', 'metric', 'mode')
+
+    with open(file_path, 'r') as csvfile:
+        rows = [{
+            column: value
+            for column, value in row.items() if column not in ignored_columns
+        } for row in csv.DictReader(csvfile)]
+
+    scores = {}
+    for row in rows:
+        dataset = row.get('dataset')
+        for column, value in row.items():
+            if column != 'dataset':
+                scores.setdefault(column, {})[dataset] = value
+    return scores
diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml
@@ -0,0 +1,23 @@
+# Baseline scores, keyed by model name and then by dataset name.
+# Loaded by the baseline_scores fixture in .github/scripts/oc_score_assert.py,
+# which accepts a result only if it lies strictly between 97% and 103% of the
+# value listed here.
+internlm-7b-hf:
+    ARC-c: 36.27
+    chid-dev: 81.68
+    chid-test: 83.67
+    openai_humaneval: 10.37
+    openbookqa: 44.4
+    openbookqa_fact: 73.2
+
+internlm-chat-7b-hf:
+    ARC-c: 36.95
+    chid-dev: 71.78
+    chid-test: 76.87
+    openai_humaneval: 21.34
+    openbookqa: 66.6
+    openbookqa_fact: 80.4
+
+chatglm3-6b-base-hf:
+    ARC-c: 43.05
+    chid-dev: 80.2
+    chid-test: 80.77
+    openai_humaneval: 20.73
+    openbookqa: 79.8
+    openbookqa_fact: 92.2
diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml
@@ -0,0 +1,75 @@
+name: daily_run_test
+
+# Runs on manual dispatch and on the daily cron schedule below.
+on:
+  workflow_dispatch:
+  schedule:
+    - cron:  '56 16 * * *'
+
+# One run per workflow and ref; a newer run cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# Values referenced by the steps as ${{env.*}}.
+# NOTE(review): the /cpfs01 paths presumably exist only on the self-hosted
+# runner - confirm before changing the runner.
+env:
+  CONDA_ENV: opencompass_regression
+  PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
+  USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
+  HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+jobs:
+  daily_run_test:
+    # Builds a throw-away conda env, runs the regression models and checks the
+    # scores with .github/scripts/oc_score_assert.py.
+    runs-on: self-hosted
+    environment: 'prod'
+    timeout-minutes: 240 #4hours
+    steps:
+      - name: Clone repository
+        uses: actions/checkout@v3
+      - name: Prepare - create conda env and install torch
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda create -y --name ${{env.CONDA_ENV}} python=3.10
+          conda activate ${{env.CONDA_ENV}}
+          pip install torch torchvision torchaudio --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
+          conda info --envs
+      - name: Prepare - Pip install code
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda activate ${{env.CONDA_ENV}}
+          pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers==4.33.0 --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda info --envs
+      - name: Prepare - prepare data and hf model
+        run: |
+          cp -r ${{env.USERSPACE_PREFIX}}/data .
+          rm -rf ~/.cache/huggingface/hub
+          mkdir -p ~/.cache/huggingface
+          ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
+          # Every step runs in its own shell, so a plain `export` here would
+          # be gone before "Run test". GITHUB_ENV carries the offline flags
+          # over to the following steps.
+          echo "HF_DATASETS_OFFLINE=1" >> "$GITHUB_ENV"
+          echo "TRANSFORMERS_OFFLINE=1" >> "$GITHUB_ENV"
+      - name:  Run test
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda activate ${{env.CONDA_ENV}}
+          conda info --envs
+          rm -rf regression_result_daily
+          export from_tf=TRUE
+          python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
+      - name:  Get result
+        run: |
+          eval "$(conda shell.bash hook)"
+          # Install and run pytest inside the job's own env so nothing leaks
+          # into the runner's default Python.
+          conda activate ${{env.CONDA_ENV}}
+          pip install pytest --cache-dir ${{env.PIP_CACHE_PATH}}
+          python -m pytest -s -v --color=yes .github/scripts/oc_score_assert.py
+      - name:  Remove Conda Env
+        if: always()
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda env remove --name ${{env.CONDA_ENV}}
+          conda info --envs
+
+  # Posts a failure message to the webhook stored in secrets.WEBHOOK_URL.
+  # Runs only when a needed job reported 'failure', the run was not cancelled
+  # and the ref name is 'develop' or 'main'.
+  notify_to_feishu:
+    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
+    needs: [daily_run_test]
+    environment: 'prod'
+    timeout-minutes: 5
+    runs-on: self-hosted
+    steps:
+      - name: notify
+        run: |
+          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}
diff --git a/.github/workflows/link-check.yml b/.github/workflows/link-check.yml
@@ -0,0 +1,21 @@
+name: 'Link check'
+
+on:
+  schedule:
+    # check links at 01:30 a.m. every day
+    - cron: '30 1 * * *'
+
+jobs:
+  link-check:
+    runs-on: ubuntu-latest
+    steps:
+      # - uses: actions/checkout@v3
+
+      - name: linkchecker
+        run: |
+          pip install linkchecker
+          # A trailing backslash continues the command; a trailing `|` would
+          # pipe linkchecker into a non-existent `--ignore-url` command.
+          # The patterns are single-quoted so the shell passes the regex
+          # characters `\.`, `(`, `|` and `{` through unchanged.
+          linkchecker https://opencompass.readthedocs.io/ --no-robots -t 30 --no-warnings \
+            --ignore-url 'https://opencompass\.readthedocs\.io/.*/static/images/opencompass_logo\.svg' \
+            --ignore-url 'https://opencompass\.readthedocs\.io/.*/_static/images/icon-menu-dots\.svg' \
+            --ignore-url 'https://opencompass\.readthedocs\.io/policy' \
+            --ignore-url 'https://opencompass\.readthedocs\.io/(en|zh_CN)/[0-9a-f]{40}/.*'
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,23 @@
+name: lint
+
+# Lint on every push and on every pull request.
+on:
+  - push
+  - pull_request
+
+# One run per workflow and ref; a newer run cancels the one in progress.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          # Quoted so YAML keeps the version a string rather than the float 3.1.
+          python-version: "3.10"
+      - name: Install pre-commit hook
+        run: |
+          pip install pre-commit mmengine
+          pre-commit install
+      # Run every configured hook against the whole tree.
+      - name: Linting
+        run: pre-commit run --all-files
diff --git a/.github/workflows/pr-run-test.yml b/.github/workflows/pr-run-test.yml
@@ -0,0 +1,80 @@
+name: pr_run_test
+
+# Runs on pull requests (except those touching only the ignored paths), on
+# manual dispatch and on the daily cron schedule below.
+on:
+  pull_request:
+    paths-ignore:
+      - 'README.md'
+      - 'README_zh-CN.md'
+      - 'docs/**'
+      - 'configs/**'
+      - 'tools/**'
+
+  workflow_dispatch:
+  schedule:
+    - cron:  '56 22 * * *'
+
+# One run per workflow and ref; a newer run cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# Values referenced by the steps as ${{env.*}}.
+# NOTE(review): CONDA_ENV is only activated, never created, in this workflow,
+# so it presumably already exists on the self-hosted runner - confirm.
+env:
+  CONDA_ENV: opencompass_base
+  USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
+  HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+
+jobs:
+  pr_run_test:
+    # Installs the checked-out code into the existing conda env, runs one
+    # small evaluation and checks that its score stays in the expected range.
+    runs-on: self-hosted
+    environment: 'prod'
+    timeout-minutes: 30
+    steps:
+      - name: Clone repository
+        uses: actions/checkout@v3
+      - name: Prepare - Install opencompass
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda activate ${{env.CONDA_ENV}}
+          python3 -m pip uninstall opencompass -y
+          python3 -m pip install -e . --cache-dir ${{env.USERSPACE_PREFIX}}/.cache/pip
+          conda info --envs
+      - name: Prepare - prepare data and hf model
+        run: |
+          cp -r ${{env.USERSPACE_PREFIX}}/data .
+          rm -rf ~/.cache/huggingface/hub
+          mkdir -p ~/.cache/huggingface
+          ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
+          # Every step runs in its own shell, so a plain `export` here would
+          # be gone before "Run test". GITHUB_ENV carries the offline flags
+          # over to the following steps.
+          echo "HF_DATASETS_OFFLINE=1" >> "$GITHUB_ENV"
+          echo "TRANSFORMERS_OFFLINE=1" >> "$GITHUB_ENV"
+      - name:  Run test
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda activate ${{env.CONDA_ENV}}
+          conda info --envs
+          rm -rf regression_result
+          python3 run.py --models hf_internlm_chat_7b --datasets siqa_gen --work-dir regression_result --debug
+      - name:  Get result
+        run: |
+          # The score is the last comma-separated field of the last CSV line.
+          score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
+          # ${score%.*} drops the fractional part, so only the integer part
+          # of the score is compared against the bounds.
+          if (( ${score%.*} >= 70 && ${score%.*} <= 75 )); then
+             echo "score is $score between 70 and 75"
+          else
+             echo "score is $score not between 70 and 75"
+             exit 1
+          fi
+          rm -rf regression_result
+      - name:  Uninstall opencompass
+        if: always()
+        run: |
+          eval "$(conda shell.bash hook)"
+          conda activate ${{env.CONDA_ENV}}
+          python3 -m pip uninstall opencompass -y
+          conda info --envs
+
+  # Posts a failure message to the webhook stored in secrets.WEBHOOK_URL.
+  # Runs only when a needed job reported 'failure', the run was not cancelled
+  # and the ref name is 'develop' or 'main'.
+  notify_to_feishu:
+    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
+    needs: [pr_run_test]
+    environment: 'prod'
+    timeout-minutes: 5
+    runs-on: self-hosted
+    steps:
+      - name: notify
+        run: |
+          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}