Fix some typos (#10175)
* add llm-ppl workflow * update the DATASET_DIR * test multiple precisions * modify nightly test * match the updated ppl code * add matrix.include * fix the include error * update the include * add more model * update the precision of include * update nightly time and add more models * fix the workflow_dispatch description, change default model of pr and modify the env * modify workflow_dispatch language options * modify options * modify language options * modify workflow_dispatch type * modify type * modify the type of language * change seq_len type * fix some typos * revert changes to stress_test.txt
This commit is contained in:
		
							parent
							
								
									add3899311
								
							
						
					
					
						commit
						6e10d98a8d
					
				
					 6 changed files with 20 additions and 20 deletions
				
			
		
							
								
								
									
										4
									
								
								.github/workflows/llm-c-evaluation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/llm-c-evaluation.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -79,7 +79,7 @@ jobs:
 | 
			
		|||
            echo "model_name=$model_name" >> $GITHUB_OUTPUT
 | 
			
		||||
            echo "precision=$precision" >> $GITHUB_OUTPUT
 | 
			
		||||
            echo "runner=$runner" >> $GITHUB_OUTPUT
 | 
			
		||||
  llm-ceval-evalution:
 | 
			
		||||
  llm-ceval-evaluation:
 | 
			
		||||
    timeout-minutes: 1200
 | 
			
		||||
    needs: [llm-cpp-build, set-matrix]
 | 
			
		||||
    strategy:
 | 
			
		||||
| 
						 | 
				
			
			@ -175,7 +175,7 @@ jobs:
 | 
			
		|||
 | 
			
		||||
  llm-ceval-summary:
 | 
			
		||||
    if: ${{ always() }}
 | 
			
		||||
    needs: llm-ceval-evalution
 | 
			
		||||
    needs: llm-ceval-evaluation
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v3
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										16
									
								
								.github/workflows/llm-harness-evaluation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/workflows/llm-harness-evaluation.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
name: LLM Harness Evalution
 | 
			
		||||
name: LLM Harness Evaluation
 | 
			
		||||
 | 
			
		||||
# Cancel previous runs in the PR when you push new commits
 | 
			
		||||
concurrency:
 | 
			
		||||
| 
						 | 
				
			
			@ -20,19 +20,19 @@ on:
 | 
			
		|||
  workflow_dispatch:
 | 
			
		||||
    inputs:
 | 
			
		||||
      model_name:
 | 
			
		||||
        description: 'Model names, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Model names, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      precision:
 | 
			
		||||
        description: 'Precisions, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Precisions, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      task:
 | 
			
		||||
        description: 'Tasks, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Tasks, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      runs-on:
 | 
			
		||||
        description: 'Labels to filter the runners, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Labels to filter the runners, separated by comma and must be quoted.'
 | 
			
		||||
        default: "accuracy"
 | 
			
		||||
        required: false
 | 
			
		||||
        type: string
 | 
			
		||||
| 
						 | 
				
			
			@ -97,7 +97,7 @@ jobs:
 | 
			
		|||
            echo "precision=$precision" >> $GITHUB_OUTPUT
 | 
			
		||||
            echo "task=$task" >> $GITHUB_OUTPUT
 | 
			
		||||
            echo "runner=$runner" >> $GITHUB_OUTPUT
 | 
			
		||||
  llm-harness-evalution:
 | 
			
		||||
  llm-harness-evaluation:
 | 
			
		||||
    timeout-minutes: 1000
 | 
			
		||||
    needs: [llm-cpp-build, set-matrix]
 | 
			
		||||
    strategy:
 | 
			
		||||
| 
						 | 
				
			
			@ -201,7 +201,7 @@ jobs:
 | 
			
		|||
 | 
			
		||||
  llm-harness-summary:
 | 
			
		||||
    if: ${{ always() }}
 | 
			
		||||
    needs: llm-harness-evalution
 | 
			
		||||
    needs: llm-harness-evaluation
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 | 
			
		||||
| 
						 | 
				
			
			@ -228,7 +228,7 @@ jobs:
 | 
			
		|||
  # TODO: change machine to store the results later        
 | 
			
		||||
  llm-harness-summary-nightly:
 | 
			
		||||
    if: ${{github.event_name == 'schedule' || github.event_name == 'pull_request'}}
 | 
			
		||||
    needs: [set-matrix, llm-harness-evalution]
 | 
			
		||||
    needs: [set-matrix, llm-harness-evaluation]
 | 
			
		||||
    runs-on: ["self-hosted", "llm", "accuracy1", "accuracy-nightly"]
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										10
									
								
								.github/workflows/llm-ppl-evaluation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.github/workflows/llm-ppl-evaluation.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
name: LLM Perplexity Evalution
 | 
			
		||||
name: LLM Perplexity Evaluation
 | 
			
		||||
 | 
			
		||||
# Cancel previous runs in the PR when you push new commits
 | 
			
		||||
concurrency:
 | 
			
		||||
| 
						 | 
				
			
			@ -24,11 +24,11 @@ on:
 | 
			
		|||
        required: true
 | 
			
		||||
        type: string      
 | 
			
		||||
      model_name:
 | 
			
		||||
        description: 'Model names, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Model names, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      precision:
 | 
			
		||||
        description: 'Precisions, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Precisions, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      language:
 | 
			
		||||
| 
						 | 
				
			
			@ -36,7 +36,7 @@ on:
 | 
			
		|||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      runs-on:
 | 
			
		||||
        description: 'Labels to filter the runners, seperated by comma and must be quoted.'
 | 
			
		||||
        description: 'Labels to filter the runners, separated by comma and must be quoted.'
 | 
			
		||||
        default: "accuracy"
 | 
			
		||||
        required: false
 | 
			
		||||
        type: string
 | 
			
		||||
| 
						 | 
				
			
			@ -107,7 +107,7 @@ jobs:
 | 
			
		|||
            echo "precision=$precision" >> $GITHUB_OUTPUT
 | 
			
		||||
            echo "language=$language" >> $GITHUB_OUTPUT
 | 
			
		||||
            echo "runner=$runner" >> $GITHUB_OUTPUT
 | 
			
		||||
  llm-ppl-evalution:
 | 
			
		||||
  llm-ppl-evaluation:
 | 
			
		||||
    timeout-minutes: 1000
 | 
			
		||||
    needs: [llm-cpp-build, set-matrix]
 | 
			
		||||
    strategy:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,7 +8,7 @@ Chronos provides simulators to generate synthetic time series data for users who
 | 
			
		|||
```
 | 
			
		||||
 | 
			
		||||
## 1. DPGANSimulator
 | 
			
		||||
`DPGANSimulator` adopt DoppelGANger raised in [Using GANs for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions](http://arxiv.org/abs/1909.13403). The method is data-driven unsupervised method based on deep learning model with GAN (Generative Adversarial Networks) structure. The model features a pair of seperate attribute generator and feature generator and their corresponding discriminators `DPGANSimulator` also supports a rich and comprehensive input data (training data) format and outperform other algorithms in many evalution metrics.
 | 
			
		||||
`DPGANSimulator` adopts DoppelGANger, proposed in [Using GANs for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions](http://arxiv.org/abs/1909.13403). The method is a data-driven, unsupervised method based on a deep learning model with a GAN (Generative Adversarial Networks) structure. The model features a separate attribute generator and feature generator, along with their corresponding discriminators. `DPGANSimulator` also supports a rich and comprehensive input data (training data) format and outperforms other algorithms in many evaluation metrics.
 | 
			
		||||
 | 
			
		||||
```eval_rst
 | 
			
		||||
.. note::
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -281,7 +281,7 @@
 | 
			
		|||
                </p>
 | 
			
		||||
            </summary>
 | 
			
		||||
            <img src="../../../_images/GitHub-Mark-32px.png"><a href="https://github.com/intel-analytics/BigDL/tree/main/python/chronos/example/onnx">View source on GitHub</a>
 | 
			
		||||
            <p>This example will demonstrate how to use ONNX to speed up the inferencing(prediction/evalution) on forecasters and AutoTSEstimator. In this example, onnx speed up the inferencing for ~4X.</p>
 | 
			
		||||
            <p>This example demonstrates how to use ONNX to speed up inference (prediction/evaluation) on forecasters and AutoTSEstimator. In this example, ONNX speeds up inference by ~4X.</p>
 | 
			
		||||
        </details>
 | 
			
		||||
        <hr>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
# Harness Evalution
 | 
			
		||||
[Harness evalution](https://github.com/EleutherAI/lm-evaluation-harness) allows users to eaisly get accuracy on various datasets. Here we have enabled harness evalution with BigDL-LLM under 
 | 
			
		||||
# Harness Evaluation
 | 
			
		||||
[Harness evaluation](https://github.com/EleutherAI/lm-evaluation-harness) allows users to easily get accuracy on various datasets. Here we have enabled harness evaluation with BigDL-LLM under 
 | 
			
		||||
[Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) settings.
 | 
			
		||||
Before running, make sure to have [bigdl-llm](../../../README.md) installed.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -12,7 +12,7 @@ pip install -e .
 | 
			
		|||
```
 | 
			
		||||
 | 
			
		||||
## Run
 | 
			
		||||
run `python run_llb.py`. `run_llb.py` combines some arguments in `main.py` to make evalutions easier. The mapping of arguments is defined as a dict in [`llb.py`](llb.py).
 | 
			
		||||
Run `python run_llb.py`. `run_llb.py` combines some arguments in `main.py` to make evaluations easier. The mapping of arguments is defined as a dict in [`llb.py`](llb.py).
 | 
			
		||||
 | 
			
		||||
### Evaluation on CPU
 | 
			
		||||
```python
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue