Commit d2382cc

Add additional Falcon convert requirements

1 parent 41fa2f3 commit d2382cc

8 files changed: +40 -3 lines changed

.gitignore (+1 -1)

@@ -2,5 +2,5 @@
 # .gitignore
 #
 
-# Ignore Sublime Text files.
 *.sublime-workspace
+*.log

examples/server_llama_2_13b_chat.ps1 (+7)

@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Llama-2-13b-chat-hf/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 20
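
Once a script like the one above is running, the server can also be exercised from PowerShell directly. A minimal sketch, assuming the /completion endpoint and JSON fields (prompt, n_predict) of llama.cpp's server example; adjust if your build differs:

# Give the server a moment to load the model, then request a completion.
Start-Sleep -Seconds 10

$body = @{
    prompt    = "Explain what a GGUF file is in one sentence."
    n_predict = 64
} | ConvertTo-Json

$response = Invoke-RestMethod -Uri "http://127.0.0.1:8080/completion" `
    -Method Post `
    -ContentType "application/json" `
    -Body $body

# The generated text is returned in the "content" field.
$response.content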

examples/server_llama_2_7b_chat.ps1 (+7)

@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Llama-2-7b-chat-hf/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 35
@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Phind-CodeLlama-34B-v2/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 10
@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Llama-2-70b-instruct-v2/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 0
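
Note that --n-gpu-layers shrinks as the model grows (35 for the 7B, 20 for the 13B, 10 for the 34B, 0 for the 70B), so that the offloaded layers still fit in the same amount of VRAM. When tuning the value for your own GPU, it helps to watch memory headroom while the model loads; a small sketch, assuming an NVIDIA card with nvidia-smi on the PATH:

# Poll VRAM usage every 2 seconds; raise --n-gpu-layers until the
# model no longer fits comfortably, then back off.
nvidia-smi --query-gpu=memory.used,memory.total --format=csv -l 2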

rebuild_llama.cpp.ps1 (+4 -1)

@@ -147,11 +147,14 @@ Set-Location -Path "../"
 
 conda activate llama.cpp
 
-# We are making sure to always use the latest version.
+# We are making sure to always use the latest version of the "gguf" package.
 pip install --ignore-installed -r ./requirements.txt
 
 Set-Location -Path "../../"
 
+# We also need to install additional packages to support falcon conversion.
+pip install -r ./requirements.txt
+
 $stopwatch.Stop()
 $durationInSeconds = [Math]::Floor([Decimal]($stopwatch.Elapsed.TotalSeconds))
 
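The second pip install pulls in torch and transformers (see requirements.txt below), which the Falcon HF-to-GGUF conversion needs. As a rough sketch of the conversion step these packages enable, assuming llama.cpp's convert-falcon-hf-to-gguf.py script from that era and an illustrative model directory (both are assumptions, not part of this commit):

# Convert a Falcon checkpoint from Hugging Face format to GGUF.
# The trailing "1" selects f16 output via the script's ftype argument.
python ./vendor/llama.cpp/convert-falcon-hf-to-gguf.py `
    ./vendor/llama.cpp/models/falcon-7b-instruct `
    1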
requirements.txt (+6)

@@ -0,0 +1,6 @@
+# We are using a specific version of the "torch"
+# package which supports a specific CUDA version.
+--extra-index-url https://download.pytorch.org/whl/nightly/cu121
+torch==2.1.0.dev20230905+cu121
+
+transformers==4.33.1
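
Because the pinned torch wheel is a CUDA 12.1 nightly build, it is worth confirming after installation that the GPU-enabled build actually landed; a minimal check (not part of the repository):

# Print the installed torch version and whether CUDA is visible.
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"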

vendor/llama.cpp
