Commit d2382cc

Add additional Falcon convert requirements

1 parent 41fa2f3 commit d2382cc

8 files changed: +40 -3 lines changed

.gitignore (+1 -1)

@@ -2,5 +2,5 @@
 # .gitignore
 #
 
-# Ignore Sublime Text files.
 *.sublime-workspace
+*.log

examples/server_llama_2_13b_chat.ps1 (+7)

@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Llama-2-13b-chat-hf/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 20
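
Once a script like the one above is running, the server can also be exercised from PowerShell directly. A minimal sketch, assuming the /completion endpoint and JSON fields (prompt, n_predict) of llama.cpp's server example; adjust if your build differs:

# Give the server a moment to load the model, then request a completion.
Start-Sleep -Seconds 10

$body = @{
    prompt    = "Explain what a GGUF file is in one sentence."
    n_predict = 64
} | ConvertTo-Json

$response = Invoke-RestMethod -Uri "http://127.0.0.1:8080/completion" `
    -Method Post `
    -ContentType "application/json" `
    -Body $body

# The generated text is returned in the "content" field.
$response.content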

examples/server_llama_2_7b_chat.ps1 (+7)

@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Llama-2-7b-chat-hf/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 35
@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Phind-CodeLlama-34B-v2/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 10
@@ -0,0 +1,7 @@
+Start-Process "http://127.0.0.1:8080"
+
+../vendor/llama.cpp/build/bin/Release/server `
+--model "../vendor/llama.cpp/models/Llama-2-70b-instruct-v2/model-quantized-q4_K_M.gguf" `
+--ctx-size 4096 `
+--threads 16 `
+--n-gpu-layers 0
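
Note that --n-gpu-layers shrinks as the model grows (35 for the 7B, 20 for the 13B, 10 for the 34B, 0 for the 70B), so that the offloaded layers still fit in the same amount of VRAM. When tuning the value for your own GPU, it helps to watch memory headroom while the model loads; a small sketch, assuming an NVIDIA card with nvidia-smi on the PATH:

# Poll VRAM usage every 2 seconds; raise --n-gpu-layers until the
# model no longer fits comfortably, then back off.
nvidia-smi --query-gpu=memory.used,memory.total --format=csv -l 2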

rebuild_llama.cpp.ps1 (+4 -1)

@@ -147,11 +147,14 @@ Set-Location -Path "../"
 
 conda activate llama.cpp
 
-# We are making sure to always use the latest version.
+# We are making sure to always use the latest version of the "gguf" package.
 pip install --ignore-installed -r ./requirements.txt
 
 Set-Location -Path "../../"
 
+# We also need to install additional packages to support falcon conversion.
+pip install -r ./requirements.txt
+
 $stopwatch.Stop()
 $durationInSeconds = [Math]::Floor([Decimal]($stopwatch.Elapsed.TotalSeconds))
 
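The second pip install pulls in torch and transformers (see requirements.txt below), which the Falcon HF-to-GGUF conversion needs. As a rough sketch of the conversion step these packages enable, assuming llama.cpp's convert-falcon-hf-to-gguf.py script from that era and an illustrative model directory (both are assumptions, not part of this commit):

# Convert a Falcon checkpoint from Hugging Face format to GGUF.
# The trailing "1" selects f16 output via the script's ftype argument.
python ./vendor/llama.cpp/convert-falcon-hf-to-gguf.py `
    ./vendor/llama.cpp/models/falcon-7b-instruct `
    1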
requirements.txt (+6)

@@ -0,0 +1,6 @@
+# We are using a specific version of the "torch"
+# package which supports a specific CUDA version.
+--extra-index-url https://download.pytorch.org/whl/nightly/cu121
+torch==2.1.0.dev20230905+cu121
+
+transformers==4.33.1
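
Because the pinned torch wheel is a CUDA 12.1 nightly build, it is worth confirming after installation that the GPU-enabled build actually landed; a minimal check (not part of the repository):

# Print the installed torch version and whether CUDA is visible.
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"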

vendor/llama.cpp
