From 0a85a1912b7cbd3d79bc4318013cf7651150e14a Mon Sep 17 00:00:00 2001
From: Hadrien
Date: Sun, 9 Feb 2025 11:53:29 +0100
Subject: [PATCH 1/5] add other ollama visual llms (to squash w/ first)

---
 operate/models/apis.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/operate/models/apis.py b/operate/models/apis.py
index d0ccb0c4..273b6046 100644
--- a/operate/models/apis.py
+++ b/operate/models/apis.py
@@ -50,8 +50,8 @@ async def get_next_action(model, messages, objective, session_id):
         return "coming soon"
     if model == "gemini-pro-vision":
         return call_gemini_pro_vision(messages, objective), None
-    if model == "llava":
-        operation = call_ollama_llava(messages)
+    if model == "llava" or model == "llava:13b" or "bakllava" or "llava-llama3":
+        operation = call_ollama_llava(messages, model)
         return operation, None
     if model == "claude-3":
         operation = await call_claude_3_with_ocr(messages, objective, model)
@@ -558,9 +558,11 @@ async def call_gpt_4o_labeled(messages, objective, model):
         return call_gpt_4o(messages)
 
 
-def call_ollama_llava(messages):
+def call_ollama_llava(messages, model):
+    if model == "":
+        model = "llava"
     if config.verbose:
-        print("[call_ollama_llava]")
+        print(f"[call_ollama_llava] model {model}")
     time.sleep(1)
     try:
         model = config.initialize_ollama()
@@ -590,8 +592,8 @@ def call_ollama_llava(messages):
         }
         messages.append(vision_message)
 
-        response = model.chat(
-            model="llava",
+        response = ollama.chat(
+            model=model,
             messages=messages,
         )
 
@@ -633,7 +635,7 @@ def call_ollama_llava(messages):
         )
         if config.verbose:
             traceback.print_exc()
-        return call_ollama_llava(messages)
+        return call_ollama_llava(messages, model)
 
 
 async def call_claude_3_with_ocr(messages, objective, model):

From 3393989caa9bfad2e14f0ea7cbf063e130587f14 Mon Sep 17 00:00:00 2001
From: Hadrien
Date: Sun, 9 Feb 2025 11:53:29 +0100
Subject: [PATCH 2/5] exhaustive "llava" match

---
 operate/models/apis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/operate/models/apis.py b/operate/models/apis.py
index 273b6046..b34975df 100644
--- a/operate/models/apis.py
+++ b/operate/models/apis.py
@@ -50,7 +50,7 @@ async def get_next_action(model, messages, objective, session_id):
         return "coming soon"
     if model == "gemini-pro-vision":
         return call_gemini_pro_vision(messages, objective), None
-    if model == "llava" or model == "llava:13b" or "bakllava" or "llava-llama3":
+    if "llava" in model:
         operation = call_ollama_llava(messages, model)
         return operation, None
     if model == "claude-3":
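The reason PATCH 2 immediately rewrites the condition introduced in PATCH 1 is a Python truthiness pitfall: in an `or` chain, a bare non-empty string literal such as `"bakllava"` is always truthy, so the PATCH 1 branch matched every model name. A minimal sketch of the pitfall and the fix; the function names are invented for illustration, not taken from the codebase:

```python
def routes_to_ollama_patch1(model: str) -> bool:
    # PATCH 1 condition: the bare literals "bakllava" and "llava-llama3"
    # are truthy, so this evaluates True for ANY model name.
    return bool(model == "llava" or model == "llava:13b" or "bakllava" or "llava-llama3")


def routes_to_ollama_patch2(model: str) -> bool:
    # PATCH 2 condition: a substring test covers "llava", "llava:13b",
    # "bakllava", "llava-llama3", and future llava-based tags.
    return "llava" in model


assert routes_to_ollama_patch1("gpt-4o") is True    # wrongly matched
assert routes_to_ollama_patch2("gpt-4o") is False   # correctly rejected
assert routes_to_ollama_patch2("bakllava") is True
assert routes_to_ollama_patch2("llava-llama3") is True
```

The substring check also keeps `get_next_action` forward-compatible with new llava-derived tags without touching the routing again.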
From 77f3755970a9100dbfa6bd59714f194f3c21827b Mon Sep 17 00:00:00 2001
From: Hadrien
Date: Sun, 9 Feb 2025 11:53:30 +0100
Subject: [PATCH 3/5] logs

---
 operate/models/apis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/operate/models/apis.py b/operate/models/apis.py
index b34975df..4228669d 100644
--- a/operate/models/apis.py
+++ b/operate/models/apis.py
@@ -626,7 +626,7 @@ def call_ollama_llava(messages, model):
 
     except Exception as e:
         print(
-            f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_BRIGHT_MAGENTA}[llava] That did not work. Trying again {ANSI_RESET}",
+            f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_BRIGHT_MAGENTA}[{model}] That did not work. Trying again {ANSI_RESET}",
             e,
         )
         print(
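After PATCHes 1 through 3, `call_ollama_llava` amounts to appending a screenshot-bearing user message and forwarding the user-selected model name to `ollama.chat`. A stripped-down sketch of that call path; it assumes the `ollama` Python package is installed, `ollama serve` is running, and the screenshot path is hypothetical:

```python
import ollama  # official Ollama Python client


def ask_local_vision_model(model: str, prompt: str, screenshot_path: str) -> str:
    """Send one prompt plus one screenshot to a local Ollama vision model."""
    vision_message = {
        "role": "user",
        "content": prompt,
        # the `images` field accepts file paths or raw image bytes
        "images": [screenshot_path],
    }
    response = ollama.chat(
        model=model,  # e.g. "llava", "llava:13b", "bakllava", "llava-llama3"
        messages=[vision_message],
    )
    return response["message"]["content"]


# hypothetical usage; requires `ollama pull llava` and `ollama serve` first:
# print(ask_local_vision_model("llava", "Describe this screen", "screenshots/screenshot.png"))
```

Passing the model name through, rather than hard-coding `"llava"` as before, is what lets one code path drive llava:13b, bakllava, and llava-llama3 alike.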
From 8ffb5953947206bebaa29899396102512e2c98f9 Mon Sep 17 00:00:00 2001
From: Hadrien
Date: Sun, 9 Feb 2025 11:53:30 +0100
Subject: [PATCH 4/5] fix: by default, try it in ollama

---
 README.md              | 16 ++++++++--------
 operate/models/apis.py | 14 ++++----------
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index ab24691c..0a74bda8 100644
--- a/README.md
+++ b/README.md
@@ -76,28 +76,28 @@ Use Claude 3 with Vision to see how it stacks up to GPT-4-Vision at operating a
 operate -m claude-3
 ```
 
-#### Try LLaVa Hosted Through Ollama `-m llava`
-If you wish to experiment with the Self-Operating Computer Framework using LLaVA on your own machine, you can with Ollama!
+#### Try a Model Hosted Through Ollama `-m llama3.2-vision`
+If you wish to experiment with the Self-Operating Computer Framework using a local model such as LLaVA on your own machine, you can with Ollama!
 *Note: Ollama currently only supports MacOS and Linux. Windows now in Preview*
 
 First, install Ollama on your machine from https://ollama.ai/download.
 
-Once Ollama is installed, pull the LLaVA model:
+Once Ollama is installed, pull a vision model:
 ```
-ollama pull llava
+ollama pull llama3.2-vision
 ```
 
 This will download the model on your machine which takes approximately 5 GB of storage.
 
-When Ollama has finished pulling LLaVA, start the server:
+When Ollama has finished pulling llama3.2-vision, start the server:
 ```
 ollama serve
 ```
 
-That's it! Now start `operate` and select the LLaVA model:
+That's it! Now start `operate` and select the model:
 ```
-operate -m llava
+operate -m llama3.2-vision
 ```
 
-**Important:** Error rates when using LLaVA are very high. This is simply intended to be a base to build off of as local multimodal models improve over time.
+**Important:** Error rates when using self-hosted models are very high. This is simply intended to be a base to build off of as local multimodal models improve over time.
 
 Learn more about Ollama at its [GitHub Repository](https://www.github.com/ollama/ollama)

diff --git a/operate/models/apis.py b/operate/models/apis.py
index 4228669d..43493bcb 100644
--- a/operate/models/apis.py
+++ b/operate/models/apis.py
@@ -50,14 +50,11 @@ async def get_next_action(model, messages, objective, session_id):
         return "coming soon"
     if model == "gemini-pro-vision":
         return call_gemini_pro_vision(messages, objective), None
-    if "llava" in model:
-        operation = call_ollama_llava(messages, model)
-        return operation, None
     if model == "claude-3":
         operation = await call_claude_3_with_ocr(messages, objective, model)
         return operation, None
-    raise ModelNotRecognizedException(model)
-
+    operation = call_ollama_llava(model, messages)
+    return operation, None
 
 def call_gpt_4o(messages):
@@ -557,10 +554,7 @@ async def call_gpt_4o_labeled(messages, objective, model):
         traceback.print_exc()
         return call_gpt_4o(messages)
 
-
-def call_ollama_llava(messages, model):
-    if model == "":
-        model = "llava"
+def call_ollama_llava(model, messages):
     if config.verbose:
         print(f"[call_ollama_llava] model {model}")
     time.sleep(1)
@@ -635,7 +629,7 @@ def call_ollama_llava(messages, model):
         )
         if config.verbose:
             traceback.print_exc()
-        return call_ollama_llava(messages, model)
+        return call_ollama_llava(model, messages)
 
 
 async def call_claude_3_with_ocr(messages, objective, model):

From ebc89df9d59c352332e790d42da3e36cb3355ac2 Mon Sep 17 00:00:00 2001
From: Hadrien
Date: Sun, 9 Feb 2025 11:53:30 +0100
Subject: [PATCH 5/5] chore: replace call_ollama_llava with call_ollama

---
 operate/models/apis.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/operate/models/apis.py b/operate/models/apis.py
index 43493bcb..1b8a2fc4 100644
--- a/operate/models/apis.py
+++ b/operate/models/apis.py
@@ -53,7 +53,7 @@ async def get_next_action(model, messages, objective, session_id):
     if model == "claude-3":
         operation = await call_claude_3_with_ocr(messages, objective, model)
         return operation, None
-    operation = call_ollama_llava(model, messages)
+    operation = call_ollama(model, messages)
     return operation, None
 
 def call_gpt_4o(messages):
@@ -554,9 +554,9 @@ async def call_gpt_4o_labeled(messages, objective, model):
         traceback.print_exc()
         return call_gpt_4o(messages)
 
-def call_ollama_llava(model, messages):
+def call_ollama(model, messages):
     if config.verbose:
-        print(f"[call_ollama_llava] model {model}")
+        print(f"[call_ollama] model {model}")
     time.sleep(1)
     try:
         model = config.initialize_ollama()
@@ -575,7 +575,7 @@ def call_ollama(model, messages):
 
         if config.verbose:
             print(
-                "[call_ollama_llava] user_prompt",
+                "[call_ollama] user_prompt",
                 user_prompt,
             )
 
@@ -603,7 +603,7 @@ def call_ollama(model, messages):
         assistant_message = {"role": "assistant", "content": content}
         if config.verbose:
             print(
-                "[call_ollama_llava] content",
+                "[call_ollama] content",
                 content,
             )
         content = json.loads(content)
@@ -629,7 +629,7 @@ def call_ollama(model, messages):
         )
         if config.verbose:
             traceback.print_exc()
-        return call_ollama_llava(model, messages)
+        return call_ollama(model, messages)
 
 
 async def call_claude_3_with_ocr(messages, objective, model):
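Taken together, PATCH 4 and PATCH 5 leave `get_next_action` with explicit branches for hosted models and an Ollama fallthrough in place of the old `raise ModelNotRecognizedException(model)`. A self-contained sketch of that final routing shape, with stubbed handlers and the async plumbing dropped for brevity; the real signatures live in operate/models/apis.py:

```python
# Stubs standing in for the real API-calling functions in apis.py.
def call_gemini_pro_vision(messages, objective):
    return {"backend": "gemini", "ops": []}  # stub

def call_claude_3_with_ocr(messages, objective, model):
    return {"backend": "claude", "ops": []}  # stub

def call_ollama(model, messages):
    return {"backend": "ollama", "model": model, "ops": []}  # stub


def get_next_action(model, messages, objective):
    if model == "gemini-pro-vision":
        return call_gemini_pro_vision(messages, objective), None
    if model == "claude-3":
        return call_claude_3_with_ocr(messages, objective, model), None
    # Default: hand any unrecognized name to Ollama, e.g. "llama3.2-vision".
    # A typo in -m now surfaces as an Ollama error rather than an early
    # ModelNotRecognizedException.
    return call_ollama(model, messages), None


print(get_next_action("llama3.2-vision", [], "open a browser"))
# -> ({'backend': 'ollama', 'model': 'llama3.2-vision', 'ops': []}, None)
```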