Skip to content

Commit 8dd9101

Browse files
authored
Merge pull request #653 from zsogitbe/master
Extension LLava with in memory images
2 parents c749139 + f4fad82 commit 8dd9101

File tree

5 files changed

+36
-20
lines changed

5 files changed

+36
-20
lines changed

LLama.Examples/Examples/LlavaInteractiveModeExecute.cs

+5-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using LLama.Batched;
33
using LLama.Common;
44
using Spectre.Console;
5+
using LLama.Abstractions;
56

67
namespace LLama.Examples.Examples
78
{
@@ -99,7 +100,10 @@ public static async Task Run()
99100

100101
// Initialize Images in executor
101102
//
102-
ex.ImagePaths = imagePaths.ToList();
103+
foreach (var image in imagePaths)
104+
{
105+
ex.Images.Add(File.ReadAllBytes(image));
106+
}
103107
}
104108

105109
Console.ForegroundColor = Color.White;

LLama/Abstractions/ILLamaExecutor.cs

+5-6
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,13 @@ public interface ILLamaExecutor
2222
/// <summary>
2323
/// Multi-Modal Projections / Clip Model weights
2424
/// </summary>
25-
public LLavaWeights? ClipModel { get; }
26-
25+
public LLavaWeights? ClipModel { get; }
26+
2727
/// <summary>
28-
/// List of images: Image filename and path (jpeg images).
28+
/// List of images: image file path, URI, or image byte array. See ImageData.
2929
/// </summary>
30-
public List<string> ImagePaths { get; set; }
31-
32-
30+
public List<byte[]> Images { get; }
31+
3332
/// <summary>
3433
/// Asynchronously infers a response from the model.
3534
/// </summary>

LLama/LLamaExecutorBase.cs

+11-5
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,11 @@ public bool IsMultiModal
7676
}
7777

7878
/// <inheritdoc />
79-
public LLavaWeights? ClipModel { get; }
80-
79+
public LLavaWeights? ClipModel { get; }
80+
8181
/// <inheritdoc />
82-
public List<string> ImagePaths { get; set; }
83-
82+
public List<byte[]> Images { get; set; }
83+
8484
/// <summary>
8585
/// Current "mu" value for mirostat sampling
8686
/// </summary>
@@ -95,7 +95,7 @@ public bool IsMultiModal
9595
/// <param name="logger"></param>
9696
protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
9797
{
98-
ImagePaths = new List<string>();
98+
Images = new List<byte[]>();
9999
_logger = logger;
100100
Context = context;
101101
_pastTokensCount = 0;
@@ -105,6 +105,12 @@ protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
105105
_decoder = new StreamingTokenDecoder(context);
106106
}
107107

108+
/// <summary>
109+
///
110+
/// </summary>
111+
/// <param name="context"></param>
112+
/// <param name="lLavaWeights"></param>
113+
/// <param name="logger"></param>
108114
public StatefulExecutorBase(LLamaContext context, LLavaWeights lLavaWeights, ILogger? logger = null) :
109115
this( context, logger )
110116
{

LLama/LLamaInteractExecutor.cs

+5-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
using LLama.Exceptions;
1212
using LLama.Extensions;
1313
using Microsoft.Extensions.Logging;
14+
using System.Net.Http;
1415

1516
namespace LLama
1617
{
@@ -148,13 +149,13 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
148149
int usedTokens = 0;
149150
// If the prompt contains the tag <image> extract this.
150151
_imageInPrompt = text.Contains("<image>");
151-
if (_imageInPrompt)
152+
if (_imageInPrompt && ClipModel != null)
152153
{
153-
foreach (var image in ImagePaths)
154+
foreach (var image in Images)
154155
{
155-
_imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName( ClipModel.NativeHandle, Context, image ) );
156+
_imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, image));
156157
}
157-
158+
158159
int imageIndex = text.IndexOf("<image>");
159160
// Tokenize segment 1 (before <image> tag)
160161
string preImagePrompt = text.Substring(0, imageIndex);

LLama/LLamaStatelessExecutor.cs

+10-4
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,16 @@ public class StatelessExecutor
2626

2727
// LLava Section
2828
public bool IsMultiModal => false;
29+
30+
/// <inheritdoc />
2931
public bool MultiModalProject { get; }
30-
public LLavaWeights? ClipModel { get; }
31-
public List<string> ImagePaths { get; set; }
32-
32+
33+
/// <inheritdoc />
34+
public LLavaWeights? ClipModel { get; }
35+
36+
/// <inheritdoc />
37+
public List<byte[]> Images { get; set; }
38+
3339
/// <summary>
3440
/// The context used by the executor when running the inference.
3541
/// </summary>
@@ -43,7 +49,7 @@ public class StatelessExecutor
4349
/// <param name="logger"></param>
4450
public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
4551
{
46-
ImagePaths = new List<string>();
52+
Images = new List<byte[]>();
4753
_weights = weights;
4854
_params = @params;
4955
_logger = logger;

0 commit comments

Comments
 (0)