Skip to content

WIP for generative vision model #41

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions lib/llm/providers/openai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ def complete(message, **params)
Response::Completion.new(res.body, self).extend(response_parser)
end

##
# Defines one image method per action: vision_generation,
# vision_edit and vision_variation. Each one posts to
# /v1/images/<action>s and wraps the reply in a Response::Vision
# extended with this provider's response parser.
#
# @param prompt
#  Sent as the "prompt" field of the request body
# @param params
#  Extra body fields, merged over the defaults (model: "dall-e-3", n: 1)
# @return [LLM::Response::Vision]
#
# NOTE(review): all three actions share the same body shape and the
# "dall-e-3" default; confirm the edit and variation endpoints accept it.
%w[generation edit variation].each do |action|
  define_method(:"vision_#{action}") do |prompt, **params|
    defaults = {prompt:, model: "dall-e-3", n: 1}
    post = preflight(Net::HTTP::Post.new("/v1/images/#{action}s"), defaults.merge(params))
    reply = request(@http, post)
    Response::Vision.new(reply.body, self).extend(response_parser)
  end
end

##
# @param prompt (see LLM::Provider#transform_prompt)
# @return (see LLM::Provider#transform_prompt)
Expand Down
8 changes: 8 additions & 0 deletions lib/llm/providers/openai/response_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,13 @@ def parse_completion(raw)
total_tokens: raw.dig("usage", "total_tokens")
}
end

##
# Extracts the image URLs from a raw image response.
#
# @param raw [Hash]
#  The decoded response body; image entries are read from raw["data"]
# @return [Hash]
#  A hash whose :images key holds an Array<URI>, one per entry that
#  has a "url". The array is empty when "data" is absent.
def parse_vision(raw)
  # Array() turns a missing "data" key into an empty list instead of
  # letting nil.map raise NoMethodError.
  entries = Array(raw["data"])
  # Entries without a "url" are skipped, because URI(nil) raises ArgumentError.
  {images: entries.filter_map { |entry| URI(entry["url"]) if entry["url"] }}
end
end
end
1 change: 1 addition & 0 deletions lib/llm/response.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class Response
require "json"
require_relative "response/completion"
require_relative "response/embedding"
require_relative "response/vision"

##
# @return [Hash]
Expand Down
22 changes: 22 additions & 0 deletions lib/llm/response/vision.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true

module LLM
  ##
  # Response returned by the provider's image methods. It exposes
  # the images contained in the provider's reply.
  class Response::Vision < Response
    ##
    # @return [Array<URI>]
    #  The image URIs taken from the parsed response
    def images = parsed[:images]

    private

    ##
    # @private
    # @return [Hash]
    #  The provider response after parsing. It is computed on first
    #  use and cached. parse_vision is not defined in this class; it
    #  is expected to come from the parser module the provider
    #  extends the response with.
    def parsed
      return @parsed if @parsed
      @parsed = parse_vision(raw)
    end
  end
end
54 changes: 54 additions & 0 deletions spec/openai/vision_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# frozen_string_literal: true

require "webmock/rspec"

# Specs for image generation through the OpenAI provider.
# All HTTP traffic is stubbed with WebMock; no real request is made.
RSpec.describe "LLM::OpenAI" do
# Provider under test, built with an empty string argument
# (presumably the API key; confirm against LLM.openai).
subject(:openai) { LLM.openai("") }

# Runs only for examples tagged :success. Stubs a 200 reply from
# POST /v1/images/generations whose "data" array holds one image entry.
before(:each, :success) do
stub_request(:post, "https://api.openai.com/v1/images/generations")
.with(headers: {"Content-Type" => "application/json"})
.to_return(
status: 200,
body: {
created: 1731499418,
data: [
{
revised_prompt: "Create a detailed image showing a white Siamese cat. The cat has pierce blue eyes and slightly elongated ears. It should be sitting gracefully with its tail wrapped around its legs. The Siamese cat's unique color points on its ears, face, paws and tail are in a contrast with its creamy white fur. The background is peaceful and comforting, perhaps a softly lit quieter corner of a home, with tantalizing shadows and welcoming warm colors.",
url: "https://oaidalleapiprodscus.blob.core.windows.net/private/org-onsUXMUK28Zzsh9Vv8iWj80q/user-VcliHUdhkKDdohyDGnVsJzYg/img-C5OCBxw69p4vKtcLLIlL9xCz.png?st=2024-11-13T11%3A03%3A37Z&se=2024-11-13T13%3A03%3A37Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-11-12T19%3A49%3A57Z&ske=2024-11-13T19%3A49%3A57Z&sks=b&skv=2024-08-04&sig=9Bp9muevzDLdjymf%2BsnVuorprp6iCol/wI8Ih95xjhE%3D"
}
]
}.to_json
)
end

# Runs only for examples tagged :unauthorized. Stubs a 401 reply
# with an error payload for the same endpoint.
# NOTE(review): no example in this file carries the :unauthorized
# tag, so this stub is currently unused.
before(:each, :unauthorized) do
stub_request(:post, "https://api.openai.com/v1/images/generations")
.with(headers: {"Content-Type" => "application/json"})
.to_return(
status: 401,
body: '{
"error": {
"code": null,
"message": "Invalid authorization header",
"param": null,
"type": "server_error"
}
}'
)
end

# Happy path: the :success tag activates the 200 stub above.
context "with successful vision", :success do
# Result of asking the provider to generate an image from a prompt.
let(:vision) { openai.vision_generation("a white siamese cat") }

it "returns a vision" do
expect(vision).to be_a(LLM::Response::Vision)
end

# The first image must be a URI whose string form equals the
# "url" value in the stubbed payload.
it "has images" do
expect(vision.images.first).to be_a(URI).and have_attributes(
to_s: "https://oaidalleapiprodscus.blob.core.windows.net/private/org-onsUXMUK28Zzsh9Vv8iWj80q/user-VcliHUdhkKDdohyDGnVsJzYg/img-C5OCBxw69p4vKtcLLIlL9xCz.png?st=2024-11-13T11%3A03%3A37Z&se=2024-11-13T13%3A03%3A37Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-11-12T19%3A49%3A57Z&ske=2024-11-13T19%3A49%3A57Z&sks=b&skv=2024-08-04&sig=9Bp9muevzDLdjymf%2BsnVuorprp6iCol/wI8Ih95xjhE%3D"
)
end
end
end