Skip to content

Commit 554e828

Browse files
bhaneynjooma
authored and committed
RSDK-7565, RSDK-7471: add new methods get_properties and capture_all_from_camera (#617)
1 parent c68fb1c commit 554e828

File tree

6 files changed

+360
-9
lines changed

6 files changed

+360
-9
lines changed

src/viam/services/vision/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
from viam.services.vision.service import VisionRPCService
33

44
from .client import Classification, Detection, VisionClient
5-
from .vision import Vision
5+
from .vision import Vision, CaptureAllResult
66

77
__all__ = [
8+
"CaptureAllResult",
89
"Classification",
910
"Detection",
1011
"VisionClient",

src/viam/services/vision/client.py

+57-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from viam.media.video import CameraMimeType, ViamImage
77
from viam.proto.common import DoCommandRequest, DoCommandResponse, PointCloudObject
88
from viam.proto.service.vision import (
9+
CaptureAllFromCameraRequest,
10+
CaptureAllFromCameraResponse,
911
Classification,
1012
Detection,
1113
GetClassificationsFromCameraRequest,
@@ -18,12 +20,14 @@
1820
GetDetectionsResponse,
1921
GetObjectPointCloudsRequest,
2022
GetObjectPointCloudsResponse,
23+
GetPropertiesRequest,
24+
GetPropertiesResponse,
2125
VisionServiceStub,
2226
)
2327
from viam.resource.rpc_client_base import ReconfigurableResourceRPCClientBase
2428
from viam.utils import ValueTypes, dict_to_struct, struct_to_dict
2529

26-
from .vision import Vision
30+
from .vision import Vision, CaptureAllResult
2731

2832

2933
class VisionClient(Vision, ReconfigurableResourceRPCClientBase):
@@ -39,6 +43,43 @@ def __init__(self, name: str, channel: Channel):
3943
self.channel = channel
4044
self.client = VisionServiceStub(channel)
4145

46+
async def capture_all_from_camera(
    self,
    camera_name: str,
    return_image: bool = False,
    return_classifications: bool = False,
    return_detections: bool = False,
    return_object_point_clouds: bool = False,
    *,
    extra: Optional[Mapping[str, Any]] = None,
    timeout: Optional[float] = None,
) -> CaptureAllResult:
    """Issue a single CaptureAllFromCamera RPC and repackage the response.

    Only the pieces whose ``return_*`` flag is set are copied out of the
    response; every other field of the returned ``CaptureAllResult`` keeps
    its default of ``None``. The response's ``extra`` struct is always
    converted to a dict and attached.
    """
    request = CaptureAllFromCameraRequest(
        name=self.name,
        camera_name=camera_name,
        return_image=return_image,
        return_classifications=return_classifications,
        return_detections=return_detections,
        return_object_point_clouds=return_object_point_clouds,
        extra=dict_to_struct({} if extra is None else extra),
    )
    response: CaptureAllFromCameraResponse = await self.client.CaptureAllFromCamera(request, timeout=timeout)

    response_extra = struct_to_dict(response.extra)

    image = None
    if return_image:
        # Rebuild the SDK image wrapper from the proto's raw bytes + format.
        proto_image = response.image
        image = ViamImage(proto_image.image, CameraMimeType.from_proto(proto_image.format))

    return CaptureAllResult(
        image=image,
        classifications=list(response.classifications) if return_classifications else None,
        detections=list(response.detections) if return_detections else None,
        objects=list(response.objects) if return_object_point_clouds else None,
        extra=response_extra,
    )
82+
4283
async def get_detections_from_camera(
4384
self,
4485
camera_name: str,
@@ -135,6 +176,21 @@ async def get_object_point_clouds(
135176
response: GetObjectPointCloudsResponse = await self.client.GetObjectPointClouds(request, timeout=timeout)
136177
return list(response.objects)
137178

179+
async def get_properties(
    self,
    *,
    extra: Optional[Mapping[str, Any]] = None,
    timeout: Optional[float] = None,
) -> Vision.Properties:
    """Send a GetProperties RPC for this service and return the response as-is.

    A missing ``extra`` is sent as an empty struct; ``timeout`` is forwarded
    to the stub call.
    """
    request = GetPropertiesRequest(
        name=self.name,
        extra=dict_to_struct({} if extra is None else extra),
    )
    properties: GetPropertiesResponse = await self.client.GetProperties(request, timeout=timeout)
    return properties
193+
138194
async def do_command(
139195
self,
140196
command: Mapping[str, ValueTypes],

src/viam/services/vision/service.py

+49
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
from viam.media.video import CameraMimeType, ViamImage
44
from viam.proto.common import DoCommandRequest, DoCommandResponse
5+
from viam.proto.component.camera import Image
56
from viam.proto.service.vision import (
7+
CaptureAllFromCameraRequest,
8+
CaptureAllFromCameraResponse,
69
GetClassificationsFromCameraRequest,
710
GetClassificationsFromCameraResponse,
811
GetClassificationsRequest,
@@ -13,6 +16,8 @@
1316
GetDetectionsResponse,
1417
GetObjectPointCloudsRequest,
1518
GetObjectPointCloudsResponse,
19+
GetPropertiesRequest,
20+
GetPropertiesResponse,
1621
UnimplementedVisionServiceBase,
1722
)
1823
from viam.resource.rpc_service_base import ResourceRPCServiceBase
@@ -28,6 +33,35 @@ class VisionRPCService(UnimplementedVisionServiceBase, ResourceRPCServiceBase):
2833

2934
RESOURCE_TYPE = Vision
3035

36+
async def CaptureAllFromCamera(self, stream: Stream[CaptureAllFromCameraRequest, CaptureAllFromCameraResponse]) -> None:
    """Handle a CaptureAllFromCamera RPC.

    Reads the request, delegates to the named resource's
    ``capture_all_from_camera`` and sends back a response built from the
    result. The ``image`` field of the response is only populated when the
    result carries an image.
    """
    request = await stream.recv_message()
    assert request is not None
    service = self.get_resource(request.name)
    request_extra = struct_to_dict(request.extra)
    remaining = stream.deadline.time_remaining() if stream.deadline else None
    captured = await service.capture_all_from_camera(
        request.camera_name,
        return_image=request.return_image,
        return_classifications=request.return_classifications,
        return_detections=request.return_detections,
        return_object_point_clouds=request.return_object_point_clouds,
        extra=request_extra,
        timeout=remaining,
    )

    proto_image = None
    if captured.image is not None:
        # Convert the SDK image wrapper back into the camera proto message.
        proto_image = Image(
            source_name=request.camera_name,
            format=captured.image.mime_type.to_proto(),
            image=captured.image.data,
        )

    await stream.send_message(
        CaptureAllFromCameraResponse(
            image=proto_image,
            detections=captured.detections,
            classifications=captured.classifications,
            objects=captured.objects,
            extra=dict_to_struct(captured.extra),
        )
    )
64+
3165
async def GetDetectionsFromCamera(self, stream: Stream[GetDetectionsFromCameraRequest, GetDetectionsFromCameraResponse]) -> None:
3266
request = await stream.recv_message()
3367
assert request is not None
@@ -88,6 +122,21 @@ async def GetObjectPointClouds(self, stream: Stream[GetObjectPointCloudsRequest,
88122
response = GetObjectPointCloudsResponse(mime_type=CameraMimeType.PCD.value, objects=result)
89123
await stream.send_message(response)
90124

125+
async def GetProperties(self, stream: Stream[GetPropertiesRequest, GetPropertiesResponse]) -> None:
    """Handle a GetProperties RPC.

    Looks up the resource named in the request, awaits its
    ``get_properties`` and copies the three ``*_supported`` flags into the
    response message.
    """
    request = await stream.recv_message()
    assert request is not None
    service = self.get_resource(request.name)
    request_extra = struct_to_dict(request.extra)
    remaining = stream.deadline.time_remaining() if stream.deadline else None
    props = await service.get_properties(extra=request_extra, timeout=remaining)
    await stream.send_message(
        GetPropertiesResponse(
            classifications_supported=props.classifications_supported,
            detections_supported=props.detections_supported,
            object_point_clouds_supported=props.object_point_clouds_supported,
        )
    )
139+
91140
async def DoCommand(self, stream: Stream[DoCommandRequest, DoCommandResponse]) -> None:
92141
request = await stream.recv_message()
93142
assert request is not None

src/viam/services/vision/vision.py

+112-4
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,112 @@
11
import abc
2-
from typing import Any, Final, List, Mapping, Optional
2+
import sys
3+
from typing import Any, Final, List, Mapping, Optional, Union
34

45
from viam.media.video import ViamImage
56
from viam.proto.common import PointCloudObject
6-
from viam.proto.service.vision import Classification, Detection
7+
from viam.proto.service.vision import Classification, Detection, GetPropertiesResponse
78
from viam.resource.types import RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_SERVICE, Subtype
89

10+
if sys.version_info >= (3, 10):
11+
from typing import TypeAlias
12+
else:
13+
from typing_extensions import TypeAlias
14+
915
from ..service_base import ServiceBase
1016

1117

18+
class CaptureAllResult:
    """
    CaptureAllResult represents the collection of things that you have requested from the
    CaptureAllFromCamera method. This is used most often for visualization purposes, since normally,
    returning the image on every call to a classifier/detector/etc would be costly and unnecessary.
    The default result for each field is None rather than the empty list to distinguish between
    "there was no request for the classifier/detector to return a result" vs.
    "the classifier/detector was requested, but there were no results".
    """

    def __init__(
        self,
        image: "Optional[ViamImage]" = None,
        classifications: "Optional[List[Classification]]" = None,
        detections: "Optional[List[Detection]]" = None,
        objects: "Optional[List[PointCloudObject]]" = None,
        extra: "Optional[dict]" = None,
    ):
        """
        Args:
            image (ViamImage|None): The image from the GetImage request of the camera, if it was requested.
            classifications (List[Classification]|None): The classifications from GetClassifications, if it was requested.
            detections (List[Detection]|None): The detections from GetDetections, if it was requested.
            objects (List[PointCloudObject]|None): the object point clouds from GetObjectPointClouds, if it was requested.
            extra (dict|None): A catch all structure, usually for metadata, that a module writer might want to return.
                When omitted (or None), each instance gets its own new empty dict.

        Returns:
            None
        """
        self.image: Optional[ViamImage] = image
        self.detections: Optional[List[Detection]] = detections
        self.classifications: Optional[List[Classification]] = classifications
        self.objects: Optional[List[PointCloudObject]] = objects
        # A ``{}`` default in the signature would be created once and shared by
        # every instance built without ``extra``; allocate a fresh dict instead.
        self.extra: dict = {} if extra is None else extra
44+
45+
1246
class Vision(ServiceBase):
1347
"""
1448
Vision represents a Vision service.
1549
1650
This acts as an abstract base class for any drivers representing specific
17-
arm implementations. This cannot be used on its own. If the ``__init__()`` function is
51+
vision implementations. This cannot be used on its own. If the ``__init__()`` function is
1852
overridden, it must call the ``super().__init__()`` function.
1953
"""
20-
2154
SUBTYPE: Final = Subtype( # pyright: ignore [reportIncompatibleVariableOverride]
2255
RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_SERVICE, "vision"
2356
)
2457

58+
Properties: "TypeAlias" = GetPropertiesResponse
59+
"""
60+
Properties is a class that states what features are supported on the associated vision service.
61+
Currently, these are the following properties:
62+
- classifications_supported (bool): GetClassifications and GetClassificationsFromCamera are implemented.
63+
- detections_supported (bool): GetDetections and GetDetectionsFromCamera are implemented.
64+
- object_point_clouds_supported (bool): GetObjectPointClouds is implemented.
65+
"""
66+
67+
@abc.abstractmethod
async def capture_all_from_camera(
    self,
    camera_name: str,
    return_image: bool = False,
    return_classifications: bool = False,
    return_detections: bool = False,
    return_object_point_clouds: bool = False,
    *,
    extra: Optional[Mapping[str, Any]] = None,
    timeout: Optional[float] = None,
) -> CaptureAllResult:
    """Fetch, in one call, the next image from a camera together with whichever of
    the detections, classifications, and objects were asked for. Mainly intended
    for visualization.

    ::

        camera_name = "cam1"

        # Grab the detector you configured on your machine
        my_detector = VisionClient.from_robot(robot, "my_detector")

        # capture all from the next image from the camera
        result = await my_detector.capture_all_from_camera(
            camera_name,
            return_image=True,
            return_detections=True,
        )

    Args:
        camera_name (str): The name of the camera to use for detection
        return_image (bool): Ask the vision service to return the camera's latest image
        return_classifications (bool): Ask the vision service to return its latest classifications
        return_detections (bool): Ask the vision service to return its latest detections
        return_object_point_clouds (bool): Ask the vision service to return its latest 3D segmentations
        extra (Mapping[str, Any] | None): Optional additional parameters for the implementation
        timeout (float | None): Optional time limit for the call

    Returns:
        vision.CaptureAllResult: A class that stores all potential returns from the vision service.
        It can return the image from the camera along with its associated detections, classifications,
        and objects, as well as any extra info the model may provide.
    """
    ...
109+
25110
@abc.abstractmethod
26111
async def get_detections_from_camera(
27112
self,
@@ -195,3 +280,26 @@ async def get_object_point_clouds(
195280
List[viam.proto.common.PointCloudObject]: The pointcloud objects with metadata
196281
"""
197282
...
283+
284+
@abc.abstractmethod
async def get_properties(
    self,
    *,
    extra: Optional[Mapping[str, Any]] = None,
    timeout: Optional[float] = None,
) -> Properties:
    """
    Get info about what vision methods the vision service provides. Currently returns boolean values that
    state whether the service implements the classification, detection, and/or 3D object segmentation methods.

    ::

        # Grab the detector you configured on your machine
        my_detector = VisionClient.from_robot(robot, "my_detector")
        properties = await my_detector.get_properties()
        properties.detections_supported       # returns True
        properties.classifications_supported  # returns False

    Args:
        extra (Mapping[str, Any] | None): Optional additional parameters for the implementation
        timeout (float | None): Optional time limit for the call

    Returns:
        Properties: The properties of the vision service
    """
    ...

tests/mocks/services.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@
330330
from viam.services.mlmodel.utils import flat_tensors_to_ndarrays, ndarrays_to_flat_tensors
331331
from viam.services.navigation import Navigation
332332
from viam.services.slam import SLAM
333-
from viam.services.vision import Vision
333+
from viam.services.vision import Vision, CaptureAllResult
334334
from viam.utils import ValueTypes, datetime_to_timestamp, dict_to_struct, struct_to_dict
335335

336336

@@ -344,17 +344,52 @@ def __init__(
344344
classifications: List[Classification],
345345
segmenters: List[str],
346346
point_clouds: List[PointCloudObject],
347+
image: ViamImage,
348+
properties: Vision.Properties,
347349
):
348350
self.detectors = detectors
349351
self.detections = detections
350352
self.classifiers = classifiers
351353
self.classifications = classifications
352354
self.segmenters = segmenters
353355
self.point_clouds = point_clouds
356+
self.image = image
357+
self.properties = properties
354358
self.extra: Optional[Mapping[str, Any]] = None
355359
self.timeout: Optional[float] = None
356360
super().__init__(name)
357361

362+
async def get_properties(
    self,
    *,
    extra: Optional[Mapping[str, Any]] = None,
    timeout: Optional[float] = None,
) -> Vision.Properties:
    """Record the ``extra`` and ``timeout`` received, then return the stored properties."""
    self.extra, self.timeout = extra, timeout
    return self.properties
368+
369+
async def capture_all_from_camera(
    self,
    camera_name: str,
    return_image: bool = False,
    return_classifications: bool = False,
    return_detections: bool = False,
    return_object_point_clouds: bool = False,
    *,
    extra: Optional[Mapping[str, Any]] = None,
    timeout: Optional[float] = None,
) -> CaptureAllResult:
    """Record the ``extra`` and ``timeout`` received and return the stored data.

    Each field of the result is filled from the matching attribute only when
    its ``return_*`` flag is set; otherwise it stays ``None``.
    """
    self.extra, self.timeout = extra, timeout
    return CaptureAllResult(
        image=self.image if return_image else None,
        classifications=self.classifications if return_classifications else None,
        detections=self.detections if return_detections else None,
        objects=self.point_clouds if return_object_point_clouds else None,
    )
392+
358393
async def get_detections_from_camera(
359394
self, camera_name: str, *, extra: Optional[Mapping[str, Any]] = None, timeout: Optional[float] = None
360395
) -> List[Detection]:

0 commit comments

Comments
 (0)