Windows10下使用SAM(Segment Anything Model)大模型實現萬物皆可分割!
SAM(Segment Anything Model),顧名思義,即為分割一切!該模型由Facebook的Meta AI實驗室推出,能夠根據文本指令或圖像識別,實現對任意物體的識別與分割。它的誕生,無疑是CV領域的一次重要里程碑。
論文地址:https://arxiv.org/abs/2304.02643
在前文《從零解讀SAM(Segment Anything Model)大模型!萬物皆可分割!(含源碼解析)》中從實現原理到源碼解析對SAM大模型進行了詳細解讀,本文將演示Windows10下SAM大模型的實際使用過程!
SAM模型運行環境安裝
1.環境要求
- Python 3.8+
- Pytorch 1.7+
- Torchvision>=0.8
2.查看CUDA版本號
nvidia-smi
3.安裝GPU版本的Pytorch
根據自己的cuda版本選擇對應的版本,生成安裝命令。
SAM模型代碼使用
首先下載Github源碼以及所提供的權重文件。
1.predictor_example
predictor_example.ipynb源碼在notebooks文件目錄下,可以本地運行測試。
步驟一:查看測試圖片
import cv2
import matplotlib.pyplot as plt
# Load the test image. cv2.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
# OpenCV decodes to BGR channel order; convert to RGB for matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis('on')
plt.show()
步驟二:顯示前景和背景的標記點
import numpy as np
import matplotlib.pyplot as plt
import cv2
def show_points(coords, labels, ax, marker_size=375):
    """Draw prompt points on ``ax`` as stars: label 1 in green, label 0 in red.

    ``labels`` is compared element-wise with 1 and 0 to select rows of
    ``coords``; column 0 of the selected rows is plotted as x and column 1
    as y. ``marker_size`` is forwarded to ``ax.scatter`` as ``s``.
    """
    # Foreground (label 1) is drawn first, then background (label 0).
    for label_value, star_color in ((1, 'green'), (0, 'red')):
        chosen = coords[labels == label_value]
        ax.scatter(
            chosen[:, 0], chosen[:, 1],
            color=star_color, marker='*', s=marker_size,
            edgecolor='white', linewidth=1.25,
        )
# Read the image file. cv2.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
# Convert from OpenCV's BGR channel order to RGB for matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# 2-D array holding three preset (x, y) points.
input_point = np.array([[230, 194], [182, 63], [339, 279]])
# 1-D array, one entry per point: 1 marks foreground, 0 marks background.
input_label = np.array([1, 1, 0])
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_points(input_point, input_label, plt.gca())
plt.axis('on')
plt.show()
這裡可以用畫圖軟件打開圖片,查看像素坐標以輔助標定。
步驟三:標記點完成前景目標的分割
簡單的調用源碼模型,就能完成前景目標的分割,源碼提供了三種不同大小的模型,我們也可以自己去嘗試不同的模型效果。
import numpy as np
import matplotlib.pyplot as plt
import cv2
def show_mask(mask, ax, random_color=False):
    """Show ``mask`` on the matplotlib axes ``ax`` as a single-colour RGBA overlay.

    When ``random_color`` is true the RGB part is drawn from
    ``np.random.random(3)``; otherwise a fixed blue (30, 144, 255) is used.
    The alpha component is always 0.6.
    """
    rgb = np.random.random(3) if random_color else np.array([30 / 255, 144 / 255, 255 / 255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    # Only the last two axes of the mask are taken as (height, width).
    height, width = mask.shape[-2:]
    # Broadcast (h, w, 1) against (1, 1, 4); zero-valued mask pixels become all zeros.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_points(coords, labels, ax, marker_size=375):
    """Scatter the prompt points onto ``ax``: green stars for label 1, red for label 0.

    Rows of ``coords`` are selected with ``labels == 1`` / ``labels == 0``;
    column 0 is used as x and column 1 as y.
    """
    # Marker styling shared by both groups.
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    foreground = coords[labels == 1]
    background = coords[labels == 0]
    ax.scatter(foreground[:, 0], foreground[:, 1], color='green', **star_style)
    ax.scatter(background[:, 0], background[:, 1], color='red', **star_style)
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# ------ load the model ------
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor.set_image(image)
# ------ model loaded ------
# Prompt points as (x, y); several points may be given.
input_point = np.array([[230, 194], [182, 63], [339, 279]])
# One label per point: 1 = foreground target, 0 = background.
input_label = np.array([1, 1, 0])
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=True,
)
# One figure per returned mask, titled with its score.
for i, (mask, score) in enumerate(zip(masks, scores)):
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    show_mask(mask, plt.gca())
    show_points(input_point, input_label, plt.gca())
    plt.title(f"Mask {i + 1}, Score: {score:.3f}", fontsize=18)
    plt.axis('off')
    plt.show()
這裡會輸出三個結果(multimask_output=True時,模型會返回三個候選掩膜及其對應的得分)。
步驟四:標定框完成前景目標的分割
綠色的框是用戶自己標定的,根據框選的區域完成前景目標的分割。
import numpy as np
import matplotlib.pyplot as plt
import cv2
def show_mask(mask, ax, random_color=False):
    """Overlay ``mask`` on ``ax`` as a translucent single-colour RGBA image.

    The RGB part is random when ``random_color`` is true, otherwise a fixed
    blue (30, 144, 255); alpha is always 0.6.
    """
    if random_color:
        rgb = np.random.random(3)
    else:
        rgb = np.array([30 / 255, 144 / 255, 255 / 255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    # Only the trailing two axes are read as (height, width).
    rows, cols = mask.shape[-2:]
    ax.imshow(mask.reshape(rows, cols, 1) * rgba.reshape(1, 1, -1))
def show_points(coords, labels, ax, marker_size=375):
    """Draw prompt points on ``ax`` as stars: label 1 in green, label 0 in red.

    Column 0 of the selected ``coords`` rows is plotted as x, column 1 as y.
    """
    # Foreground (label 1) is drawn first, then background (label 0).
    for label_value, star_color in ((1, 'green'), (0, 'red')):
        chosen = coords[labels == label_value]
        ax.scatter(
            chosen[:, 0], chosen[:, 1],
            color=star_color, marker='*', s=marker_size,
            edgecolor='white', linewidth=1.25,
        )
def show_box(box, ax):
    """Draw ``box`` on ``ax`` as an unfilled green rectangle.

    ``box[0], box[1]`` is the starting corner; width and height are derived
    as ``box[2] - box[0]`` and ``box[3] - box[1]``.
    """
    left, top = box[0], box[1]
    width = box[2] - left
    height = box[3] - top
    outline = plt.Rectangle(
        (left, top), width, height,
        edgecolor='green', facecolor=(0, 0, 0, 0), lw=2,
    )
    ax.add_patch(outline)
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# ------ load the model ------
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor.set_image(image)
# ------ model loaded ------
# Start and end corner of the prompt box: (x0, y0, x1, y1).
input_box = np.array([112, 41, 373, 320])
# The box is passed with an added leading axis; no point prompts are used.
masks, _, _ = predictor.predict(
    point_coords=None,
    point_labels=None,
    box=input_box[None, :],
    multimask_output=False,
)
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_mask(masks[0], plt.gca())
show_box(input_box, plt.gca())
plt.axis('off')
plt.show()
步驟五:標定框和標記點聯合完成前景目標的分割
對於一些複雜的目標,可能需要聯合使用標定框和標記點來提高前景目標的分割精度。box和points可以聯合標定完成圖像分割,但是此時的box只能有一個,不能有多個。
import numpy as np
import matplotlib.pyplot as plt
import cv2
def show_mask(mask, ax, random_color=False):
    """Show ``mask`` on ``ax`` as a single-colour RGBA overlay with alpha 0.6.

    ``random_color`` switches between a random RGB triple and a fixed blue
    (30, 144, 255).
    """
    rgb = np.random.random(3) if random_color else np.array([30 / 255, 144 / 255, 255 / 255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    # Only the last two axes of the mask are taken as (height, width).
    height, width = mask.shape[-2:]
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_points(coords, labels, ax, marker_size=375):
    """Scatter the prompt points onto ``ax``: green stars for label 1, red for label 0.

    Column 0 of the selected ``coords`` rows is used as x, column 1 as y.
    """
    # Marker styling shared by both groups.
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    foreground = coords[labels == 1]
    background = coords[labels == 0]
    ax.scatter(foreground[:, 0], foreground[:, 1], color='green', **star_style)
    ax.scatter(background[:, 0], background[:, 1], color='red', **star_style)
def show_box(box, ax):
    """Add an unfilled green rectangle for ``box`` to ``ax``.

    The rectangle starts at ``(box[0], box[1])`` and its size is the
    difference to ``(box[2], box[3])``.
    """
    origin = (box[0], box[1])
    size = (box[2] - box[0], box[3] - box[1])
    ax.add_patch(
        plt.Rectangle(origin, size[0], size[1], edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)
    )
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# ------ load the model ------
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor.set_image(image)
# ------ model loaded ------
# Start and end corner of the prompt box: (x0, y0, x1, y1).
input_box = np.array([112, 41, 373, 320])
# Prompt points as (x, y); several points may be given.
input_point = np.array([[230, 194], [182, 63], [339, 279]])
# One label per point: 1 = foreground target, 0 = background.
input_label = np.array([1, 1, 0])
# Use the box and the points together in a single prediction.
masks, _, _ = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    box=input_box,
    multimask_output=False,
)
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_mask(masks[0], plt.gca())
show_box(input_box, plt.gca())
show_points(input_point, input_label, plt.gca())
plt.axis('off')
plt.show()
步驟六:多標定框完成前景目標的分割
可以是多標定框對應多個目標,也可以是多標定框對應同一目標的不同部位。
import numpy as np
import matplotlib.pyplot as plt
import torch
import cv2
def show_mask(mask, ax, random_color=False):
    """Overlay ``mask`` on ``ax`` as a translucent single-colour RGBA image.

    The RGB part is random when ``random_color`` is true, otherwise a fixed
    blue (30, 144, 255); alpha is always 0.6.
    """
    if random_color:
        rgb = np.random.random(3)
    else:
        rgb = np.array([30 / 255, 144 / 255, 255 / 255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    # Only the trailing two axes are read as (height, width).
    rows, cols = mask.shape[-2:]
    ax.imshow(mask.reshape(rows, cols, 1) * rgba.reshape(1, 1, -1))
def show_points(coords, labels, ax, marker_size=375):
    """Draw prompt points on ``ax`` as stars: label 1 in green, label 0 in red.

    Column 0 of the selected ``coords`` rows is plotted as x, column 1 as y.
    """
    # Foreground (label 1) is drawn first, then background (label 0).
    for label_value, star_color in ((1, 'green'), (0, 'red')):
        chosen = coords[labels == label_value]
        ax.scatter(
            chosen[:, 0], chosen[:, 1],
            color=star_color, marker='*', s=marker_size,
            edgecolor='white', linewidth=1.25,
        )
def show_box(box, ax):
    """Draw ``box`` on ``ax`` as an unfilled green rectangle.

    ``box[0], box[1]`` is the starting corner; width and height are derived
    as ``box[2] - box[0]`` and ``box[3] - box[1]``.
    """
    left, top = box[0], box[1]
    width = box[2] - left
    height = box[3] - top
    outline = plt.Rectangle(
        (left, top), width, height,
        edgecolor='green', facecolor=(0, 0, 0, 0), lw=2,
    )
    ax.add_patch(outline)
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# ------ load the model ------
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor.set_image(image)
# ------ model loaded ------
# Several prompt boxes, one (x0, y0, x1, y1) row each, created on the predictor's device.
input_boxes = torch.tensor([
    [121, 49, 361, 190],
    [143, 101, 308, 312],
    [366, 116, 451, 233],
], device=predictor.device)
# Transform the boxes with the predictor's own transform before predict_torch.
transformed_boxes = predictor.transform.apply_boxes_torch(input_boxes, image.shape[:2])
masks, _, _ = predictor.predict_torch(
    point_coords=None,
    point_labels=None,
    boxes=transformed_boxes,
    multimask_output=False,
)
plt.figure(figsize=(10, 10))
plt.imshow(image)
# Tensors are moved to the CPU and converted to numpy for the drawing helpers.
for mask in masks:
    show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
for box in input_boxes:
    show_box(box.cpu().numpy(), plt.gca())
plt.axis('off')
plt.show()
步驟七:圖片批量完成前景目標的分割
源碼支持圖片的批量輸入,大大提升了分割效率。
import numpy as np
import matplotlib.pyplot as plt
import torch
import cv2
def show_mask(mask, ax, random_color=False):
    """Show ``mask`` on ``ax`` as a single-colour RGBA overlay with alpha 0.6.

    ``random_color`` switches between a random RGB triple and a fixed blue
    (30, 144, 255).
    """
    rgb = np.random.random(3) if random_color else np.array([30 / 255, 144 / 255, 255 / 255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    # Only the last two axes of the mask are taken as (height, width).
    height, width = mask.shape[-2:]
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_points(coords, labels, ax, marker_size=375):
    """Scatter the prompt points onto ``ax``: green stars for label 1, red for label 0.

    Column 0 of the selected ``coords`` rows is used as x, column 1 as y.
    """
    # Marker styling shared by both groups.
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    foreground = coords[labels == 1]
    background = coords[labels == 0]
    ax.scatter(foreground[:, 0], foreground[:, 1], color='green', **star_style)
    ax.scatter(background[:, 0], background[:, 1], color='red', **star_style)
def show_box(box, ax):
    """Add an unfilled green rectangle for ``box`` to ``ax``.

    The rectangle starts at ``(box[0], box[1])`` and its size is the
    difference to ``(box[2], box[3])``.
    """
    origin = (box[0], box[1])
    size = (box[2] - box[0], box[3] - box[1])
    ax.add_patch(
        plt.Rectangle(origin, size[0], size[1], edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)
    )
def prepare_image(image, transform, device):
    """Apply ``transform`` to ``image`` and return it as a contiguous tensor.

    Args:
        image: Input passed to ``transform.apply_image``.
        transform: Object providing ``apply_image(image)``.
        device: Either an object exposing a ``.device`` attribute (the
            script passes the model itself) or a plain device spec such as
            ``"cpu"`` or a ``torch.device``.

    Returns:
        The transformed image as a tensor on the target device, with its
        last axis moved to the front.
    """
    image = transform.apply_image(image)
    # The original only accepted objects with ``.device``; a plain device
    # spec has no such attribute, so use the value itself in that case.
    target_device = getattr(device, "device", device)
    image = torch.as_tensor(image, device=target_device)
    # Presumably (H, W, C) -> (C, H, W); confirm against transform.apply_image.
    return image.permute(2, 0, 1).contiguous()
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor
# cv2.imread returns None (it does not raise) when a file is missing or
# unreadable, so fail early with a clear message for each image.
image1 = cv2.imread('img.png')
if image1 is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)
image2 = cv2.imread('img_1.png')
if image2 is None:
    raise FileNotFoundError("cv2.imread could not read 'img_1.png'")
image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2RGB)
# ------ load the model ------
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
from segment_anything.utils.transforms import ResizeLongestSide
# Resize transform built from the image encoder's input size.
resize_transform = ResizeLongestSide(sam.image_encoder.img_size)
# ------ model loaded ------
# Prompt boxes per image, one (x0, y0, x1, y1) row each, created on the model's device.
image1_boxes = torch.tensor([
    [121, 49, 361, 190],
    [143, 101, 308, 312],
    [366, 116, 451, 233],
], device=sam.device)
image2_boxes = torch.tensor([
    [24, 4, 333, 265],
], device=sam.device)
# Batched input: one dict per image with the prepared image tensor, the
# transformed boxes and the original (height, width).
batched_input = [
    {
        'image': prepare_image(image1, resize_transform, sam),
        'boxes': resize_transform.apply_boxes_torch(image1_boxes, image1.shape[:2]),
        'original_size': image1.shape[:2]
    },
    {
        'image': prepare_image(image2, resize_transform, sam),
        'boxes': resize_transform.apply_boxes_torch(image2_boxes, image2.shape[:2]),
        'original_size': image2.shape[:2]
    }
]
batched_output = sam(batched_input, multimask_output=False)
fig, ax = plt.subplots(1, 2, figsize=(20, 20))
# Batched output: draw each image with its masks and boxes on its own axes.
ax[0].imshow(image1)
for mask in batched_output[0]['masks']:
    show_mask(mask.cpu().numpy(), ax[0], random_color=True)
for box in image1_boxes:
    show_box(box.cpu().numpy(), ax[0])
ax[0].axis('off')
ax[1].imshow(image2)
for mask in batched_output[1]['masks']:
    show_mask(mask.cpu().numpy(), ax[1], random_color=True)
for box in image2_boxes:
    show_box(box.cpu().numpy(), ax[1])
ax[1].axis('off')
plt.tight_layout()
plt.show()
2.automatic_mask_generator_example
源碼在notebooks文件內提供了一個Jupyter Notebook的自動分割教程,無需標定點和標定框。
步驟一:自動掩碼生成
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
def show_anns(anns):
    """Overlay every annotation mask on the current matplotlib axes.

    Each element of ``anns`` is read through its ``'area'`` and
    ``'segmentation'`` keys. Masks are drawn in descending area order so
    that smaller masks are painted on top of larger ones. Does nothing when
    ``anns`` is empty.
    """
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=lambda ann: ann['area'], reverse=True)
    ax = plt.gca()
    # Keep the axis limits fixed while the overlays are added.
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        # Solid image in one random RGB colour, same height/width as the mask.
        color_mask = np.random.random(3)
        img = np.ones((m.shape[0], m.shape[1], 3)) * color_mask
        # The mask scaled by 0.35 is stacked on as the alpha channel.
        ax.imshow(np.dstack((img, m * 0.35)))
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
# Build the SAM variant selected by model_type, loading weights from sam_checkpoint.
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
# Move the model to the configured device.
sam.to(device=device)
# Automatic mask generator constructed with only the model argument.
mask_generator = SamAutomaticMaskGenerator(sam)
# Generate masks for the image; no point or box prompts are passed.
masks = mask_generator.generate(image)
# Show the image and let show_anns overlay the generated masks.
plt.figure(figsize=(20, 20))
plt.imshow(image)
show_anns(masks)
plt.axis('off')
plt.show()
在自動掩模生成中有幾個可調參數,用於控制採樣點的密度以及去除低質量或重複掩模的閾值。此外,生成可以在圖像的裁剪上自動運行,以提高較小對象的性能,後處理可以去除雜散像素和孔洞。
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
# Read the image file. cv2.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with a clear message.
image = cv2.imread('img.png')
if image is None:
    raise FileNotFoundError("cv2.imread could not read 'img.png'")
# Convert from OpenCV's default BGR channel order to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Path of the saved weight file; keep it in sync with model_type below.
sam_checkpoint = "model_save/sam_vit_b_01ec64.pth"
# sam_checkpoint = "model_save/sam_vit_h_4b8939.pth"
# sam_checkpoint = "model_save/sam_vit_l_0b3195.pth"
# Model type
model_type = "vit_b"
# model_type = "vit_h"
# model_type = "vit_l"
# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Mask display helper.
def show_anns(anns):
    """Overlay every annotation mask on the current matplotlib axes.

    Each element of ``anns`` is read through its ``'area'`` and
    ``'segmentation'`` keys. Returns immediately when ``anns`` is empty.
    """
    if len(anns) == 0:
        return
    # Sort by area, largest first, so larger masks are drawn first and
    # smaller ones end up on top instead of being hidden.
    sorted_anns = sorted(anns, key=lambda ann: ann['area'], reverse=True)
    # Draw onto the current axes and keep their limits fixed.
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        # Mask of this annotation.
        m = ann['segmentation']
        # Solid image in one random RGB colour, same height/width as the mask.
        color_mask = np.random.random(3)
        img = np.ones((m.shape[0], m.shape[1], 3)) * color_mask
        # Stack the mask scaled by 0.35 as a fourth (alpha) channel and draw
        # it, layering one overlay per annotation.
        ax.imshow(np.dstack((img, m * 0.35)))
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
# Instantiate the SAM variant selected by model_type and load it from the checkpoint.
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
# Default-parameter version:
# mask_generator = SamAutomaticMaskGenerator(sam)
# Custom-parameter version of the mask generator.
mask_generator_2 = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=32,
    pred_iou_thresh=0.86,
    stability_score_thresh=0.92,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # Requires open-cv to run post-processing
)
masks = mask_generator_2.generate(image)
# Visualise the masks. The figure call had been swallowed into a garbled
# comment line, so the 20x20 figure was never created; it is restored here.
plt.figure(figsize=(20, 20))
plt.imshow(image)
show_anns(masks)
plt.axis('off')
plt.show()