MediaPipe를 사용한 포즈 추정

import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

MediaPipe 포즈 추정 개요

MediaPipe는 Google에서 개발한 오픈소스 프레임워크로, 실시간 미디어 파이프라인을 구축할 수 있습니다. MediaPipe Pose는 이미지나 비디오에서 사람의 포즈를 추정하는 모델입니다.

참고: 이 예제는 MediaPipe 0.10.x 버전의 새로운 tasks API를 사용합니다. 구버전(0.9.x 이하)의 solutions API와는 다릅니다.

주요 특징:

33개의 랜드마크 포인트: 얼굴, 몸통, 팔, 다리의 주요 관절 위치
실시간 처리: 빠른 추론 속도
높은 정확도: 다양한 포즈와 각도에서도 안정적인 추정

In [20]:

# MediaPipe Pose 초기화 (MediaPipe 0.10.x 버전)
# MediaPipe 0.10.x에서는 새로운 tasks API를 사용합니다

import urllib.request
import os

# 모델 파일 저장 디렉토리 설정
model_dir = '../../.data'
model_path = os.path.join(model_dir, 'pose_landmarker.task')
model_url = 'https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task'

# 디렉토리가 없으면 생성
os.makedirs(model_dir, exist_ok=True)

# 모델 파일 다운로드 (없는 경우에만)
if not os.path.exists(model_path):
    print(f"모델 파일을 다운로드 중입니다: {model_url}")
    urllib.request.urlretrieve(model_url, model_path)
    print(f"다운로드 완료! 저장 위치: {model_path}")
else:
    print(f"모델 파일이 이미 존재합니다: {model_path}")

# PoseLandmarker 옵션 설정
base_options = python.BaseOptions(
    model_asset_path=model_path  # 다운로드한 모델 파일 경로
)

pose_landmarker_options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    output_segmentation_masks=False,  # 배경 분할 사용 여부
    min_pose_detection_confidence=0.5,  # 최소 감지 신뢰도
    min_pose_presence_confidence=0.5,  # 최소 포즈 존재 신뢰도
    min_tracking_confidence=0.5,  # 최소 추적 신뢰도
    num_poses=1  # 감지할 최대 포즈 수
)

# PoseLandmarker 생성
pose_landmarker = vision.PoseLandmarker.create_from_options(pose_landmarker_options)
print("PoseLandmarker 초기화 완료!")

모델 파일이 이미 존재합니다: ../../.data/pose_landmarker.task
PoseLandmarker 초기화 완료!

W0000 00:00:1768957729.978007  274564 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1768957730.053123  274574 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.

이미지에서 포즈 추정하기

이미지를 읽어서 MediaPipe로 포즈를 추정하고 결과를 시각화합니다.

In [21]:

def detect_pose(image_path):
    """
    이미지에서 포즈를 추정하는 함수 (MediaPipe 0.10.x 버전)
    
    Args:
        image_path: 이미지 파일 경로
        
    Returns:
        image: 원본 이미지 (BGR)
        mp_image: MediaPipe Image 객체
        detection_result: MediaPipe 포즈 추정 결과
    """
    # 이미지 읽기
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"이미지를 읽을 수 없습니다: {image_path}")
    
    # BGR을 RGB로 변환
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # MediaPipe Image 객체 생성
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_rgb)
    
    # 포즈 추정 수행
    detection_result = pose_landmarker.detect(mp_image)
    
    return image, mp_image, detection_result

# 사용 예시 (이미지 경로를 실제 경로로 변경하세요)
# image, mp_image, detection_result = detect_pose('path/to/your/image.jpg')

포즈 랜드마크 시각화

MediaPipe는 33개의 랜드마크 포인트를 제공합니다. 각 포인트는 다음과 같은 인덱스를 가집니다:

0-10: 얼굴
11-16: 왼쪽 팔
17-22: 오른쪽 팔
23-28: 왼쪽 다리
29-32: 몸통

In [22]:

def draw_pose_landmarks(image, detection_result):
    """
    포즈 랜드마크를 이미지에 그리는 함수 (MediaPipe 0.10.x 버전)
    
    Args:
        image: 원본 이미지 (BGR)
        detection_result: MediaPipe 포즈 추정 결과
        
    Returns:
        annotated_image: 랜드마크가 그려진 이미지
    """
    # 이미지 복사
    annotated_image = image.copy()
    
    # RGB로 변환
    annotated_image_rgb = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
    
    # 포즈 랜드마크 그리기
    if detection_result.pose_landmarks:
        for pose_landmarks in detection_result.pose_landmarks:
            # 랜드마크 연결선 그리기
            connections = vision.PoseLandmarksConnections.POSE_LANDMARKS
            for connection in connections:
                start_idx = connection.start
                end_idx = connection.end
                if start_idx < len(pose_landmarks) and end_idx < len(pose_landmarks):
                    start_point = pose_landmarks[start_idx]
                    end_point = pose_landmarks[end_idx]
                    start_x = int(start_point.x * annotated_image_rgb.shape[1])
                    start_y = int(start_point.y * annotated_image_rgb.shape[0])
                    end_x = int(end_point.x * annotated_image_rgb.shape[1])
                    end_y = int(end_point.y * annotated_image_rgb.shape[0])
                    cv2.line(annotated_image_rgb, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
            
            # 랜드마크 포인트 그리기
            for landmark in pose_landmarks:
                x = int(landmark.x * annotated_image_rgb.shape[1])
                y = int(landmark.y * annotated_image_rgb.shape[0])
                cv2.circle(annotated_image_rgb, (x, y), 5, (0, 0, 255), -1)
    
    # RGB를 BGR로 다시 변환
    annotated_image = cv2.cvtColor(annotated_image_rgb, cv2.COLOR_RGB2BGR)
    
    return annotated_image

랜드마크 좌표 추출

각 랜드마크의 좌표를 추출하여 활용할 수 있습니다.

In [23]:

def get_landmark_coordinates(detection_result, image_shape):
    """
    랜드마크 좌표를 픽셀 좌표로 변환하는 함수 (MediaPipe 0.10.x 버전)
    
    Args:
        detection_result: MediaPipe 포즈 추정 결과
        image_shape: 이미지 크기 (height, width)
        
    Returns:
        landmarks: 랜드마크 좌표 딕셔너리 {landmark_name: (x, y, z)}
    """
    landmarks = {}
    
    if detection_result.pose_landmarks:
        height, width = image_shape[:2]
        
        # MediaPipe 랜드마크 인덱스 정의 (33개 포인트)
        landmark_names = {
            0: 'NOSE',
            2: 'LEFT_EYE_INNER', 4: 'LEFT_EYE', 5: 'LEFT_EYE_OUTER',
            7: 'RIGHT_EYE_INNER', 9: 'RIGHT_EYE', 10: 'RIGHT_EYE_OUTER',
            11: 'LEFT_EAR', 12: 'RIGHT_EAR',
            13: 'MOUTH_LEFT', 14: 'MOUTH_RIGHT',
            15: 'LEFT_SHOULDER', 16: 'RIGHT_SHOULDER',
            17: 'LEFT_ELBOW', 18: 'RIGHT_ELBOW',
            19: 'LEFT_WRIST', 20: 'RIGHT_WRIST',
            21: 'LEFT_PINKY', 22: 'RIGHT_PINKY',
            23: 'LEFT_INDEX', 24: 'RIGHT_INDEX',
            25: 'LEFT_THUMB', 26: 'RIGHT_THUMB',
            27: 'LEFT_HIP', 28: 'RIGHT_HIP',
            29: 'LEFT_KNEE', 30: 'RIGHT_KNEE',
            31: 'LEFT_ANKLE', 32: 'RIGHT_ANKLE',
            33: 'LEFT_HEEL', 34: 'RIGHT_HEEL',
            35: 'LEFT_FOOT_INDEX', 36: 'RIGHT_FOOT_INDEX'
        }
        
        # 첫 번째 포즈의 랜드마크 사용
        pose_landmarks = detection_result.pose_landmarks[0]
        
        for idx, landmark in enumerate(pose_landmarks):
            # 정규화된 좌표를 픽셀 좌표로 변환
            x = int(landmark.x * width)
            y = int(landmark.y * height)
            z = landmark.z  # 깊이 정보 (상대적)
            
            if idx in landmark_names:
                landmarks[landmark_names[idx]] = (x, y, z)
            else:
                landmarks[f'LANDMARK_{idx}'] = (x, y, z)
    
    return landmarks

완전한 예제

이미지를 읽어서 포즈를 추정하고 결과를 시각화하는 완전한 예제입니다.

In [33]:

def process_image(image_path):
    """
    이미지에서 포즈를 추정하고 결과를 시각화하는 완전한 함수 (MediaPipe 0.10.x 버전)
    
    Args:
        image_path: 입력 이미지 경로
        
    Returns:
        annotated_image: 랜드마크가 그려진 이미지
        landmarks: 랜드마크 좌표 딕셔너리
    """
    # 이미지 읽기 및 포즈 추정
    image, mp_image, detection_result = detect_pose(image_path)
    
    # 포즈가 감지되었는지 확인
    if detection_result.pose_landmarks:
        print("포즈가 성공적으로 감지되었습니다!")
        
        # 랜드마크 그리기
        annotated_image = draw_pose_landmarks(image, detection_result)
        
        # 랜드마크 좌표 추출
        landmarks = get_landmark_coordinates(detection_result, image.shape)
        
        # 주요 관절 좌표 출력
        key_points = ['LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 
                     'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST',
                     'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE',
                     'LEFT_ANKLE', 'RIGHT_ANKLE']
        
        print("\n주요 관절 좌표:")
        for point in key_points:
            if point in landmarks:
                x, y, z = landmarks[point]
                print(f"{point}: ({x}, {y}, z={z:.3f})")
        
        return annotated_image, landmarks
    else:
        print("포즈를 감지할 수 없습니다.")
        return image, None

# 사용 예시
# annotated_image, landmarks = process_image('input.jpg', 'output.jpg')

Matplotlib을 사용한 시각화

Matplotlib을 사용하여 원본 이미지와 포즈 추정 결과를 함께 표시할 수 있습니다.

In [25]:

def visualize_pose_comparison(image_path):
    """
    원본 이미지와 포즈 추정 결과를 나란히 비교하는 함수 (MediaPipe 0.10.x 버전)
    
    Args:
        image_path: 이미지 파일 경로
    """
    # 이미지 읽기 및 포즈 추정
    image, mp_image, detection_result = detect_pose(image_path)
    
    # 포즈 랜드마크 그리기
    annotated_image = draw_pose_landmarks(image, detection_result)
    
    # Matplotlib으로 표시
    fig, axes = plt.subplots(1, 2, figsize=(15, 7))
    
    # 원본 이미지
    axes[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[0].set_title('원본 이미지', fontsize=14)
    axes[0].axis('off')
    
    # 포즈 추정 결과
    axes[1].imshow(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
    axes[1].set_title('포즈 추정 결과', fontsize=14)
    axes[1].axis('off')
    
    plt.tight_layout()
    plt.show()
    
    return annotated_image, detection_result

# 사용 예시
# visualize_pose_comparison('path/to/your/image.jpg')

실제 실행 예제

이미지 경로를 지정하여 포즈 추정을 실행하고 결과를 출력합니다.

# process_image 함수를 사용한 간단한 방법
image_path = '../../.data/man.jpg'
annotated_image, landmarks = process_image(image_path)

# 결과 시각화
if landmarks:
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    
    # 원본 이미지 읽기
    original = cv2.imread(image_path)
    axes[0].imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
    axes[0].set_title('Original Image', fontsize=16, fontweight='bold')
    axes[0].axis('off')
    
    # 포즈 추정 결과
    axes[1].imshow(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
    axes[1].set_title('Pose Estimation Result (Skeleton)', fontsize=16, fontweight='bold')
    axes[1].axis('off')
    
    plt.tight_layout()
    plt.show()

TypeError: process_image() got an unexpected keyword argument 'save_path'

[31m---------------------------------------------------------------------------[39m
[31mTypeError[39m                                 Traceback (most recent call last)
[36mCell[39m[36m [39m[32mIn[34][39m[32m, line 3[39m
[32m      1[39m [38;5;66;03m# process_image 함수를 사용한 간단한 방법[39;00m
[32m      2[39m image_path = [33m'[39m[33m../../.data/man.jpg[39m[33m'[39m
[32m----> [39m[32m3[39m annotated_image, landmarks = [43mprocess_image[49m[43m([49m[43mimage_path[49m[43m,[49m[43m [49m[43msave_path[49m[43m=[49m[33;43m'[39;49m[33;43moutput_pose.jpg[39;49m[33;43m'[39;49m[43m)[49m
[32m      5[39m [38;5;66;03m# 결과 시각화[39;00m
[32m      6[39m [38;5;28;01mif[39;00m landmarks:

[31mTypeError[39m: process_image() got an unexpected keyword argument 'save_path'

대안: process_image 함수 사용

위의 코드 대신 process_image 함수를 사용할 수도 있습니다.

포즈가 성공적으로 감지되었습니다!

주요 관절 좌표:
LEFT_SHOULDER: (301, 265, z=-0.481)
RIGHT_SHOULDER: (247, 246, z=-0.529)
LEFT_ELBOW: (291, 280, z=-0.558)
RIGHT_ELBOW: (255, 260, z=-0.621)
LEFT_WRIST: (284, 272, z=-0.578)
RIGHT_WRIST: (259, 254, z=-0.635)
LEFT_HIP: (323, 500, z=0.205)
RIGHT_HIP: (227, 497, z=-0.005)
LEFT_KNEE: (324, 509, z=0.220)
RIGHT_KNEE: (240, 515, z=0.009)
LEFT_ANKLE: (319, 543, z=0.004)
RIGHT_ANKLE: (194, 527, z=-0.194)

결과 이미지가 저장되었습니다: output_pose.jpg

/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 50896 (\N{HANGUL SYLLABLE WEON}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 48376 (\N{HANGUL SYLLABLE BON}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 51060 (\N{HANGUL SYLLABLE I}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 48120 (\N{HANGUL SYLLABLE MI}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 51648 (\N{HANGUL SYLLABLE JI}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 54252 (\N{HANGUL SYLLABLE PO}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 51592 (\N{HANGUL SYLLABLE JEU}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 52628 (\N{HANGUL SYLLABLE CU}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 44208 (\N{HANGUL SYLLABLE GYEOL}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 44284 (\N{HANGUL SYLLABLE GWA}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 48904 (\N{HANGUL SYLLABLE BBYEO}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/tmp/ipykernel_258194/2244087904.py:20: UserWarning: Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from font(s) DejaVu Sans.
  plt.tight_layout()
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 50896 (\N{HANGUL SYLLABLE WEON}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 48376 (\N{HANGUL SYLLABLE BON}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 51060 (\N{HANGUL SYLLABLE I}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 48120 (\N{HANGUL SYLLABLE MI}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 51648 (\N{HANGUL SYLLABLE JI}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 54252 (\N{HANGUL SYLLABLE PO}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 51592 (\N{HANGUL SYLLABLE JEU}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 52628 (\N{HANGUL SYLLABLE CU}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 44208 (\N{HANGUL SYLLABLE GYEOL}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 44284 (\N{HANGUL SYLLABLE GWA}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 48904 (\N{HANGUL SYLLABLE BBYEO}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)
/home/woolimi/venv/cv/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from font(s) DejaVu Sans.
  fig.canvas.print_figure(bytes_io, **kw)

리소스 정리

작업이 끝나면 MediaPipe Pose 객체를 정리해야 합니다.

In [17]:

# 리소스 정리 (MediaPipe 0.10.x 버전)
pose_landmarker.close()