'데이터 수집 및 가공' 카테고리의 글 목록

데이터 수집 및 가공

동영상 캡쳐 2023.10.20
음향데이터(Waveshow,STFT,MelSpectrogram)이미지 가공 2023.10.19 1
음원 데이터 수집하기(yt_dlp/window/python) 2023.10.15

동영상 캡쳐

퀀텀리프하는 개발자 2023. 10. 20. 00:12

2023. 10. 20. 00:12

pip install opencv-python

pip install PySide6

사용중인 가상환경에 위 라이브러리를 설치합니다.

아래 코드를 실행합니다.

import sys

import cv2

import os

import PySide6.QtGui as QtGui

from PySide6.QtCore import Qt, QTimer

from PySide6.QtWidgets import QMainWindow, QPushButton, QVBoxLayout, QFileDialog, QLabel, QApplication, QSizePolicy, QWidget, QStatusBar

class ViewVideo(QMainWindow):
    """Small video player window that can save the displayed frame as a PNG.

    The window offers four buttons (open, play, stop, capture) above a label
    that shows the current frame. Frames are read with ``cv2.VideoCapture``
    on a ``QTimer`` tick, shrunk to fit ``video_width`` x ``video_height``
    and drawn into the label. Captured frames are written to
    ``./capture_data`` as ``<video name>_<counter>_image.png``.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Video View")
        self.resize(800, 600)

        # Control buttons.
        self.view_file_button = QPushButton("video open")
        self.view_file_button.clicked.connect(self.open_video_file_dialog)
        self.play_button = QPushButton("Play")
        self.play_button.clicked.connect(self.play_video)
        self.pause_button = QPushButton("Stop")
        self.pause_button.clicked.connect(self.pause_video)
        self.capture_button = QPushButton("Capture")
        self.capture_button.clicked.connect(self.capture_frame)
        # Nothing can be stopped or captured before playback has started;
        # leaving these enabled let "Capture" run without an open video.
        self.pause_button.setEnabled(False)
        self.capture_button.setEnabled(False)

        # Label that displays the current video frame.
        self.video_view_label = QLabel()
        self.video_view_label.setAlignment(Qt.AlignCenter)
        self.video_view_label.setSizePolicy(QSizePolicy.Expanding,
                                            QSizePolicy.Expanding)
        # Set once here instead of on every frame.
        self.video_view_label.setScaledContents(True)

        # Main layout: buttons stacked above the video label.
        main_layout = QVBoxLayout()
        main_layout.addWidget(self.view_file_button)
        main_layout.addWidget(self.play_button)
        main_layout.addWidget(self.pause_button)
        main_layout.addWidget(self.capture_button)
        main_layout.addWidget(self.video_view_label)
        central_widget = QWidget()
        central_widget.setLayout(main_layout)
        self.setCentralWidget(central_widget)

        # Playback state.
        self.video_path = ""
        self.video_width = 720    # maximum displayed/captured frame width
        self.video_height = 640   # maximum displayed/captured frame height
        self.video_capture = None
        self.video_timer = QTimer(self)
        self.video_timer.timeout.connect(self.display_next_frame)
        self.paused = False
        self.current_frame = 0
        self.capture_count = 0
        # Resized RGB frame currently shown in the label (None if none yet).
        self.displayed_frame = None

        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)

    def open_video_file_dialog(self):
        """Let the user pick a video file; reset the player if it changed."""
        file_dialog = QFileDialog(self)
        # The patterns inside a Qt name filter are separated by spaces.
        file_dialog.setNameFilter("Video Files (*.mp4 *.avi *.mov *.mkv)")
        if not file_dialog.exec():
            return
        selected_files = file_dialog.selectedFiles()
        if not selected_files:
            return
        new_video_path = selected_files[0]
        if new_video_path != self.video_path:
            self.video_path = new_video_path
            self.status_bar.showMessage(f"Selected video path: {self.video_path}")
            self.reset_video_player()

    def display_next_frame(self):
        """Timer slot: read the next frame and show it in the label."""
        if self.video_capture is None:
            # No open video; make sure the timer does not keep firing.
            self.video_timer.stop()
            return

        ret, frame = self.video_capture.read()
        if not ret:
            self._finish_playback()
            return

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resize = self.resize_frame(frame_rgb)
        h, w, _ = frame_resize.shape
        if w > 0 and h > 0:
            # Pass the row stride explicitly: without it QImage assumes
            # 32-bit aligned scanlines, which skews the picture whenever
            # width * 3 is not a multiple of 4.
            frame_image = QtGui.QImage(frame_resize.data, w, h,
                                       frame_resize.strides[0],
                                       QtGui.QImage.Format_RGB888)
            self.video_view_label.setPixmap(QtGui.QPixmap.fromImage(frame_image))
            # Remembered so that capture_frame saves exactly what is shown.
            self.displayed_frame = frame_resize
            self.current_frame += 1

    def play_video(self):
        """Start playback from the beginning, or resume after a stop."""
        if not self.video_path:
            return

        if self.paused and self.video_capture is not None:
            # Nothing reads from the capture while stopped, so it is still
            # positioned on the next frame; no seek is needed to resume.
            self.paused = False
        else:
            if self.video_capture is not None:
                self.video_capture.release()
            capture = cv2.VideoCapture(self.video_path)
            if not capture.isOpened():
                capture.release()
                self.video_capture = None
                self.status_bar.showMessage(f"cannot open video : {self.video_path}")
                return
            self.video_capture = capture
            self.current_frame = 0
            self.paused = False

        self.play_button.setEnabled(False)
        self.pause_button.setEnabled(True)
        self.capture_button.setEnabled(True)
        self.video_timer.start(self._frame_interval_ms())

    def _frame_interval_ms(self):
        """Return the timer interval matching the video FPS (30 ms fallback)."""
        fps = self.video_capture.get(cv2.CAP_PROP_FPS)
        if fps and fps > 0:
            return max(1, round(1000 / fps))
        return 30

    def _finish_playback(self):
        """Handle end of stream so that "Play" restarts from the beginning."""
        self.video_timer.stop()
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
        self.paused = False
        self.current_frame = 0
        self.play_button.setEnabled(True)
        self.pause_button.setEnabled(False)

    def resize_frame(self, frame):
        """Shrink *frame* to fit video_width x video_height, keeping its ratio.

        Frames that already fit are returned unchanged (never enlarged).
        """
        height, width, _ = frame.shape
        if width > self.video_width or height > self.video_height:
            scale = min(self.video_width / width, self.video_height / height)
            # Clamp to 1 px so extreme aspect ratios cannot yield a 0 size.
            new_width = max(1, int(width * scale))
            new_height = max(1, int(height * scale))
            frame = cv2.resize(frame, (new_width, new_height))
        return frame

    def pause_video(self):
        """Stop the timer; the current frame stays visible and capturable."""
        self.video_timer.stop()
        self.paused = True
        self.play_button.setEnabled(True)
        self.pause_button.setEnabled(False)
        self.capture_button.setEnabled(self.displayed_frame is not None)

    def capture_frame(self):
        """Save the frame currently shown in the label as a PNG file.

        The displayed frame is saved rather than reading a new one from the
        capture, so capturing neither skips a frame of playback nor fails
        when no video is open.
        """
        frame = self.displayed_frame
        if frame is None:
            self.status_bar.showMessage("capture failed : no frame displayed")
            return

        folder_name = os.path.splitext(os.path.basename(self.video_path))[0]
        file_name = f"{folder_name}_{self.capture_count:04d}_image.png"
        os.makedirs("./capture_data", exist_ok=True)
        file_path = os.path.join("./capture_data", file_name)

        # displayed_frame is RGB; OpenCV writes BGR.
        if cv2.imwrite(file_path, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)):
            self.capture_count += 1
            self.status_bar.showMessage(f"capture ok {file_path}")
        else:
            self.status_bar.showMessage(f"capture failed {file_path}")

    def reset_video_player(self):
        """Drop all playback state after a different video was selected."""
        self.video_timer.stop()
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
        self.displayed_frame = None
        self.current_frame = 0
        self.capture_count = 0
        self.paused = False
        self.play_button.setEnabled(True)
        self.pause_button.setEnabled(False)
        self.capture_button.setEnabled(False)
        self.video_view_label.clear()

    def closeEvent(self, event):
        """Stop playback and release the video before the window closes."""
        self.video_timer.stop()
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
        event.accept()

if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = ViewVideo()
    window.show()
    # Hand the event loop's return code to the process; calling app.exit()
    # after exec() has already returned does not set the exit status.
    sys.exit(app.exec())

저작자표시 비영리

'데이터 수집 및 가공 > 데이터 수집' 카테고리의 다른 글

음원 데이터 수집하기(yt_dlp/window/python) (0)	2023.10.15

음향데이터(Waveshow,STFT,MelSpectrogram)이미지 가공

퀀텀리프하는 개발자 2023. 10. 19. 18:00

2023. 10. 19. 18:00

음향데이터 이미지 저장하기 파일

1 데이터 불러오기

2 waveshow 이미지 저장하기

2-1 원본이미지 저장하기

2-2 noise 또는 stretch 처리된 이미지 저장하기

3 STFT 이미지 저장하기

3-1 원본이미지 저장하기

3-2 noise 또는 stretch 처리된 이미지 저장하기

4 MelSpectrogram 이미지 저장하기

4-1 원본이미지 저장하기

4-2 noise 또는 stretch 처리된 이미지 저장하기

5 이미지 리사이즈 하기

데이터 불러오기

* 원하는 이미지만 저장하여, image_extraction_data 폴더 안의 모든 이미지를 리사이즈 할 수 있게 만들었습니다.

원하시는 가상환경에서 pip install librosa matplotlib numpy tqdm opencv-python 을 실행해 주세요. (opencv-python은 5번 이미지 리사이즈 단계에서 사용하는 cv2를 제공합니다.)

1. 데이터 불러오기

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
from tqdm import tqdm 

# Collect the path of every .wav file below input_dir, one list per directory.
# NOTE(review): the result list is called `dir`, which shadows the builtin
# dir() for the rest of the session; the name is kept here because it is this
# cell's visible output.
input_dir = 'raw_data'
dir = []

for root, dirs, files in os.walk(input_dir):
    current_dir = []

    for file in tqdm(files, desc=f"Scanning {root}"):
        if not file.endswith(".wav"):
            continue
        current_dir.append(os.path.join(root, file))

    # Directories that contain no .wav file are left out of the result.
    if len(current_dir) > 0:
        dir.append(current_dir)

# Print every collected path, one per line.
for wav in (path for group in dir for path in group):
    print(wav)

# Adjust `start` and `end` (in seconds) to choose which part of each WAV file is kept.
def skip_wav(input_dir, start=0, end=10, wav_data_list=None):
    """Load every .wav file under *input_dir* and keep its [start, end) slice.

    Args:
        input_dir: Directory that is walked recursively for ``.wav`` files.
        start: Start of the slice, in seconds.
        end: End of the slice, in seconds; clamped to the clip length.
        wav_data_list: Optional list of ``(samples, sample_rate, file_path)``
            tuples from an earlier call. Files whose path is already in the
            list are not loaded again; new entries are appended to it.

    Returns:
        The list of ``(samples, sample_rate, file_path)`` tuples. This is the
        same object as *wav_data_list* when one was passed in.
    """
    if wav_data_list is None:
        wav_data_list = []

    # Paths that are already loaded; a set keeps the check O(1) per file
    # instead of rescanning the whole list for every candidate.
    loaded_paths = {item[2] for item in wav_data_list}

    for root, _dirs, files in os.walk(input_dir):
        for file in tqdm(files, desc=f"Scanning {root}"):
            if not file.endswith(".wav"):
                continue

            file_path = os.path.join(root, file)
            if file_path in loaded_paths:
                continue

            try:
                data, sr = librosa.load(file_path)
            except Exception as e:
                # Best effort: report the unreadable file (with the cause)
                # and carry on with the remaining ones.
                print(f"손상된 WAV 파일을 스킵합니다: {file_path} ({e})")
                continue

            # Convert the second-based window to sample indices.
            start_sample = int(start * sr)
            end_sample = min(int(end * sr), len(data))
            data = data[start_sample:end_sample]

            wav_data_list.append((np.array(data), sr, file_path))
            loaded_paths.add(file_path)

    return wav_data_list

2 Waveshow 이미지 저장하기

2 -1 원본이미지(waveshow) 저장하기

In [ ]:

# Draw the waveform of every loaded clip and save it as a PNG, mirroring the
# directory layout of input_dir below output_dir.
input_dir = 'raw_data'
output_dir = 'image_extraction_data/waveshow'

skipped_wav_data_list = []
wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    wav_folder, wav_name = os.path.split(file_path)

    # Sub-directory of the clip relative to input_dir, recreated on output.
    subdir = os.path.relpath(wav_folder, input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    # Same file name as the clip, with a .png extension.
    stem, _ext = os.path.splitext(wav_name)
    image_file_name = f"{stem}.png"
    image_file_path = os.path.join(image_output_dir, image_file_name)

    plt.figure(figsize=(12, 4))
    librosa.display.waveshow(data, sr=sr)
    plt.axis("off")  # save the bare plot without axes

    plt.savefig(image_file_path, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"이미지 저장: {image_file_path}")

print("이미지 저장 완료")

2-2 waveshow에 (noise 또는 stretch)가 적용된 이미지 저장하기

In [ ]:

# Save two augmented waveform images per clip: one with additive Gaussian
# noise and one time-stretched by a random rate.
input_dir = 'raw_data'
output_dir = 'image_extraction_data/waveshow'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    ############## Tunable parameters: noise #################

    # Scale the noise by the peak *absolute* amplitude. The plain maximum
    # underestimates the level of a clip whose largest excursion is negative.
    noise_amp = np.amax(np.abs(data)) * 0.2 * np.random.uniform()
    noise = noise_amp * np.random.normal(size=data.shape)
    data_noise = data + noise

    image_file_name_noise = os.path.splitext(os.path.basename(file_path))[0] + "_noise.png"
    image_file_path_noise = os.path.join(image_output_dir, image_file_name_noise)

    plt.figure(figsize=(12, 4))
    librosa.display.waveshow(data_noise, sr=sr)
    plt.axis("off")

    plt.savefig(image_file_path_noise, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Noise 이미지 저장: {image_file_path_noise}")

    ############## Tunable parameters: time stretch ################

    # Random stretch rate in [0.8, 1.2).
    rate = 0.8 + np.random.random() * 0.4
    data_stretch = librosa.effects.time_stretch(data, rate=rate)

    image_file_name_stretch = os.path.splitext(os.path.basename(file_path))[0] + "_stretch.png"
    image_file_path_stretch = os.path.join(image_output_dir, image_file_name_stretch)

    plt.figure(figsize=(12, 4))
    librosa.display.waveshow(data_stretch, sr=sr)
    plt.axis("off")

    plt.savefig(image_file_path_stretch, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Stretch 이미지 저장: {image_file_path_stretch}")

print(f"이미지 저장 완료")

3 STFT 이미지 저장하기

3-1 원본이미지(STFT) 저장하기

In [ ]:

# Compute the STFT of every loaded clip and save its dB-scaled magnitude
# spectrogram as a PNG, mirroring the layout of input_dir below output_dir.
input_dir = 'raw_data'
output_dir = 'image_extraction_data/STFT'

skipped_wav_data_list = []
wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    wav_folder, wav_name = os.path.split(file_path)

    # Sub-directory of the clip relative to input_dir, recreated on output.
    subdir = os.path.relpath(wav_folder, input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    # Complex STFT -> magnitude -> decibels.
    stft = librosa.stft(data)
    stft_db = librosa.amplitude_to_db(np.abs(stft))

    stem, _ext = os.path.splitext(wav_name)
    image_file_name = f"{stem}.png"
    image_file_path = os.path.join(image_output_dir, image_file_name)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(stft_db, sr=sr, x_axis="time", y_axis="hz")
    plt.axis("off")  # save the bare plot without axes

    plt.savefig(image_file_path, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"STFT 이미지 저장: {image_file_path}")

print("이미지 저장 완료")

3-2 STFT에 (noise 또는 stretch)가 적용된 이미지 저장하기

In [ ]:

# Save two augmented STFT spectrogram images per clip: one with uniform noise
# added to the spectrogram and one computed from a time-stretched copy of the
# audio.
input_dir = 'raw_data'
output_dir = 'image_extraction_data/STFT'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    # The STFT has to be computed for *this* clip. Relying on a `stft`
    # variable left over from another cell would add noise to a different
    # clip's spectrogram for every file (or raise NameError in a fresh
    # session).
    stft = librosa.stft(data)

    ############## Tunable parameters: noise ################

    # Uniform noise in [-noise_amp, noise_amp), scaled by the spectrogram peak.
    noise_amp = np.amax(np.abs(stft)) * 0.05 * np.random.uniform()
    noise = noise_amp * (2 * np.random.random(stft.shape) - 1)
    stft_noisy = stft + noise
    stft_db_noisy = librosa.amplitude_to_db(S=abs(stft_noisy))

    image_file_name_noise = os.path.splitext(os.path.basename(file_path))[0] + "_stft_noise.png"
    image_file_path_noise = os.path.join(image_output_dir, image_file_name_noise)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(stft_db_noisy, sr=sr, x_axis="time", y_axis="hz")
    plt.axis("off")

    plt.savefig(image_file_path_noise, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"STFT 이미지 (Noise) 저장: {image_file_path_noise}")

    ############## Tunable parameters: time stretch ################

    # Random stretch rate in [0.8, 1.2).
    rate = 0.8 + np.random.random() * 0.4
    data_stretched = librosa.effects.time_stretch(data, rate=rate)
    stft_stretched = librosa.stft(data_stretched)
    stft_db_stretched = librosa.amplitude_to_db(S=abs(stft_stretched))

    image_file_name_stretch = os.path.splitext(os.path.basename(file_path))[0] + "_stft_stretch.png"
    image_file_path_stretch = os.path.join(image_output_dir, image_file_name_stretch)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(stft_db_stretched, sr=sr, x_axis="time", y_axis="hz")
    plt.axis("off")

    plt.savefig(image_file_path_stretch, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"STFT 이미지 (Stretch) 저장: {image_file_path_stretch}")

print(f"이미지 저장 완료")

4 MelSpectrogram 이미지 저장하기

4-1 원본이미지(MelSpectrogram) 저장하기

In [ ]:

# Compute a mel spectrogram for every loaded clip and save it (in dB, relative
# to its peak) as a PNG, mirroring the layout of input_dir below output_dir.
input_dir = 'raw_data'
output_dir = 'image_extraction_data/mel_spectrogram'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    stft = librosa.stft(data)

    # Pass the clip's sample rate so the mel filter bank is built for the
    # same rate that specshow uses below; without it melspectrogram falls
    # back to its own default rate. The input here is a magnitude
    # spectrogram, hence amplitude_to_db.
    mel_spec = librosa.feature.melspectrogram(S=np.abs(stft), sr=sr)
    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

    image_file_name = os.path.splitext(os.path.basename(file_path))[0] + ".png"
    image_file_path = os.path.join(image_output_dir, image_file_name)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_spec_db, sr=sr, x_axis="time", y_axis="mel")
    plt.axis("off")

    plt.savefig(image_file_path, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Mel Spectrogram 이미지 저장: {image_file_path}")

print(f"이미지 저장 완료")

4-2 MelSpectrogram에 (noise 또는 stretch)가 적용된 이미지 저장하기

In [ ]:

# Save two augmented mel spectrogram images per clip: one from the audio with
# additive Gaussian noise and one from a time-stretched copy of the audio.
input_dir = 'raw_data'
output_dir = 'image_extraction_data/mel_spectrogram'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    ############## Tunable parameters: noise ################

    noise_amp = np.amax(np.abs(data)) * 0.05 * np.random.uniform()
    noise = noise_amp * np.random.normal(size=data.shape)
    data_noise = data + noise

    # melspectrogram(y=...) returns a *power* spectrogram by default, so it
    # must be converted with power_to_db. amplitude_to_db would double every
    # dB value and clip twice as much of the dynamic range.
    mel_spec_noise = librosa.feature.melspectrogram(y=data_noise, sr=sr)
    mel_spec_db_noise = librosa.power_to_db(mel_spec_noise, ref=np.max)

    image_file_name_noise = os.path.splitext(os.path.basename(file_path))[0] + "_MelSpec_noise.png"
    image_file_path_noise = os.path.join(image_output_dir, image_file_name_noise)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_spec_db_noise, sr=sr, x_axis="time", y_axis="mel")
    plt.axis("off")

    plt.savefig(image_file_path_noise, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Mel Spectrogram 이미지 (Noise) 저장: {image_file_path_noise}")

    ############## Tunable parameters: time stretch ################

    # Random stretch rate in [0.8, 1.2).
    rate = 0.8 + np.random.random() * 0.4
    data_stretched = librosa.effects.time_stretch(data, rate=rate)

    # Power spectrogram again, so power_to_db (see above).
    mel_spec_stretched = librosa.feature.melspectrogram(y=data_stretched, sr=sr)
    mel_spec_db_stretched = librosa.power_to_db(mel_spec_stretched, ref=np.max)

    image_file_name_stretch = os.path.splitext(os.path.basename(file_path))[0] + "_MelSpec_stretch.png"
    image_file_path_stretch = os.path.join(image_output_dir, image_file_name_stretch)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_spec_db_stretched, sr=sr, x_axis="time", y_axis="mel")
    plt.axis("off")

    plt.savefig(image_file_path_stretch, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Mel Spectrogram 이미지 (Stretch) 저장: {image_file_path_stretch}")

print(f"이미지 저장 완료")

5 모든 이미지 리사이즈 후 정제 이미지 저장하기

In [ ]:

import cv2
import numpy as np
import os
from tqdm import tqdm

# Directory tree that is scanned for images, and the tree that receives the
# resized copies.
output_dir = "final_data"
input_dir = "image_extraction_data"

# Target image size - presumably (width, height) in pixels.
target_size = (255, 255)

def padding_to_square(img: np.ndarray, square_size: int) -> np.ndarray:
    """Pad *img* with black borders to a square, then resize it.

    The image is centred on a zero-filled square whose side equals the
    longer edge of *img*, so the content keeps its aspect ratio.

    Args:
        img: Image array of shape ``(h, w)`` or ``(h, w, c)``.
        square_size: Edge length, in pixels, of the returned image.

    Returns:
        The padded image resized to ``square_size`` x ``square_size``.
    """
    h, w = img.shape[:2]

    if h == w:
        # Already square: nothing to pad.
        bg = img
    else:
        side = max(h, w)
        # Keep the input dtype and any channel axis.
        bg = np.zeros((side, side) + img.shape[2:], dtype=img.dtype)
        # One of the two offsets is always 0 (the longer edge fills the side).
        top = (side - h) // 2
        left = (side - w) // 2
        bg[top:top + h, left:left + w] = img

    return cv2.resize(bg, (square_size, square_size))

# Pad and resize every image below input_dir and write the result to the same
# relative location below output_dir.
for root, dirs, files in os.walk(input_dir):
    for file in tqdm(files, desc=f"Scanning {root}"):
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        # Recreate the image's sub-directory (relative to input_dir).
        subdir = os.path.relpath(root, input_dir)
        output_subdir = os.path.join(output_dir, subdir)
        os.makedirs(output_subdir, exist_ok=True)

        image_file_path = os.path.join(root, file)

        try:
            image = cv2.imread(image_file_path)
            if image is None:
                print(f"이미지 읽기 실패: {image_file_path}")
                continue

            # Black padding and resizing act on each channel independently,
            # so the image can stay in OpenCV's BGR order from read to write.
            # The edge length comes from target_size instead of a literal.
            padded_img = padding_to_square(image, target_size[0])

            output_image_file_path = os.path.join(output_subdir, file)
            # imwrite reports failure through its return value, not an
            # exception, so check it before claiming success.
            if cv2.imwrite(output_image_file_path, padded_img):
                print(f"변환된 이미지 저장: {output_image_file_path}")
            else:
                print(f"이미지 저장 실패: {output_image_file_path}")
        except Exception as e:
            # Best effort: report the problem and continue with the next file.
            print(f"이미지 처리 중 오류 발생: {e}")

print("이미지 변환 및 저장 완료")