음향데이터 이미지 저장하기 파일

목록

1 데이터 불러오기
2 waveshow 이미지 저장하기
2-1 원본이미지 저장하기
2-2 noise 또는 stretch 처리된 이미지 저장하기
3 STFT 이미지 저장하기
3-1 원본이미지 저장하기
3-2 noise 또는 stretch 처리된 이미지 저장하기
4 MelSpectrogram 이미지 저장하기
4-1 원본이미지 저장하기
4-2 noise 또는 stretch 처리된 이미지 저장하기
5 이미지 리사이즈 하기
데이터 불러오기
* 원하는 이미지만 저장하여, image_extraction_data 폴더 안의 모든 이미지를 리사이즈 할 수 있게 만들었습니다.
 
원하시는 가상환경에 다음 패키지를 설치해 주세요: pip install librosa matplotlib numpy tqdm opencv-python (opencv-python은 5번 이미지 리사이즈 단계에서 cv2로 사용합니다.)

1. 데이터 불러오기

 
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
from tqdm import tqdm 

dir = []
input_dir = 'raw_data'

# Gather the .wav paths of each folder under input_dir, one list per folder.
for root, dirs, files in os.walk(input_dir):
    current_dir = [
        os.path.join(root, file)
        for file in tqdm(files, desc=f"Scanning {root}")
        if file.endswith(".wav")
    ]

    # Folders that contain no .wav file are left out of the result.
    if len(current_dir) > 0:
        dir.append(current_dir)

# Print every collected path, folder by folder.
for wav in (path for directory_files in dir for path in directory_files):
    print(wav)

# Adjust start and end to slice each WAV file to a time window.
def skip_wav(input_dir, start=0, end=10, wav_data_list=None):
    """Load every ``.wav`` file under ``input_dir`` and keep a time slice of each.

    Files whose path is already present in ``wav_data_list`` are not loaded
    again, and files that fail to load are reported and skipped.

    Args:
        input_dir: Root folder that is walked recursively.
        start: Start of the slice; multiplied by the sample rate to get the
            first sample index.
        end: End of the slice; multiplied by the sample rate and clamped to
            the length of the audio.
        wav_data_list: Optional list of ``(data, sr, file_path)`` tuples from
            an earlier call. It is extended in place; a new list is created
            when omitted.

    Returns:
        The list of ``(data, sr, file_path)`` tuples, including any entries
        that were passed in.
    """
    if wav_data_list is None:
        wav_data_list = []

    # Track known paths in a set so the "already loaded?" check does not
    # rescan the whole list for every file.
    seen_paths = {item[2] for item in wav_data_list}

    for root, dirs, files in os.walk(input_dir):
        for file in tqdm(files, desc=f"Scanning {root}"):
            if not file.endswith(".wav"):
                continue

            file_path = os.path.join(root, file)
            if file_path in seen_paths:
                continue

            try:
                data, sr = librosa.load(file_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch,
                # but report why it was skipped.
                print(f"손상된 WAV 파일을 스킵합니다: {file_path} ({e})")
                continue

            start_sample = int(start * sr)
            end_sample = min(int(end * sr), len(data))
            data = data[start_sample:end_sample]

            wav_data_list.append((np.array(data), sr, file_path))
            seen_paths.add(file_path)

    return wav_data_list

2 Waveshow 이미지 저장하기

2-1 원본이미지(waveshow) 저장하기
In [ ]:
input_dir = 'raw_data'
output_dir = 'image_extraction_data/waveshow'

# Pass an empty list so no file is treated as already loaded.
skipped_wav_data_list = []
wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    # Recreate the file's folder (relative to input_dir) under output_dir.
    wav_folder, wav_name = os.path.split(file_path)
    image_output_dir = os.path.join(output_dir, os.path.relpath(wav_folder, input_dir))
    os.makedirs(image_output_dir, exist_ok=True)

    # Same base name as the WAV file, with a .png extension.
    image_file_path = os.path.join(image_output_dir, os.path.splitext(wav_name)[0] + ".png")

    # Draw the waveform without axes and save it with no surrounding margin.
    figure = plt.figure(figsize=(12, 4))
    librosa.display.waveshow(data, sr=sr)
    plt.axis("off")
    plt.savefig(image_file_path, bbox_inches="tight", pad_inches=0)
    plt.close(figure)

    print(f"이미지 저장: {image_file_path}")

print("이미지 저장 완료")
2-2 waveshow에 (noise 또는 stretch)가 적용된 이미지 저장하기
In [ ]:
input_dir = 'raw_data'
output_dir = 'image_extraction_data/waveshow'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

# For every loaded clip, save two augmented waveform images:
# <name>_noise.png (additive Gaussian noise) and <name>_stretch.png (time stretch).
for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    # Base name of the WAV file, shared by both output images.
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # ---- Tunable parameters: noise ----
    # Scale the noise by the peak absolute amplitude; np.amax(data) alone would
    # ignore negative excursions, which can be the larger ones.
    noise_amp = np.amax(np.abs(data)) * 0.2 * np.random.uniform()
    noise = noise_amp * np.random.normal(size=data.shape)
    data_noise = data + noise

    image_file_name_noise = base_name + "_noise.png"
    image_file_path_noise = os.path.join(image_output_dir, image_file_name_noise)

    plt.figure(figsize=(12, 4))
    librosa.display.waveshow(data_noise, sr=sr)
    plt.axis("off")

    plt.savefig(image_file_path_noise, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Noise 이미지 저장: {image_file_path_noise}")

    # ---- Tunable parameters: time stretch ----
    # Random stretch rate drawn from [0.8, 1.2).
    rate = 0.8 + np.random.random() * 0.4
    data_stretch = librosa.effects.time_stretch(data, rate=rate)

    image_file_name_stretch = base_name + "_stretch.png"
    image_file_path_stretch = os.path.join(image_output_dir, image_file_name_stretch)

    plt.figure(figsize=(12, 4))
    librosa.display.waveshow(data_stretch, sr=sr)
    plt.axis("off")

    plt.savefig(image_file_path_stretch, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Stretch 이미지 저장: {image_file_path_stretch}")

print("이미지 저장 완료")

3 STFT 이미지 저장하기

3-1 원본이미지(STFT) 저장하기
In [ ]:
input_dir = 'raw_data'
output_dir = 'image_extraction_data/STFT'

# An empty list means skip_wav loads every file it finds.
skipped_wav_data_list = []
wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

for data, sr, file_path in wav_data_list:
    # Recreate the file's folder (relative to input_dir) under output_dir.
    wav_folder, wav_name = os.path.split(file_path)
    image_output_dir = os.path.join(output_dir, os.path.relpath(wav_folder, input_dir))
    os.makedirs(image_output_dir, exist_ok=True)

    # Magnitude of the short-time Fourier transform, converted to decibels.
    stft = librosa.stft(data)
    stft_db = librosa.amplitude_to_db(np.abs(stft))

    # Same base name as the WAV file, with a .png extension.
    image_file_path = os.path.join(image_output_dir, os.path.splitext(wav_name)[0] + ".png")

    # Render the spectrogram without axes and save it with no surrounding margin.
    figure = plt.figure(figsize=(12, 4))
    librosa.display.specshow(stft_db, sr=sr, x_axis="time", y_axis="hz")
    plt.axis("off")
    plt.savefig(image_file_path, bbox_inches="tight", pad_inches=0)
    plt.close(figure)

    print(f"STFT 이미지 저장: {image_file_path}")

print("이미지 저장 완료")
3-2 STFT에 (noise 또는 stretch)가 적용된 이미지 저장하기
In [ ]:
input_dir = 'raw_data'
output_dir = 'image_extraction_data/STFT'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

# For every loaded clip, save two augmented STFT images:
# <name>_stft_noise.png (noise added to the STFT) and
# <name>_stft_stretch.png (STFT of the time-stretched signal).
for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    # Base name of the WAV file, shared by both output images.
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # Compute the STFT of THIS clip. Without this line the noise branch would
    # read whatever `stft` was left over from an earlier cell (or fail with a
    # NameError when no earlier cell defined it).
    stft = librosa.stft(data)

    # ---- Tunable parameters: noise ----
    # Uniform noise in [-noise_amp, noise_amp), scaled by the peak STFT magnitude.
    noise_amp = np.amax(np.abs(stft)) * 0.05 * np.random.uniform()
    noise = noise_amp * (2 * np.random.random(stft.shape) - 1)
    stft_noisy = stft + noise
    stft_db_noisy = librosa.amplitude_to_db(S=abs(stft_noisy))

    image_file_name_noise = base_name + "_stft_noise.png"
    image_file_path_noise = os.path.join(image_output_dir, image_file_name_noise)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(stft_db_noisy, sr=sr, x_axis="time", y_axis="hz")
    plt.axis("off")

    plt.savefig(image_file_path_noise, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"STFT 이미지 (Noise) 저장: {image_file_path_noise}")

    # ---- Tunable parameters: time stretch ----
    # Random stretch rate drawn from [0.8, 1.2).
    rate = 0.8 + np.random.random() * 0.4
    data_stretched = librosa.effects.time_stretch(data, rate=rate)
    stft_stretched = librosa.stft(data_stretched)
    stft_db_stretched = librosa.amplitude_to_db(S=abs(stft_stretched))

    image_file_name_stretch = base_name + "_stft_stretch.png"
    image_file_path_stretch = os.path.join(image_output_dir, image_file_name_stretch)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(stft_db_stretched, sr=sr, x_axis="time", y_axis="hz")
    plt.axis("off")

    plt.savefig(image_file_path_stretch, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"STFT 이미지 (Stretch) 저장: {image_file_path_stretch}")

print("이미지 저장 완료")

4 MelSpectrogram 이미지 저장하기

4-1 원본이미지(MelSpectrogram) 저장하기
In [ ]:
input_dir = 'raw_data'
output_dir = 'image_extraction_data/mel_spectrogram'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

# For every loaded clip, save a mel spectrogram image (in dB) of the original signal.
for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    stft = librosa.stft(data)

    # Pass sr explicitly so the mel filterbank is built for the clip's actual
    # sample rate (the same one specshow uses below) instead of the library default.
    mel_spec = librosa.feature.melspectrogram(S=abs(stft), sr=sr)
    # The input above is a magnitude spectrogram, so amplitude_to_db is the
    # matching conversion; ref=np.max makes the peak 0 dB.
    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

    image_file_name = os.path.splitext(os.path.basename(file_path))[0] + ".png"
    image_file_path = os.path.join(image_output_dir, image_file_name)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_spec_db, sr=sr, x_axis="time", y_axis="mel")
    plt.axis("off")

    plt.savefig(image_file_path, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Mel Spectrogram 이미지 저장: {image_file_path}")

print("이미지 저장 완료")
4-2 MelSpectrogram에 (noise 또는 stretch)가 적용된 이미지 저장하기
In [ ]:
input_dir = 'raw_data'
output_dir = 'image_extraction_data/mel_spectrogram'

skipped_wav_data_list = []

wav_data_list = skip_wav(input_dir, wav_data_list=skipped_wav_data_list)

# For every loaded clip, save two augmented mel spectrogram images:
# <name>_MelSpec_noise.png (additive Gaussian noise) and
# <name>_MelSpec_stretch.png (time stretch).
for data, sr, file_path in wav_data_list:
    subdir = os.path.relpath(os.path.dirname(file_path), input_dir)
    image_output_dir = os.path.join(output_dir, subdir)
    os.makedirs(image_output_dir, exist_ok=True)

    # Base name of the WAV file, shared by both output images.
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # ---- Tunable parameters: noise ----
    noise_amp = np.amax(np.abs(data)) * 0.05 * np.random.uniform()
    noise = noise_amp * np.random.normal(size=data.shape)
    data_noise = data + noise

    # melspectrogram(y=...) returns a power spectrogram with its default
    # settings, so convert with power_to_db; amplitude_to_db would double the
    # dB values and squash the visible dynamic range.
    mel_spec_noise = librosa.feature.melspectrogram(y=data_noise, sr=sr)
    mel_spec_db_noise = librosa.power_to_db(mel_spec_noise, ref=np.max)

    image_file_name_noise = base_name + "_MelSpec_noise.png"
    image_file_path_noise = os.path.join(image_output_dir, image_file_name_noise)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_spec_db_noise, sr=sr, x_axis="time", y_axis="mel")
    plt.axis("off")

    plt.savefig(image_file_path_noise, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Mel Spectrogram 이미지 (Noise) 저장: {image_file_path_noise}")

    # ---- Tunable parameters: time stretch ----
    # Random stretch rate drawn from [0.8, 1.2).
    rate = 0.8 + np.random.random() * 0.4
    data_stretched = librosa.effects.time_stretch(data, rate=rate)

    # Power spectrogram again, hence power_to_db.
    mel_spec_stretched = librosa.feature.melspectrogram(y=data_stretched, sr=sr)
    mel_spec_db_stretched = librosa.power_to_db(mel_spec_stretched, ref=np.max)

    image_file_name_stretch = base_name + "_MelSpec_stretch.png"
    image_file_path_stretch = os.path.join(image_output_dir, image_file_name_stretch)

    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mel_spec_db_stretched, sr=sr, x_axis="time", y_axis="mel")
    plt.axis("off")

    plt.savefig(image_file_path_stretch, bbox_inches="tight", pad_inches=0)
    plt.close()

    print(f"Mel Spectrogram 이미지 (Stretch) 저장: {image_file_path_stretch}")

print("이미지 저장 완료")

5 모든 이미지 리사이즈 후 정제 이미지 저장하기

In [ ]:
import cv2
import numpy as np
import os
from tqdm import tqdm

# Root folder that is walked for images to convert.
input_dir = 'image_extraction_data' 
# Root folder that receives the converted images, mirroring input_dir's layout.
output_dir = 'final_data' 

# Intended size of the converted images in pixels.
target_size = (255, 255)

def padding_to_square(img: np.ndarray, square_size: int) -> np.ndarray:
    """Pad ``img`` with zeros to a square, then resize it to ``square_size``.

    The shorter dimension is padded on both sides so the content stays
    centred; when the difference is odd, the extra pixel of padding goes to
    the bottom or right. A square input is resized directly.

    Args:
        img: Image array of shape ``(h, w, c)`` or ``(h, w)``.
        square_size: Side length, in pixels, of the returned image.

    Returns:
        The image resized to ``square_size`` x ``square_size``.
    """
    # Only the first two axes are spatial; this also accepts 2-D images.
    h, w = img.shape[:2]

    if h == w:
        # Already square: no padding canvas needed.
        bg = img
    else:
        side = max(h, w)
        # Zero canvas with the same trailing (channel) axes as the input.
        bg = np.zeros((side, side) + img.shape[2:], dtype=np.uint8)
        # One of these offsets is always 0: only the shorter axis is padded.
        top = (side - h) // 2
        left = (side - w) // 2
        bg[top:top + h, left:left + w] = img

    result = cv2.resize(bg, (square_size, square_size))
    return result

# Walk input_dir, pad and resize every image, and write the result to the same
# relative location under output_dir.
for root, dirs, files in os.walk(input_dir):
    for file in tqdm(files, desc=f"Scanning {root}"):
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        subdir = os.path.relpath(root, input_dir)

        output_subdir = os.path.join(output_dir, subdir)
        os.makedirs(output_subdir, exist_ok=True)

        image_file_path = os.path.join(root, file)

        try:
            # cv2.imread returns None instead of raising when it cannot read a file.
            image = cv2.imread(image_file_path)
            if image is None:
                print(f"이미지 읽기 실패: {image_file_path}")
                continue

            # Use the configured size instead of a hard-coded literal. No
            # BGR<->RGB round trip is needed: padding and resizing treat every
            # channel the same way.
            padded_img = padding_to_square(image, target_size[0])

            output_image_file_path = os.path.join(output_subdir, file)
            # cv2.imwrite reports failure through its return value, so check it
            # rather than announcing a save that did not happen.
            if not cv2.imwrite(output_image_file_path, padded_img):
                print(f"이미지 저장 실패: {output_image_file_path}")
                continue

            print(f"변환된 이미지 저장: {output_image_file_path}")
        except Exception as e:
            # Best effort: report the failing file and keep converting the rest.
            print(f"이미지 처리 중 오류 발생 ({image_file_path}): {e}")

print("이미지 변환 및 저장 완료")

+ Recent posts