WavEncoder is a Python library for encoding audio signals, applying audio augmentation transforms, and training audio classification models with a PyTorch backend.
Layers | Models | Transforms | Trainer and utils |
---|---|---|---|
|
|
|
|
- wav2vec [1]
- wav2vec2 [2]
- SincNet [3]
- PASE [4]
- MockingJay [5]
- RawNet [6]
- GaborNet [7]
- LEAF [8]
- CNN-1D
- CNN-LSTM
- CNN-LSTM-Attn
Check the Demo Colab Notebook.
Use the package manager pip to install wavencoder.
pip install wavencoder
import torch
import wavencoder

# Random stand-in for a batch holding a single 16000-sample waveform.
waveform = torch.randn(1, 16000)  # [1, 16000]

# Stage 1: encode the raw waveform with the pretrained Wav2Vec model.
encoder = wavencoder.models.Wav2Vec(pretrained=True)
features = encoder(waveform)  # [1, 512, 98]

# Stage 2: classify the encoded features. With return_attn_weights=True the
# classifier returns the attention weights alongside the predictions.
# NOTE(review): 512 matches the encoder's feature dimension and 2 the output
# size shown in the shape comments; 64 is presumably the LSTM hidden size --
# confirm against the LSTM_Attn_Classifier signature.
classifier = wavencoder.models.LSTM_Attn_Classifier(
    512,
    64,
    2,
    return_attn_weights=True,
    attn_type='soft',
)
y_hat, attn_weights = classifier(features)  # [1, 2], [1, 98]
import torch
import torch.nn as nn
import wavencoder

# Build the two stages separately, then chain them so a single call runs
# encoder -> classifier.
encoder = wavencoder.models.Wav2Vec()
attn_classifier = wavencoder.models.LSTM_Attn_Classifier(
    512,
    64,
    2,
    return_attn_weights=True,
    attn_type='soft',
)
model = nn.Sequential(encoder, attn_classifier)

# Random stand-in for a batch holding a single 16000-sample waveform.
waveform = torch.randn(1, 16000)  # [1, 16000]

# The classifier is the last stage, so the pipeline returns its
# (predictions, attention weights) pair.
y_hat, attn_weights = model(waveform)  # [1, 2], [1, 98]
import torch
import torch.nn as nn
import wavencoder


class AudioClassifier(nn.Module):
    """Pretrained Wav2Vec encoder followed by mean pooling and a linear head."""

    def __init__(self):
        super().__init__()
        self.encoder = wavencoder.models.Wav2Vec(pretrained=True)
        # Maps the 512 pooled encoder features to 2 output scores.
        self.classifier = nn.Linear(512, 2)

    def forward(self, x):
        """Encode ``x``, average the encoding over dim 2, apply the linear head."""
        pooled = self.encoder(x).mean(dim=2)
        return self.classifier(pooled)


model = AudioClassifier()
waveform = torch.randn(1, 16000)  # [1, 16000]
y_hat = model(waveform)  # [1, 2]
# `nn` is needed for nn.Sequential below; import it here so this snippet runs
# on its own, like the other examples.
import torch.nn as nn

from wavencoder.models import Wav2Vec, LSTM_Attn_Classifier
from wavencoder.trainer import train, test_evaluate_classifier, test_predict_classifier

# Wav2Vec without pretrained weights, followed by the LSTM-attention classifier.
model = nn.Sequential(
    Wav2Vec(pretrained=False),
    LSTM_Attn_Classifier(512, 64, 2),
)

# Placeholders: replace each `...` with your own loader.
# NOTE(review): presumably torch.utils.data.DataLoader instances -- confirm
# against the wavencoder.trainer documentation.
trainloader = ...
valloader = ...
testloader = ...

# train() returns the trained model together with a second value (train_dict);
# the trained model is then passed to test_predict_classifier with the test loader.
trained_model, train_dict = train(model, trainloader, valloader, n_epochs=20)
test_prediction_dict = test_predict_classifier(trained_model, testloader)
# torchaudio is used below to load the audio file; import it so the snippet
# runs on its own.
import torchaudio

from wavencoder.transforms import Compose, AdditiveNoise, SpeedChange, Clipping, PadCrop, Reverberation

# torchaudio.load returns a pair; only the first element (the audio) is used.
audio, _ = torchaudio.load('test.wav')

# Chain the augmentations into one callable.
# NOTE(review): `p` is presumably the probability of applying each transform --
# confirm against the wavencoder.transforms documentation.
transforms = Compose([
    # `p` must be passed only once: a repeated keyword argument is a SyntaxError.
    AdditiveNoise('path-to-noise-folder', p=0.5, snr_levels=[5, 10, 15]),
    SpeedChange(factor_range=(-0.5, 0.0), p=0.5),
    Clipping(p=0.5),
    PadCrop(48000, crop_position='random', pad_position='random'),
])
transformed_audio = transforms(audio)
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
Please make sure to update tests as appropriate.