Skip to content

Commit

Permalink
wip: working on publishing using torchhub
Browse files Browse the repository at this point in the history
  • Loading branch information
ex3ndr committed May 24, 2024
1 parent d138666 commit bb33591
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 14 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# ✨ Supervoice Enhance [BETA]

A diffusion neural network for enhancing single-speaker speech.

# Features

* ⚡️ Restoring and improving audio
* 🎤 16 kHz mono audio
* 🤹‍♂️ Can work with unknown languages

# Usage
Supervoice Enhance consists of multiple networks, but they are all loaded using a single command and published using Torch Hub, so you can use it as follows:

```python
import torch
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the `enhance` entry point published by this repository through Torch Hub
enhance = torch.hub.load(repo_or_dir='ex3ndr/supervoice-enhance', model='enhance')
# Move the model to the selected device
enhance.to(device)
# Put the module into evaluation mode
enhance.eval()
```

# License

MIT
21 changes: 21 additions & 0 deletions hubconf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
dependencies = ['torch', 'torchaudio']

def enhance():
    """Torch Hub entry point: build the Supervoice Enhance model with weights.

    Loads the `bigvsan` vocoder and the `flow` model from their own Torch Hub
    repositories, wraps them in `SuperVoiceEnhance`, and loads the enhancement
    checkpoint into the wrapper's `diffusion` sub-module.

    Returns:
        The assembled `SuperVoiceEnhance` module. The checkpoint is mapped to
        the CPU; the caller is responsible for moving the model to another
        device.
    """

    # Imports (done lazily, only when the entry point is actually called)
    import torch
    from supervoice_enhance.wrapper import SuperVoiceEnhance

    # Model
    vocoder = torch.hub.load(repo_or_dir='ex3ndr/supervoice-vocoder', model='bigvsan')
    flow = torch.hub.load(repo_or_dir='ex3ndr/supervoice-flow', model='flow')
    model = SuperVoiceEnhance(flow, vocoder)

    # Load checkpoint; the weights live under the 'model' key of the downloaded file
    checkpoint = torch.hub.load_state_dict_from_url("https://shared.korshakov.com/models/supervoice-enhance-600000.pt", map_location="cpu")
    model.diffusion.load_state_dict(checkpoint['model'])

    return model

30 changes: 30 additions & 0 deletions supervoice_enhance/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import torch
from .model import EnhanceModel
from .config import config

class SuperVoiceEnhance(torch.nn.Module):
    """Wrapper that chains spectrogram extraction, the enhancement model and a vocoder.

    Attributes are registered as sub-modules so that `.to(device)` and
    `.eval()` on the wrapper propagate to them (assuming `vocoder` is itself a
    `torch.nn.Module` - TODO confirm against the vocoder repository).
    """

    def __init__(self, flow, vocoder):
        """Create the wrapper.

        Args:
            flow: Flow model passed to `EnhanceModel` together with `config`.
            vocoder: Object exposing `generate(spectrogram)`, used to turn the
                enhanced spectrogram back into audio.
        """
        super().__init__()
        self.diffusion = EnhanceModel(flow, config)
        self.vocoder = vocoder

    def enhance(self, waveform, *, steps = 8, alpha = None):
        """Enhance a waveform and return the vocoder output.

        Args:
            waveform: Input audio passed to `spectogram`; presumably a tensor
                sampled at `config.audio.sample_rate` - TODO confirm the
                expected shape.
            steps: Number of sampling steps forwarded to `diffusion.sample`.
            alpha: Optional value forwarded unchanged to `diffusion.sample`.

        Returns:
            Whatever `self.vocoder.generate` returns for the enhanced
            spectrogram.
        """

        # Convert to spectrogram
        # NOTE(review): `spectogram` is not among this file's visible imports;
        # confirm which module defines it and import it at the top of the file.
        spec = spectogram(waveform,
            n_fft = config.audio.n_fft,
            n_mels = config.audio.n_mels,
            n_hop = config.audio.hop_size,
            n_window = config.audio.win_size,
            mel_norm = config.audio.mel_norm,
            mel_scale = config.audio.mel_scale,
            sample_rate = config.audio.sample_rate
        )

        # Enhance
        spec = (spec - config.audio.norm_mean) / config.audio.norm_std # Normalize
        enhanced = self.diffusion.sample(source = spec.to(torch.float32), steps = steps, alpha = alpha)
        enhanced = ((enhanced * config.audio.norm_std) + config.audio.norm_mean).to(torch.float32) # Denormalize

        # Vocoder (use the instance attribute; there is no bare `vocoder` name in scope)
        return self.vocoder.generate(enhanced)
30 changes: 16 additions & 14 deletions welcome.ipynb

Large diffs are not rendered by default.

0 comments on commit bb33591

Please sign in to comment.