Prototype of a Vocoder in Sonic Pi

Status update:

//PitchShiftPA is based on formant preserving pitch-synchronous overlap-add re-synthesis, as developed by Keith Lent
//based on real-time implementation by Juan Pampin, combined with non-real-time implementation by Joseph Anderson
//This synthdef is based on the pseudo-UGen by Marcin Pączkowski, using GrainBuf and a circular buffer at https://github.com/dyfer/PitchShiftPA

(
// Builds the 'sonic-pi-fx_vocoder' SynthDef and writes its compiled definition file to disk.
// Signal flow: one channel is read from in_bus, its pitch is tracked with Pitch.kr, the input is
// recorded into a circular LocalBuf, and GrainBuf re-reads that buffer as grains triggered at
// (tracked freq * pitch_ratio). The result is duplicated and written to out_bus.
//
// Arguments:
//   pitch_ratio       - multiplier applied to the tracked frequency to get the grain trigger rate
//   formant_ratio     - used as the GrainBuf rate; clipped to [1 / max_formant_ratio, max_formant_ratio]
//   min_freq          - lower bound for the tracked frequency; its reciprocal is the maximum delay time
//   max_formant_ratio - bound for formant_ratio, also used when sizing the buffer
//   grains_period     - grain duration expressed in periods of the tracked frequency
//   out_bus, in_bus   - output and input bus indices
//   time_dispersion   - scales an LFNoise0 signal that is added to the grain trigger frequency
SynthDef('sonic-pi-fx_vocoder', {|
		pitch_ratio = 1, formant_ratio = 1,
		min_freq = 10, max_formant_ratio = 10, grains_period = 2,
		out_bus=0, in_bus=0, time_dispersion|

		var in, localbuf, grainDur, wavePeriod, trigger, freqPhase, maxdelaytime, grainFreq, bufSize, delayWritePhase, grainPos, snd, freq;
		var absolutelyMinValue = 0.01; // used to ensure positive values before reciprocating
		var numChannels = 1;

		//multichannel expansion: numChannels becomes the largest collection size found among the two ratios
		// NOTE(review): inside a SynthDef function the arguments are presumably control proxies rather than
		// Collections at build time, so numChannels may always stay 1 - confirm.
		[pitch_ratio, formant_ratio].do({ arg item;
			item.isKindOf(Collection).if({ numChannels = max(numChannels, item.size) });
		});

		// read a single input channel and repeat it so there is one copy per output channel
		in = In.ar(in_bus,1).asArray.wrapExtend(numChannels);
		// NOTE(review): Pitch.kr outputs both freq and hasFreq; since `in` is an array here, confirm that
		// [0] yields the frequency alone and not the first channel's [freq, hasFreq] pair.
	    freq = Pitch.kr(in)[0];
	    //freq = freq.asArray.wrapExtend(numChannels);
		pitch_ratio = pitch_ratio.asArray.wrapExtend(numChannels);

		// keep min_freq strictly positive so that its reciprocal (the maximum delay time) is finite
		min_freq = min_freq.max(absolutelyMinValue);
		maxdelaytime = min_freq.reciprocal;

		// never let the tracked frequency drop below min_freq
		freq = freq.max(min_freq);

		wavePeriod = freq.reciprocal; // one period of the tracked frequency, in seconds
		grainDur = grains_period * wavePeriod; // grain length: grains_period periods
		grainFreq = freq * pitch_ratio; // rate at which grains are triggered

		// NOTE(review): formant_ratio is declared with a default of 1, so presumably notNil always holds
		// here and the lighter branch below is never built - confirm.
		if(formant_ratio.notNil, { //regular version

			formant_ratio = formant_ratio.asArray.wrapExtend(numChannels);

			max_formant_ratio = max_formant_ratio.max(absolutelyMinValue);
			formant_ratio = formant_ratio.clip(max_formant_ratio.reciprocal, max_formant_ratio);

			bufSize = ((SampleRate.ir * maxdelaytime * max_formant_ratio) + (SampleRate.ir * ControlDur.ir)).roundUp; //extra padding for maximum delay time
			freqPhase = LFSaw.ar(freq, 1).range(0, wavePeriod) + ((formant_ratio.max(1) - 1) * grainDur);//phasor offset for formant shift up - in seconds; positive here since phasor is subtracted from the delayWritePhase

		}, { //slightly lighter version, without formant manipulation

			formant_ratio = 1 ! numChannels;

			bufSize = ((SampleRate.ir * maxdelaytime) + (SampleRate.ir * ControlDur.ir)).roundUp; //extra padding for maximum delay time
			freqPhase = LFSaw.ar(freq, 1).range(0, wavePeriod);
		});

		// one single-channel buffer per channel, cleared on creation
		localbuf = numChannels.collect({LocalBuf(bufSize, 1).clear});
		// continuously write the input into its buffer; the Phasor wraps at the buffer length, and
		// the result of BufWr is kept as the current write position
		delayWritePhase = numChannels.collect({|ch| BufWr.ar(in[ch], localbuf[ch], Phasor.ar(0, 1, 0, BufFrames.kr(localbuf[ch])))});
		grainPos = (delayWritePhase / BufFrames.kr(localbuf)) - (freqPhase / BufDur.kr(localbuf)); //scaled to 0-1 for use in GrainBuf
		// NOTE(review): time_dispersion is declared without a default; confirm whether it can actually be
		// nil inside a SynthDef function (if it cannot, the first branch is never built).
		if(time_dispersion.isNil, {
			trigger = Impulse.ar(grainFreq);
		}, {
			// add noise, scaled by time_dispersion, to the trigger frequency
			trigger = Impulse.ar(grainFreq + (LFNoise0.kr(grainFreq) * time_dispersion));
		});
		// read grains back from the buffer: formant_ratio is passed as the grain rate, grainPos as the read position
		snd = numChannels.collect({|ch| GrainBuf.ar(1, trigger[ch], grainDur[ch], localbuf[ch], formant_ratio[ch], grainPos[ch])});

		// duplicate the processed signal before writing it to out_bus
		Out.ar(out_bus, snd.dup)
	}
// NOTE(review): hard-coded, user-specific absolute path - change it to the synthdefs directory of the local install.
).writeDefFile("/Users/xavierriley/Downloads/Sonic Pi.app/Contents/Resources/etc/synthdefs/compiled/")
)
# in synthinfo.rb
    # Metadata for the prototype vocoder FX, backed by the "fx_vocoder" synthdef.
    #
    # The keys returned by #arg_defaults are the opt names forwarded to the
    # synthdef, so each one must match a synthdef argument name exactly
    # (including case).
    class FXVocoder < FXInfo
      # Human-readable name of the FX.
      def name
        "Vocoder"
      end

      # Version in which this FX was introduced.
      def introduced
        Version.new(3,2,0)
      end

      # Name of the synthdef implementing this FX.
      def synth_name
        "fx_vocoder"
      end

      # User-facing description.
      # TODO: still empty - write the documentation for this FX.
      def doc
        ""
      end

      # Default opt values for this FX, merged over the defaults inherited
      # from FXInfo.
      def arg_defaults
        super.merge({
          # NOTE(review): the vocoder synthdef declares no `pitch` argument;
          # this key is kept for backward compatibility - confirm whether it
          # is still needed.
          :pitch => 440,
          :pitch_ratio => 1.0,
          :formant_ratio => 1.0,
          :min_freq => 10,
          :max_formant_ratio => 10,
          # Was :grains_Period, which does not match the synthdef argument
          # `grains_period`.
          :grains_period => 2.0,
        })
      end
    end
...
        :fx_vocoder => FXVocoder.new,
# Sonic Pi code

# harmonises a vocal sample as a major chord

load_synthdefs
vocal = "~/Downloads/acappella.wav"

# Dry voice.
sample vocal

# One vocoder voice per pitch ratio, each started in its own thread:
# 1.5 is a perfect fifth and 1.25 a major third above the tracked pitch.
[1.5, 1.25].each do |ratio|
  in_thread do
    with_fx :vocoder, pitch_ratio: ratio, formant_ratio: 0.5 do
      sample vocal
      # Hold the FX block open for the length of the sample.
      sleep sample_duration(vocal)
    end
  end
end

The API could do with tweaking to make it more intuitive but the noises are there.

Internally it pitch-tracks the input sound and then pitch-shifts against the tracked pitch. That’s why this example file has those moments of “distortion”: the tracker can’t find a clear pitch at those points. With a good input source the effect should be super smooth.

This also opens the door to a T-Pain style autotune but I need to do a bit more work to get that.

3 Likes