; program: pitch_shifter-16b-ducking.asm
; UID = 000020 - this is a unique id so variables dont conflict
; 16b address space with mono data (1.5s sample time)
; pitch shifter - loops past n seconds at a higher or lower rate to change
; the apparent pitch of the sound - rotary encoder controlled playback speed

; program overview
;
; data is sent out and taken in from the codec.  data is taken in on the
; left channel, and played out on both left and right.  a buffer of the
; past n seconds is kept and the output is the result of sampling this
; buffer at varying playback speeds. the speed at which it plays through
; the memory is controlled by the rotary encoder.  turning the encoder to
; the right speeds playback up, and turning it to the left slows playback
; down.  the audio is kept clean over fractional sample periods by
; interpolating between the two closest samples.

; constant definitions
;
; step_size_000020 is the amount added to or subtracted from the playback
; speed increment (r13:r12) for each rotary encoder detent that is detected.
.equ step_size_000020 = $02 ; this is the amount a single detent on the rotary
; encoder changes the read address increment by.  the increment is an 8.8
; fixed point value ($0100 is normal speed), so each bit is 1/256 of
; normal playback speed.

; register usage - may be redefined in other sections
;
; r0  multiply result lsb
; r1  multiply result msb
; r2  sample 3/4 lsb
; r3  sample 3/4 msb
; r4  left/right lsb out
; r5  left/right msb out
; r6  left lsb in
; r7  left msb in
; r8  
; r9  adc msb accumulator
; r10 adc fractional byte accumulator
; r11 adc lsb accumulator
; r12 playback speed increment lsb value ($0100 is normal speed)
; r13 playback speed increment msb value
; r14 rotary encoder counter
; r15 switch\adc counter
; r16 temporary swap register
; r17 temporary swap register
; r18 signed multiply register
; r19 signed multiply register
; r20 unsigned multiply register
; r21 unsigned multiply register
; r22 write address third byte/null register
; r23 read address fractional byte
; r24 write address lsb
; r25 write address msb
; r26 buffer length lsb
; r27 buffer length msb
; r28 read address lsb
; r29 read address msb
; r30 jump location for interrupt lsb
; r31 jump location for interrupt msb

; program starts here first time
; initialize registers
; one-time setup for this program.  r30 (z pointer lsb) is advanced so that
; later entries presumably land on the codec transfer code that follows
; instead of repeating this setup - the dispatch code is not visible here.
ldi r30,$06 ; increment z pointer to new jump location (skips these 6 setup instructions)
ldi r17,$01 ; initialize playback speed to normal (r13:r12 = $0100)
mov r13,r17 ; loaded through r17 because ldi only reaches r16-r31
clr r12
ldi r26,$04 ; initialize buffer size (r27:r26 = $0004)
clr r27 ; NOTE(review): $0004 is below the $0200 minimum the adc code enforces later - confirm this is intended

; program begins here
; initiate data transfer to codec
; the output sample held in r5:r4 is sent to the codec as both the left and
; the right channel (msb first).  the left channel input is received into
; r7:r6 and written to sram at the write address (r25:r24) while the third
; spi byte is still shifting.  the right channel input is read but not kept.
sbi portb,portb0 ; toggle slave select pin
out spdr,r5 ; send out left channel msb
cbi portb,portb0
ldi r22,$00 ; set up high byte write register - r22 is also the zero register for the rest of the pass

wait1_000020: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait1_000020
in r7,spdr ; receive in left channel msb
out spdr,r4 ; send out left channel lsb

wait2_000020: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait2_000020
in r6,spdr ; receive in left channel lsb
out spdr,r5 ; send out right channel msb
;write left channel data to sram
out portd,r24 ; set address
sts porth,r25
out portg,r22 ; pull ce low,we low,and set high bits of address
ldi r17,$ff
out ddra,r17 ; set porta as output for data write
out ddrc,r17 ; set portc as output for data write
out porta,r6 ; set data - lsb on porta
out portc,r7 ; msb on portc
sbi portg,portg2 ; pull we high to write
out ddra,r22 ; set porta as input for data lines
out ddrc,r22 ; set portc as input for data lines

wait3_000020: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait3_000020
in r17,spdr ; receive in right channel msb - value is not used (r17 is overwritten below)
out spdr,r4 ; send out right channel lsb

wait4_000020: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait4_000020
in r17,spdr ; receive in right channel lsb - value is not used

;advance the write address by one sample, wrapping at the buffer length
adiw r25:r24,$01 ; write address = write address + 1
cp r24,r26 ; 16b compare of write address against buffer length (r27:r26)
cpc r25,r27
brlo interpolate_000020 ; still inside the buffer - keep the new address
clr r25 ; at or past the end - restart the write address at zero
clr r24

interpolate_000020: ; advance the read position by the playback speed

add r23,r12 ; fractional byte + speed lsb
adc r28,r13 ; address lsb + speed msb + carry
adc r29,r22 ; ripple the carry into the address msb (r22 holds zero)
cp r28,r26 ; 16b compare of read address against buffer length
cpc r29,r27
brlo getsample1_000020 ; still inside the buffer - keep the new address
clr r29 ; at or past the end - restart the read address at zero
clr r28

getsample1_000020: ;get left channel sample 1 data from sram
; reads the two stored samples that surround the fractional read position:
; sample 1 at the read address (lsb -> r6, msb -> r18) and sample 2 at the
; following address, wrapped at the buffer length (lsb -> r7, msb -> r19).
; r29:r28 itself is not modified here - the work is done on a copy.

movw r17:r16,r29:r28 ; move read address to temporary register
out portd,r16 ; set address
sts porth,r17
ldi r21,$01 ; increment read address - r21 is left at $01 and reused later in the pass
add r16,r21 ; placed here to use 2 cycle wait
adc r17,r22 ; r22 is cleared above
in r6,pina ; get data - sample 1 lsb
in r18,pinc ; get data - sample 1 msb
cp r16,r26 ; check if at end of buffer
cpc r17,r27
brlo getsample2_000020 ; do nothing if not at end of buffer
clr r16 ; reset buffer to bottom
clr r17

getsample2_000020: ;get left channel sample 2 data from sram

out portd,r16 ; set address
sts porth,r17
nop ; wait 2 cycle setup time
nop
in r7,pina ; get data - sample 2 lsb
in r19,pinc ; get data - sample 2 msb

interpolate2_000020: ; multiply samples by distance
; linear interpolation between sample 1 (r18:r6) and sample 2 (r19:r7).
; sample 1 is weighted by the complement of the read address fractional
; byte (r20 = ~r23) and sample 2 by the fractional byte itself (r23).
; each product is 24b; the upper 16b are accumulated in r5:r4 and the
; lowest byte is kept in r17 only so its carry can be propagated.

mov r20,r23 ; get distance from sample 1
com r20 ; r20 = $ff - fractional byte = weight for sample 1
mulsu r18,r20 ; (signed)ah * b
movw r5:r4,r1:r0 ; msb product forms the upper 16b of the result
mul	r6,r20	; al * b
add	r4,r1 ; add upper byte of the lsb product
adc	r5,r22 ; r22 is cleared above
mov r17,r0 ; keep lowest byte for carry generation below

;multiply and accumulate sample 2 by distance
mulsu r19,r23 ; (signed)ah * b
add r4,r0 ; accumulate result
adc r5,r1
mul	r7,r23	; al * b
add r17,r0 ; accumulate result - only the carry out of this byte is used
adc	r4,r1
adc	r5,r22 ; r22 is cleared above

;get sample from other side of buffer
; computes the address half a buffer length ahead of the read address
; (r29:r28), wrapped back into the buffer, and leaves it in r17:r16 for the
; sample 3/4 fetch that follows.  the read address is below the buffer
; length (r27:r26) at this point, so the true sum is below 1.5 times the
; buffer length and a single subtraction of the buffer length is enough to
; wrap it.  for buffer lengths above roughly $aaaa the 16b sum can carry
; out of bit 15; the wrapped sum would then compare as "inside the buffer"
; and the wrong address would be read, so the carry is tested first.  the
; 16b subtraction still gives the correct result in that case because the
; true difference always fits in 16b.
movw r17:r16,r27:r26 ; move buffer size to temporary register
lsr r17 ; divide buffer size by 2
ror r16
add r16,r28 ; add half buffer size to read address
adc r17,r29
brcs flip3_000020 ; sum overflowed 16b, so it is past the end of the buffer
cp r16,r26 ; check for buffer overflow
cpc r17,r27
brlo getsample3_000020 ; continue if no overflow

flip3_000020: ; wrap address back into the buffer

sub r16,r26 ; flip around boundary
sbc r17,r27

getsample3_000020: ;get left channel sample 3 data from sram
; on entry r17:r16 holds the address computed by the preceding code (read
; address plus half the buffer length, wrapped).  sample 3 is read from
; that address (lsb -> r6, msb -> r18) and sample 4 from the following
; address, wrapped at the buffer length (lsb -> r7, msb -> r19).  the two
; are then interpolated with the same weights used for samples 1 and 2
; (r20 and r23), leaving the upper 16b of the result in r3:r2.

out portd,r16 ; set address
sts porth,r17
add r16,r21 ; increment read address - r21 set to $01 above
adc r17,r22 ; r22 is cleared above
in r6,pina ; get data - sample 3 lsb
in r18,pinc ; get data - sample 3 msb
cp r16,r26 ; check if at end of buffer
cpc r17,r27
brlo getsample4_000020 ; do nothing if not at end of buffer
clr r16 ; reset buffer to bottom
clr r17

getsample4_000020: ;get left channel sample 4 data from sram

out portd,r16 ; set address
sts porth,r17
nop ; wait 2 cycle setup time
nop
in r7,pina ; get data - sample 4 lsb
in r19,pinc ; get data - sample 4 msb

;multiply sample 3 by distance
mulsu r18,r20 ; (signed)ah * b
movw r3:r2,r1:r0 ; msb product forms the upper 16b of the result
mul	r6,r20 ; al * b
add	r2,r1 ; add upper byte of the lsb product
adc	r3,r22 ; r22 is cleared above
mov r17,r0 ; keep lowest byte for carry generation below

;multiply sample 4 by distance
mulsu r19,r23 ; (signed)ah * b
add r2,r0 ; accumulate result
adc r3,r1
mul	r7,r23	; al * b
add r17,r0 ; accumulate result - only the carry out of this byte is used
adc	r2,r1
adc	r3,r22 ; r22 is cleared above

;get distance to boundary
; computes the absolute difference between the read position
; (r29:r28 with fractional byte r23) and the write address (r25:r24) as a
; 24b value in r17:r16:r18 (r18 is the fractional byte).  if that
; difference is half the buffer length or more it is replaced by
; (buffer length - difference), so the result is the distance the short
; way around the circular buffer.
movw r17:r16,r29:r28 ; move read address to temporary register
mov r18,r23 ; copy of the read address fractional byte
sub r16,r24 ; find distance to loop boundary
sbc r17,r25
brcc half_000020 ; check if result is negative
com r16 ; invert distance if negative
com r17
com r18 ; all 3 bytes complemented - adding 1 below completes the 24b negate
add r18,r21 ; r21 set to $01 above
adc r16,r22 ; ripple carry through the upper bytes (r22 holds zero)
adc r17,r22

half_000020: ; check if result is greater than half the buffer size

lsr r27 ; divide buffer size by 2 - done in place, restored on both paths below
ror r26
cp r16,r26 ; check if result is greater than half the buffer size
cpc r17,r27
brlo reset_000020 ; skip flip if not
lsl r26 ; reset buffer size
rol r27
sub r16,r26 ; flip result around boundary
sbc r17,r27
com r16 ; negate the 24b value again so the result is buffer size - distance
com r17
com r18
add r18,r21 ; r21 set to $01 above
adc r16,r22
adc r17,r22
rjmp scale_000020

reset_000020: ; reset buffer

lsl r26 ; reset buffer size
rol r27

scale_000020: ; scale distance to match buffer size - 50% accurate
; a copy of the buffer size (r7:r6) and the distance (r17:r16:r18) are
; shifted left together until bit 15 of the buffer size copy is set.  the
; distance is then doubled once more, giving the weight for sample 1/2 in
; r17:r16.  the weight for sample 3/4 is the shifted buffer size minus
; that value (r21:r20).  the output r5:r4 is the upper 16b of
; (sample 1/2 * r17:r16) + (sample 3/4 * r21:r20); r7:r6 hold the lower
; 16b of the 32b accumulator.
; note: the shift loop only exits once bit 15 of the size copy is set, so
; r27:r26 must be nonzero here.

movw r7:r6,r27:r26 ; move buffer size to temporary register
sbrc r7,$07 ; check if msb of buffer size is set
rjmp attenuate_000020 ; attenuate signal if 16b value

shift_000020: ; shift buffer size till it occupies full 16b

lsl r6 ; multiply buffer size by 2
rol r7
lsl r18 ; multiply distance by 2
rol r16
rol r17
sbrs r7,$07 ; check if msb of buffer size is set
rjmp shift_000020 ; keep checking if not set

attenuate_000020: ; multiply sample 1/2 by distance

lsl r18 ; multiply distance by 2 since max value is 1/2 buffer size
rol r16
rol r17
sub r6,r16 ; find complementary distance of sample 3/4
sbc r7,r17
movw r21:r20,r7:r6 ; move distance to signed multiply register
movw r19:r18,r5:r4 ; move value to signed multiply register
mulsu r19,r17 ; (signed)ah * bh
movw r5:r4,r1:r0 ; upper 16b of the 32b result
mul	r18,r16	; al * bl
movw r7:r6,r1:r0 ; lower 16b of the 32b result
mulsu r19,r16 ; (signed)ah * bl
sbc	r5,r22 ; r22 is cleared above - subtracts the mulsu carry to sign extend the partial product
add	r7,r0
adc	r4,r1
adc	r5,r22
mul r17,r18 ; bh * al
add	r7,r0
adc	r4,r1
adc	r5,r22

;multiply and accumulate sample 3/4 with result from above
movw r19:r18,r3:r2 ; move value to signed multiply register
mulsu r19,r21 ; (signed)ah * bh
add	r4,r0 ; accumulate into upper 16b
adc	r5,r1
mul	r18,r20 ; al * bl
add	r6,r0 ; accumulate into lower 16b
adc	r7,r1
adc	r4,r22 ; ripple carry into upper 16b (r22 holds zero)
adc	r5,r22
mulsu r19,r20 ; (signed)ah * bl
sbc	r5,r22 ; subtracts the mulsu carry to sign extend the partial product
add	r7,r0
adc	r4,r1
adc	r5,r22
mul r21,r18 ; bh * al
add	r7,r0
adc	r4,r1
adc	r5,r22

rotary_000020: ; poll the rotary encoder and nudge the playback rate
; the encoder is debounced externally, so no debounce is done here.
; pin0 is polled once every $40 passes.  on the first poll that finds pin0
; low after it was high (a falling edge), pin1 gives the direction: pin1
; high is a left turn (playback speed is reduced), pin1 low is a right
; turn (playback speed is raised).  the t flag records that the current
; low period of pin0 has already been handled.
dec r14 ; poll divider - only look at the encoder when it reaches zero
brne adcsample_000020
ldi r17,$40 ; reload the poll divider so falling edges are not missed
mov r14,r17
lds r17,pinj ; read the encoder pins
sbrc r17,$00 ; is pin0 low?
rjmp released_000020 ; no - pin0 is high, re-arm the edge detector

edge_000020: ; pin0 is low - act only on the first low sample

brts adcsample_000020 ; this edge was already handled
set ; remember that the falling edge has been handled
sbrc r17,$01 ; is pin1 low?
rjmp decrement_000020 ; no - pin1 is high, left rotation

increment_000020: ; right rotation - raise playback speed

ldi r17,step_size_000020
add r12,r17 ; r13:r12 = r13:r12 + step size
adc r13,r22 ; r22 holds zero
rjmp adcsample_000020 ; finish off

released_000020: ; pin0 is back high

clt ; clear the edge state so the next falling edge is seen
rjmp adcsample_000020 ; finish off

decrement_000020: ; left rotation - reduce playback speed

ldi r17,step_size_000020
sub r12,r17 ; r13:r12 = r13:r12 - step size
sbc r13,r22 ; r22 holds zero

adcsample_000020: ; average the adc reading and update the buffer length
; each finished adc conversion is added to a 24b sum in r9:r11:r10.  when
; the 8b counter r15 reaches zero the sum is divided by 4, the low six
; bits are dropped, and a floor of $0200 is applied.  the result replaces
; the buffer length (r27:r26) only if it differs from it by $80 or more.
; the rotary switch is then checked and the z pointer is updated.

lds r17,adcsra ; read the adc status
sbrs r17,adif ; has a conversion finished?
rjmp done_000020 ; no - nothing more to do this pass
lds r17,adcl ; low byte of the reading, read before the high byte
lds r16,adch ; high byte of the reading
add r10,r17 ; add the reading to the running sum
adc r11,r16
adc r9,r22 ; carry only - r22 holds zero
ldi r17,$f7
sts adcsra,r17 ; clear interrupt flag
dec r15 ; count this reading
brne done_000020 ; keep summing until the counter reaches zero (256 readings per wrap)
lsr r9 ; first halving of the 24b sum
ror r11
ror r10
lsr r9 ; second halving - sum is now divided by 4
ror r11
ror r10
ldi r17,$c0 ; keep only the top two bits of the low byte
and r10,r17
ldi r17,$02 ; smallest allowed buffer length is $0200
cp r11,r17
brsh compare_000020 ; already at or above the minimum
clr r10 ; below the minimum - force the value to $0200
mov r11,r17

compare_000020: ; measure how far the new value is from the current length

movw r17:r16,r27:r26 ; work on a copy of the current buffer length
sub r16,r10 ; current length - new value
sbc r17,r11
brcc deadband_000020 ; difference is already positive
neg r16 ; 16b negate to get the absolute difference
adc r17,r22 ; r22 holds zero
neg r17

deadband_000020: ; ignore small changes so pot noise does not move the length

cpi r16,$80 ; is the difference at least $0080?
cpc r17,r22 ; r22 holds zero
brlo nochange_000020 ; too small - keep the current buffer length
movw r27:r26,r11:r10 ; accept the new buffer length

nochange_000020: ; start a fresh accumulation

clr r9 ; empty accumulation registers
clr r10
clr r11

;check rotary switch state
ldi r17,$02
lds r16,pinj ; read the switch pins
andi r16,$78 ; keep only the rotary switch bits
lsr r16
lsr r16
add r16,r17 ; adjust switch position to program memory location
cpse r16,r31 ; unchanged location - skip the reset below
clr r30 ; location changed - reset jump register to initial state
mov r31,r16 ; store the location in the z pointer msb

done_000020:

reti

