; program: chorus-16b-sine.asm
; UID = 000042 - unique id to eliminate conflicts between variables
; 16b address space
; mono data in on left channel, identical stereo out
; pot (MOD1) controls lfo frequency
; rotary encoder (MOD2) controls lfo depth

; program overview
;
; data is read in from the codec and stored to sram.  this data is then read
; back out at a time varying delay.  the average delay time is a fixed number
; set at the beginning of the code, and is varied with a sinusoidal lfo.  the
; lfo is generated via interpolating a 16b 512s half sinewave lookup table.
; a 32b number is used to index into this lookup table, to allow for very
; slow lfo rates.  this lfo is then multiplied by a 16b amplitude signal,
; and the data is fetched from the sram, at that location.  the adc is
; oversampled 256 times and deadbanded before updating the lfo rate.  the
; rotary encoder is used to increment the amplitude of the lfo.  there are
; two voices, each swept with the same lfo signal, but in opposite
; directions. the average delay for each is independent.

; constants
;
.equ delay1_000042 = $0321 ; chorus average delay time for voice 1
; (1/44.1 ms per unit), min value $0100
.equ delay2_000042 = $0444 ; chorus average delay time for voice 2
; (1/44.1 ms per unit), min value $0100

; register usage - may be redefined in other sections
;
; r0  multiply result lsb
; r1  multiply result msb
; r2  accumulation lsb
; r3  accumulation mlb
; r4  right/left lsb out/accumulation mhb
; r5  right/left msb out/accumulation msb
; r6  left lsb in
; r7  left msb in
; r8  adc accumulator fractional byte
; r9  adc accumulator lsb
; r10 adc accumulator msb
; r11 rotary encoder counter
; r12 lfo rate lsb
; r13 lfo rate msb
; r14 null register
; r15 switch sample counter
; r16 temporary swap register
; r17 temporary swap register
; r18 sine wave buffer/multiply msb
; r19 sine wave buffer/multiply msb
; r20 multiply swap register
; r21 multiply swap register
; r22 sinetable lookup address lsb
; r23 sinetable lookup address mlb
; r24 write address lsb
; r25 write address msb
; r26 sinetable lookup address mhb
; r27 sinetable lookup address msb
; r28 temporary swap register
; r29 lfo depth
; r30 jump location for interrupt lsb
; r31 jump location for interrupt msb
; t   rotary encoder edge detect indicator

; program starts here first time
; intialize registers
ldi r30,$04 ; increment z pointer to new jump location
clr r14 ; clear null register
ldi r29,$0e ; initialize lfo depth
reti ; finish with initialization and wait for next interrupt

; program starts here every time but first
; initiate data transfer to codec
sbi portb,portb0 ; toggle slave select pin
out spdr,r5 ; send out left channel msb
cbi portb,portb0

adiw r25:r24,$01 ; increment write address

wait1_000042: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait1_000042
in r7,spdr ; recieve in left channel msb
out spdr,r4 ; send out left channel lsb

wait2_000042: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait2_000042
in r6,spdr ; recieve in left channel lsb
out spdr,r5 ; send out right channel msb

;write left channel datat to sram
out portd,r24 ; set address
sts porth,r25
out portg,r14 ; pull ce low,we low,and set high bits of address
ldi r17,$ff
out ddra,r17 ; set porta as output for data write
out ddrc,r17 ; set portc as output for data write
out porta,r6 ; set data
out portc,r7
sbi portg,portg2 ; pull we high to write
out ddra,r14 ; set porta as input for data lines
out ddrc,r14 ; set portc as input for data lines

wait3_000042: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait3_000042
in r17,spdr ; recieve in right channel msb
out spdr,r4 ; send out right channel lsb

wait4_000042: ; check if byte has been sent

in r17,spsr
sbrs r17,spif
rjmp wait4_000042
in r17,spdr ; recieve in right channel lsb

; vco generation
movw r17:r16,r31:r30 ; store z register
;get sample 1
add r22,r12 ; increment sinetable address
adc r23,r13
adc r26,r14 ; r14 is cleared above
adc r27,r14
movw r31:r30,r27:r26 ; move to z register for data fetch
lsl r30 ; adjust pointer for 16b fetch
rol r31
andi r31,$03 ; limit value to 10b (512 samples x 2 bytes)
ori r31,$48 ; set to memory address location where table is stored
lpm r18,z+ ; get sine value lsb, increment z register
lpm r19,z ; get sine value msb
sbrc r27,$01 ; flip sign for half of the values
rjmp interpolate_000042
neg r18
adc r19,r14 ; r14 is cleared above
neg r19

interpolate_000042: ; multiply sample 1 by distance

movw r21:r20,r23:r22 ; get distance from sample 1
com r20 ; invert distance
com r21
mulsu r19,r21 ; (signed)Ah * (unsigned)Bh - multiply high bytes
movw r5:r4,r1:r0 ; store high bytes result for later
mul	r18,r20	; (unsigned)Al * (unsigned)Bl ; multiply low bytes
movw r3:r2,r1:r0 ; store low byets for later
mulsu r19,r20 ; (signed)Ah * (unsigned)Bl - multiply middle bytes
sbc	r5,r14 ; r14 is cleared above - subtract sign bit
add	r3,r0 ; accumulate result
adc	r4,r1
adc	r5,r14 ; r14 is cleared above
mul r21,r18 ; (unsigned)Bh * (unsigned)Al - multiply middle bytes
add	r3,r0 ; accumulate result
adc	r4,r1
adc	r5,r14 ; r14 is cleared above

;get sample 2
adiw r27:r26,$01 ; set to next sample
movw r31:r30,r27:r26 ; move to z register for data fetch
lsl r30 ; adjust pointer for 16b fetch
rol r31
andi r31,$03 ; limit value to 10b (512 samples x 2 bytes)
ori r31,$48 ; set to memory address location where table is stored
lpm r18,z+ ; get sine value lsb, increment z register
lpm r19,z ; get sine value msb
sbrc r27,$01 ; flip sign for half of the values
rjmp interpolate1_000042
neg r18
adc r19,r14 ; r14 is cleared above
neg r19

interpolate1_000042: ; multiply sample 2 by distance

sbiw r27:r26,$01 ; reset address
movw r31:r30,r17:r16 ; restore z register
mulsu r19,r23 ; (signed)Ah * (unsigned)Bh - multiply high bytes
add r4,r0 ; accumulate result
adc r5,r1
mul	r18,r22	; (unsigned)Al * (unsigned)Bl ; multiply low bytes
add r2,r0 ; accumulate result
adc r3,r1
adc r4,r14 ; r14 is cleared above
adc r5,r14
mulsu r19,r22 ; (signed)Ah * (unsigned)Bl - multiply middle bytes
sbc	r5,r14 ; r14 is cleared above - subtract sign bit
add	r3,r0 ; accumulate result
adc	r4,r1
adc	r5,r14 ; r14 is cleared above
mul r23,r18 ; (unsigned)Bh * (unsigned)Al - multiply middle bytes
add	r3,r0 ; accumulate result
adc	r4,r1
adc	r5,r14 ; r14 is cleared above

;set lfo depth
movw r19:r18,r5:r4 ; move lfo signal to multiply register
mov r21,r29 ; move lfo depth to multiply register
mulsu r19,r21 ; (signed)ah * (unsigned)b
movw r5:r4,r1:r0
mul r21,r18 ; (unsigned)b * (unsigned)al
add	r4,r1
adc	r5,r14 ; r14 is cleared above

;create first voice
;add lfo to delay
mov r2,r4 ; make a backup copy of lfo value for second voice
mov r28,r5
movw r17:r16,r25:r24 ; move current location to read address
subi r16,low(delay1_000042) ; remove delay time
sbci r17,high(delay1_000042)
clr r21 ; prepare to add in lfo time
tst r5 ; check if lfo time is negative
brpl lfoadd_000042 ; add in lfo time if positive
ser r21 ; subtract lfo time if negative

lfoadd_000042: ; add in lfo time

add r16,r5 ; add in lfo time
adc r17,r21

;get left channel sample 1 from sram
out portd,r16 ; set address
sts porth,r17
nop ; wait setup period of two cycles
nop
in r18,pina ; get data
in r19,pinc ; get data

;multiply sample 1 by distance
mov r20,r4 ; get distance from sample 1
com r20 ; invert distance for sample weighting
mulsu r19,r20 ; (signed)ah * b
movw r5:r4,r1:r0
mul r18,r20 ; al * b
add r4,r1
adc r5,r14 ; r14 is cleared above
mov r3,r0

;get left channel sample 2 from sram
subi r16,$ff ; set to next sample
sbci r17,$ff ; done this way because there is no addi or adci
out portd,r16 ; set address
sts porth,r17
nop ; wait setup period of two cycles
nop
in r18,pina ; get data
in r19,pinc ; get data

;multiply sample 2 by distance
com r20 ; reset sample 2 distance
mulsu r19,r20 ; (signed)ah * b
add r4,r0 ; accumulate result
adc r5,r1
mul r18,r20 ; al * b
add r3,r0 ; accumulate result
adc r4,r1
adc r5,r14 ; r14 is cleared above
movw r7:r6,r5:r4 ; store voice 1

;create second voice
;add lfo to delay
mov r4,r2 ; restore lfo value for second voice
mov r5,r28
com r4 ; invert lfo for second voice
com r5
movw r17:r16,r25:r24 ; move current location to read address
subi r16,low(delay2_000042) ; remove delay time
sbci r17,high(delay2_000042)
clr r21 ; prepare to add in lfo time
tst r5 ; check if lfo time is negative
brpl lfoadd1_000042 ; add in lfo time if positive
ser r21 ; subtract lfo time if negative

lfoadd1_000042: ; add in lfo time

add r16,r5 ; add in lfo time
adc r17,r21

;get left channel sample 1 from sram
out portd,r16 ; set address
sts porth,r17
nop ; wait setup period of two cycles
nop
in r18,pina ; get data
in r19,pinc ; get data

;multiply sample 1 by distance
mov r20,r4 ; get distance from sample 1
com r20 ; invert distance for sample weighting
mulsu r19,r20 ; (signed)ah * b
movw r5:r4,r1:r0
mul r18,r20 ; al * b
add r4,r1
adc r5,r14 ; r14 is cleared above
mov r3,r0

;get left channel sample 2 from sram
subi r16,$ff ; set to next sample
sbci r17,$ff ; done this way because there is no addi or adci
out portd,r16 ; set address
sts porth,r17
nop ; wait setup period of two cycles
nop
in r18,pina ; get data
in r19,pinc ; get data

;multiply sample 2 by distance
com r20 ; reset sample 2 distance
mulsu r19,r20 ; (signed)ah * b
add r4,r0 ; accumulate result
adc r5,r1
mul r18,r20 ; al * b
add r3,r0 ; accumulate result
adc r4,r1
adc r5,r14 ; r14 is cleared above

;divide both voices by 2 and add them
asr r7 ; divide voice 1 by 2
ror r6
asr r5 ; divide voice 2 by 2
ror r4
add r4,r6 ; add the two voices
adc r5,r7

;check rotary encoder and adjust lfo depth
; although rotary encoder is externally debounced, it is done here again.
; pin1 is sampled on a transition from high to low on pin0.  if pin1 is
; high, a left turn occured, if pin1 is low, a right turn occured.
dec r11 ; check if time to sample rotary encoder
brne adcsample_000042 ; continue if not
ldi r17,$40 ; adjust sample frequency to catch all rising edges (1.5ms)
mov r11,r17
lds r17,pinj ; get switch data
sbrs r17,$00 ; check if pin0 is low
rjmp edge_000042 ; check if pin0 was low on previous sample
clt ;  clear state register if back high
rjmp adcsample_000042 ; finish off

edge_000042: ; check for falling edge

brts adcsample_000042 ; do nothing if edge was already detected
set ; set t register to indicate edge detected
sbrs r17,$01 ; check if pin1 is high
rjmp increment_000042 ; increment desired delay if right rotation
subi r29,$01 ; decrement lfo depth register
brcc adcsample_000042 ; check if underflow occured
clr r29 ; set lfo depth to min
rjmp adcsample_000042 ; finish off

increment_000042: ; increment desired delay register

ldi r16,$01 ; increment lfo depth register 
add r29,r16
brcc adcsample_000042 ; check if overflow occured
ser r29 ; set lfo depth to max

adcsample_000042: ; sample adc for lfo rate

lds r17,adcsra ; get adc control register
sbrs r17,adif ; check if adc conversion is complete
rjmp done_000042 ; skip adc sampling
lds r16,adcl ; get low byte adc value
lds r17,adch ; get high byte adc value
add r8,r16 ; accumulate adc samples
adc r9,r17
adc r10,r14 ; r14 is cleared above
ldi r17,$f7
sts adcsra,r17 ; clear interrupt flag
dec r15 ; countdown adc sample clock
brne done_000042 ; get delay time if its been long enough

deadband_000042: ; set the low value of the delay

lsr r10 ; divide adc value by 16
ror r9
ror r8
lsr r10
ror r9
ror r8
lsr r9 ; r10 is now empty
ror r8
lsr r9
ror r8
movw r17:r16,r9:r8 ; move adc sample to temporary register
ldi r21,$80 ; add in offset of min lfo rate ($0080)
add r16,r21
adc r17,r14 ; r14 is cleared above
sub r16,r12 ; find difference between adc sample and current lfo rate
sbc r17,r13
brsh check_000042 ; check for deadband if positive
neg r16 ; invert if negative
adc r17,r14 ; r14 is cleared above
neg r17

check_000042: ; check if difference is greater than deadband

cpi r16,$10 ; check if difference is less than 1 adc lsb
cpc r17,r14 ; r14 cleared above
brlo empty_000042 ; do nothing if less than 1 adc lsb
movw r13:r12,r9:r8 ; move adc sample to lfo rate register
add r12,r21 ; add in offset of min lfo rate ($0080)
adc r13,r14 ; r14 is cleared above

empty_000042: ; empty accumulation registers and finish off

clr r8 ; empty accumulation registers
clr r9
clr r10

switchsample_000042: ; check rotary switch

lds r16,pinj ; get switch data
andi r16,$78 ; mask off rotary switch
lsr r16 ; adjust switch position to program memory location
lsr r16
ldi r17,$02
add r16,r17
cpse r16,r31 ; check if location has changed
clr r30 ; reset jump register to intial state
mov r31,r16

done_000042:

reti ; return to waiting

