diff --git a/protocol_prototype/DryBox/voice_codec.py b/protocol_prototype/DryBox/voice_codec.py
new file mode 100644
index 0000000..dcf8609
--- /dev/null
+++ b/protocol_prototype/DryBox/voice_codec.py
@@ -0,0 +1,714 @@
+"""
+Voice codec integration for encrypted voice over GSM.
+Implements Codec2 compression with FSK modulation for transmitting
+encrypted voice data over standard GSM voice channels.
+"""
+
+import array
+import math
+import struct
+from typing import Optional, Tuple, List
+from dataclasses import dataclass
+from enum import IntEnum
+
+try:
+    import numpy as np
+    HAS_NUMPY = True
+except ImportError:
+    HAS_NUMPY = False
+
+# ANSI colors
+RED = "\033[91m"
+GREEN = "\033[92m"
+YELLOW = "\033[93m"
+BLUE = "\033[94m"
+RESET = "\033[0m"
+
+
+class Codec2Mode(IntEnum):
+    """Codec2 bitrate modes."""
+    MODE_3200 = 0  # 3200 bps
+    MODE_2400 = 1  # 2400 bps
+    MODE_1600 = 2  # 1600 bps
+    MODE_1400 = 3  # 1400 bps
+    MODE_1300 = 4  # 1300 bps
+    MODE_1200 = 5  # 1200 bps (recommended for robustness)
+    MODE_700C = 6  # 700 bps
+
+
+@dataclass
+class Codec2Frame:
+    """Represents a single Codec2 compressed voice frame."""
+    mode: Codec2Mode
+    bits: bytes
+    timestamp: float
+    frame_number: int
+
+
+class Codec2Wrapper:
+    """
+    Wrapper for the Codec2 voice codec.
+    In production this would use py_codec2 or ctypes bindings to libcodec2;
+    this is a simulation interface for protocol development.
+    """
+
+    # Frame sizes in bits for each mode
+    FRAME_BITS = {
+        Codec2Mode.MODE_3200: 64,
+        Codec2Mode.MODE_2400: 48,
+        Codec2Mode.MODE_1600: 64,
+        Codec2Mode.MODE_1400: 56,
+        Codec2Mode.MODE_1300: 52,
+        Codec2Mode.MODE_1200: 48,
+        Codec2Mode.MODE_700C: 28
+    }
+
+    # Frame duration in ms
+    FRAME_MS = {
+        Codec2Mode.MODE_3200: 20,
+        Codec2Mode.MODE_2400: 20,
+        Codec2Mode.MODE_1600: 40,
+        Codec2Mode.MODE_1400: 40,
+        Codec2Mode.MODE_1300: 40,
+        Codec2Mode.MODE_1200: 40,
+        Codec2Mode.MODE_700C: 40
+    }
+
+    def __init__(self, mode: Codec2Mode = Codec2Mode.MODE_1200):
+        """
+        Initialize the Codec2 wrapper.
+
+        Args:
+            mode: Codec2 bitrate mode (default 1200 bps for robustness)
+        """
+        self.mode = mode
+        self.frame_bits = self.FRAME_BITS[mode]
+        self.frame_bytes = (self.frame_bits + 7) // 8
+        self.frame_ms = self.FRAME_MS[mode]
+        self.frame_samples = int(8000 * self.frame_ms / 1000)  # 8 kHz sampling
+        self.frame_counter = 0
+
+        # Quiet initialization - no print
+
+    def encode(self, audio_samples) -> Optional[Codec2Frame]:
+        """
+        Encode PCM audio samples into a Codec2 frame.
+
+        Args:
+            audio_samples: PCM samples (8 kHz, 16-bit signed)
+
+        Returns:
+            Codec2Frame, or None if there are not enough samples
+        """
+        if len(audio_samples) < self.frame_samples:
+            return None
+
+        # In production: call codec2_encode(state, bits, samples)
+        # Simulation: create pseudo-compressed data
+        compressed = self._simulate_compression(audio_samples[:self.frame_samples])
+
+        frame = Codec2Frame(
+            mode=self.mode,
+            bits=compressed,
+            timestamp=self.frame_counter * self.frame_ms / 1000.0,
+            frame_number=self.frame_counter
+        )
+
+        self.frame_counter += 1
+        return frame
+
+    def decode(self, frame: Codec2Frame):
+        """
+        Decode a Codec2 frame into PCM audio samples.
+
+        Args:
+            frame: Codec2 compressed frame
+
+        Returns:
+            PCM samples (8 kHz, 16-bit signed)
+        """
+        if frame.mode != self.mode:
+            raise ValueError(f"Frame mode {frame.mode} doesn't match decoder mode {self.mode}")
+
+        # In production: call codec2_decode(state, samples, bits)
+        # Simulation: decompress to audio
+        return self._simulate_decompression(frame.bits)
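+    # Worked example (MODE_1200): each 40 ms frame consumes
+    # 8000 * 0.040 = 320 PCM samples and produces 48 bits (6 bytes),
+    # i.e. 48 bits / 0.040 s = 1200 bps of compressed voice.
+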
+    def _simulate_compression(self, samples) -> bytes:
+        """Simulate Codec2 compression (for testing)."""
+        # Convert to a list if needed
+        if hasattr(samples, 'tolist'):
+            sample_list = samples.tolist()
+        elif hasattr(samples, '__iter__'):
+            sample_list = list(samples)
+        else:
+            sample_list = samples
+
+        # Extract basic features for the simulation
+        if HAS_NUMPY and hasattr(samples, '__array__'):
+            np_samples = np.asarray(samples, dtype=np.float32)
+            if len(np_samples) > 0:
+                mean_square = np.mean(np_samples ** 2)
+                energy = np.sqrt(mean_square) if not np.isnan(mean_square) else 0.0
+                zero_crossings = np.sum(np.diff(np.sign(np_samples)) != 0)
+            else:
+                energy = 0.0
+                zero_crossings = 0
+        else:
+            # Manual calculation without numpy
+            if sample_list:
+                energy = math.sqrt(sum(s ** 2 for s in sample_list) / len(sample_list))
+                zero_crossings = sum(1 for i in range(1, len(sample_list))
+                                     if (sample_list[i-1] >= 0) != (sample_list[i] >= 0))
+            else:
+                energy = 0.0
+                zero_crossings = 0
+
+        # Pack into bytes (simplified); clamp values to the 16-bit range
+        energy_int = max(0, min(65535, int(energy)))
+        zc_int = max(0, min(65535, int(zero_crossings)))
+        data = struct.pack('<HH', energy_int, zc_int)
+
+        # Pad to the frame size for this mode
+        data += b'\x00' * (self.frame_bytes - len(data))
+        return data[:self.frame_bytes]
+
+    def _simulate_decompression(self, compressed: bytes):
+        """Simulate Codec2 decompression (for testing)."""
+        # Unpack the features written by _simulate_compression
+        if len(compressed) >= 4:
+            energy, zero_crossings = struct.unpack('<HH', compressed[:4])
+        else:
+            energy, zero_crossings = 0, 0
+
+        # Synthesize a tone whose zero-crossing rate and RMS level match
+        # the stored features (crude, but enough to exercise the pipeline)
+        freq = max(50.0, zero_crossings * 8000.0 / (2 * self.frame_samples))
+        amplitude = min(int(energy), 32767)
+        if HAS_NUMPY:
+            t = np.arange(self.frame_samples) / 8000.0
+            return (np.sin(2 * np.pi * freq * t) * amplitude).astype(np.int16)
+        samples = array.array('h')
+        for i in range(self.frame_samples):
+            t = i / 8000.0
+            samples.append(int(math.sin(2 * math.pi * freq * t) * amplitude))
+        return samples
+
+
+class FSKModem:
+    """
+    4-FSK modem for carrying digital data over a voice channel.
+    Four audio tones inside the GSM voice passband encode 2 bits each,
+    so the modulated signal survives the channel's voice codec.
+    """
+
+    def __init__(self, sample_rate: int = 8000, baud_rate: int = 600):
+        """
+        Initialize the FSK modem.
+
+        Args:
+            sample_rate: Audio sample rate in Hz
+            baud_rate: Symbol rate (4-FSK carries 2 bits per symbol)
+        """
+        self.sample_rate = sample_rate
+        self.baud_rate = baud_rate
+        self.samples_per_symbol = int(sample_rate / baud_rate)
+
+        # 4-FSK tone frequencies in Hz (representative values inside
+        # the 300-3400 Hz voice passband)
+        self.frequencies = [600, 1200, 1800, 2400]
+
+        # Synchronization preamble
+        self.preamble_freq = 1000  # Hz
+        self.preamble_duration = 0.1  # seconds
+
+    def modulate(self, data: bytes, add_preamble: bool = True):
+        """
+        Modulate binary data into an FSK audio signal.
+
+        Args:
+            data: Binary data to modulate
+            add_preamble: Prepend a synchronization preamble
+
+        Returns:
+            Audio signal (numpy array or array.array of floats)
+        """
+        # Convert bytes to 2-bit symbols, MSB first
+        symbols = []
+        for byte in data:
+            symbols.extend([
+                (byte >> 6) & 0x03,
+                (byte >> 4) & 0x03,
+                (byte >> 2) & 0x03,
+                byte & 0x03
+            ])
+
+        # Generate audio signal
+        signal = []
+
+        # Add preamble
+        if add_preamble:
+            preamble_samples = int(self.preamble_duration * self.sample_rate)
+            if HAS_NUMPY:
+                t = np.arange(preamble_samples) / self.sample_rate
+                preamble = np.sin(2 * np.pi * self.preamble_freq * t)
+                signal.extend(preamble)
+            else:
+                for i in range(preamble_samples):
+                    t = i / self.sample_rate
+                    signal.append(math.sin(2 * math.pi * self.preamble_freq * t))
+
+        # Modulate symbols
+        for symbol in symbols:
+            freq = self.frequencies[symbol]
+            if HAS_NUMPY:
+                t = np.arange(self.samples_per_symbol) / self.sample_rate
+                tone = np.sin(2 * np.pi * freq * t)
+                signal.extend(tone)
+            else:
+                for i in range(self.samples_per_symbol):
+                    t = i / self.sample_rate
+                    signal.append(math.sin(2 * math.pi * freq * t))
+
+        # Apply smoothing to reduce clicks
+        if HAS_NUMPY:
+            audio = np.array(signal, dtype=np.float32)
+        else:
+            audio = array.array('f', signal)
+        audio = self._apply_envelope(audio)
+
+        return audio
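+    # Worked example: 0xB4 = 0b10110100 is sent MSB-first as the symbols
+    # [2, 3, 1, 0]. At the 600 baud used by VoiceProtocol, 2 bits/symbol
+    # gives a raw channel rate of 1200 bps, matching Codec2 MODE_1200.
+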
+    def demodulate(self, audio) -> Tuple[bytes, float]:
+        """
+        Demodulate an FSK audio signal into binary data.
+
+        Args:
+            audio: Audio signal
+
+        Returns:
+            Tuple of (demodulated data, confidence score)
+        """
+        # Find the preamble
+        preamble_start = self._find_preamble(audio)
+        if preamble_start < 0:
+            return b'', 0.0
+
+        # Skip the preamble
+        data_start = preamble_start + int(self.preamble_duration * self.sample_rate)
+
+        # Demodulate symbols
+        symbols = []
+        confidence_scores = []
+
+        pos = data_start
+        while pos + self.samples_per_symbol <= len(audio):
+            symbol_audio = audio[pos:pos + self.samples_per_symbol]
+            symbol, confidence = self._demodulate_symbol(symbol_audio)
+            symbols.append(symbol)
+            confidence_scores.append(confidence)
+            pos += self.samples_per_symbol
+
+        # Convert symbols back to bytes
+        data = bytearray()
+        for i in range(0, len(symbols), 4):
+            if i + 3 < len(symbols):
+                byte = (symbols[i] << 6) | (symbols[i+1] << 4) | (symbols[i+2] << 2) | symbols[i+3]
+                data.append(byte)
+
+        if HAS_NUMPY and confidence_scores:
+            avg_confidence = float(np.mean(confidence_scores))
+        else:
+            avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0
+        return bytes(data), avg_confidence
+
+    def _find_preamble(self, audio) -> int:
+        """Find the preamble in an audio signal."""
+        # Simple energy-based detection
+        window_size = int(0.01 * self.sample_rate)  # 10 ms window
+
+        if HAS_NUMPY:
+            for i in range(0, len(audio) - window_size, window_size // 2):
+                window = audio[i:i + window_size]
+
+                # Check for the preamble frequency
+                fft = np.fft.fft(window)
+                freqs = np.fft.fftfreq(len(window), 1 / self.sample_rate)
+
+                # Find the peak in the positive-frequency half
+                idx = np.argmax(np.abs(fft[:len(fft) // 2]))
+                peak_freq = abs(freqs[idx])
+
+                if abs(peak_freq - self.preamble_freq) < 50:  # 50 Hz tolerance
+                    return i
+        else:
+            # Zero-crossing based detection without FFT
+            for i in range(0, len(audio) - window_size, window_size // 2):
+                window = list(audio[i:i + window_size])
+
+                # Count zero crossings
+                zero_crossings = 0
+                for j in range(1, len(window)):
+                    if (window[j-1] >= 0) != (window[j] >= 0):
+                        zero_crossings += 1
+
+                # Estimate frequency from the zero-crossing rate
+                estimated_freq = (zero_crossings * self.sample_rate) / (2 * len(window))
+
+                if abs(estimated_freq - self.preamble_freq) < 100:  # 100 Hz tolerance
+                    return i
+
+        return -1
+
+    def _demodulate_symbol(self, audio) -> Tuple[int, float]:
+        """Demodulate a single FSK symbol."""
+        if HAS_NUMPY:
+            # FFT-based demodulation
+            fft = np.fft.fft(audio)
+            freqs = np.fft.fftfreq(len(audio), 1 / self.sample_rate)
+            magnitude = np.abs(fft[:len(fft) // 2])
+
+            # Measure the energy at each FSK frequency
+            energies = []
+            for freq in self.frequencies:
+                idx = np.argmin(np.abs(freqs[:len(freqs) // 2] - freq))
+                energies.append(magnitude[idx])
+
+            # Select the symbol with the highest energy
+            symbol = int(np.argmax(energies))
+        else:
+            # Goertzel algorithm for detecting specific frequencies
+            audio_list = list(audio) if hasattr(audio, '__iter__') else audio
+            energies = []
+
+            for freq in self.frequencies:
+                omega = 2 * math.pi * freq / self.sample_rate
+                coeff = 2 * math.cos(omega)
+
+                s_prev = 0.0
+                s_prev2 = 0.0
+
+                for sample in audio_list:
+                    s = sample + coeff * s_prev - s_prev2
+                    s_prev2 = s_prev
+                    s_prev = s
+
+                # Calculate the magnitude
+                power = s_prev2 * s_prev2 + s_prev * s_prev - coeff * s_prev * s_prev2
+                energies.append(math.sqrt(abs(power)))
+
+            # Select the symbol with the highest energy
+            symbol = energies.index(max(energies))
+
+        # Confidence is the ratio of the strongest to the second-strongest tone
+        sorted_energies = sorted(energies, reverse=True)
+        confidence = sorted_energies[0] / (sorted_energies[1] + 1e-6)
+
+        return symbol, min(confidence, 10.0) / 10.0
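+    # Resolution note: at 8000 / 600 = 13 samples per symbol the FFT bin
+    # spacing is about 8000 / 13 ≈ 615 Hz, so the 600 Hz tone spacing used
+    # above is near the minimum that keeps adjacent symbols separable.
+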
+    def _apply_envelope(self, audio):
+        """Apply a smoothing envelope to reduce clicks."""
+        # Raised-cosine ramps at both ends
+        ramp_samples = int(0.002 * self.sample_rate)  # 2 ms ramps
+
+        if len(audio) > 2 * ramp_samples:
+            if HAS_NUMPY:
+                # Fade in
+                t = np.linspace(0, np.pi / 2, ramp_samples)
+                audio[:ramp_samples] *= np.sin(t) ** 2
+
+                # Fade out
+                audio[-ramp_samples:] *= np.sin(t[::-1]) ** 2
+            else:
+                # Manual fade in
+                for i in range(ramp_samples):
+                    t = (i / ramp_samples) * (math.pi / 2)
+                    audio[i] *= math.sin(t) ** 2
+
+                # Manual fade out
+                for i in range(ramp_samples):
+                    t = ((ramp_samples - 1 - i) / ramp_samples) * (math.pi / 2)
+                    audio[-(i + 1)] *= math.sin(t) ** 2
+
+        return audio
+
+
+class VoiceProtocol:
+    """
+    Integrates the voice codec and modem with the Icing protocol
+    for encrypted voice transmission over GSM.
+    """
+
+    def __init__(self, protocol_instance):
+        """
+        Initialize the voice protocol handler.
+
+        Args:
+            protocol_instance: IcingProtocol instance
+        """
+        self.protocol = protocol_instance
+        self.codec = Codec2Wrapper(Codec2Mode.MODE_1200)
+        self.modem = FSKModem(sample_rate=8000, baud_rate=600)
+
+        # Voice crypto state
+        self.voice_iv_counter = 0
+        self.voice_sequence = 0
+
+        # Buffers
+        if HAS_NUMPY:
+            self.audio_buffer = np.array([], dtype=np.int16)
+        else:
+            self.audio_buffer = array.array('h')  # 16-bit signed integers
+        self.frame_buffer = []
+
+        print(f"{GREEN}[VOICE]{RESET} Voice protocol initialized")
+
+    def process_voice_input(self, audio_samples):
+        """
+        Process voice input: compress, encrypt, and modulate.
+
+        Args:
+            audio_samples: PCM audio samples (8 kHz, 16-bit)
+
+        Returns:
+            Modulated audio ready for transmission (numpy array or
+            array.array), or None if no complete frame was available
+        """
+        # Add to the buffer
+        if HAS_NUMPY:
+            self.audio_buffer = np.concatenate([self.audio_buffer, audio_samples])
+        else:
+            self.audio_buffer.extend(audio_samples)
+
+        # Process complete frames
+        modulated_audio = []
+
+        while len(self.audio_buffer) >= self.codec.frame_samples:
+            # Extract one frame
+            if HAS_NUMPY:
+                frame_audio = self.audio_buffer[:self.codec.frame_samples]
+                self.audio_buffer = self.audio_buffer[self.codec.frame_samples:]
+            else:
+                frame_audio = array.array('h', self.audio_buffer[:self.codec.frame_samples])
+                del self.audio_buffer[:self.codec.frame_samples]
+
+            # Compress with Codec2
+            compressed_frame = self.codec.encode(frame_audio)
+            if not compressed_frame:
+                continue
+
+            # Encrypt the frame
+            encrypted = self._encrypt_voice_frame(compressed_frame)
+
+            # Add forward error correction
+            protected = self._add_fec(encrypted)
+
+            # Modulate to audio
+            audio_signal = self.modem.modulate(protected, add_preamble=True)
+            modulated_audio.append(audio_signal)
+
+        if modulated_audio:
+            if HAS_NUMPY:
+                return np.concatenate(modulated_audio)
+            # Concatenate array.array objects
+            result = array.array('f')
+            for audio in modulated_audio:
+                result.extend(audio)
+            return result
+        return None
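+    # Transmit pipeline per 40 ms frame: 320 PCM samples -> Codec2 (6 bytes)
+    # -> encrypt (prepends a sequence/IV header) -> 3x repetition FEC
+    # -> 4-FSK audio. process_voice_output below undoes each stage in reverse.
+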
+    def process_voice_output(self, modulated_audio):
+        """
+        Process received audio: demodulate, decrypt, and decompress.
+
+        Args:
+            modulated_audio: Received FSK-modulated audio
+
+        Returns:
+            Decoded PCM audio samples (numpy array or array.array),
+            or None if the frame could not be recovered
+        """
+        # Demodulate
+        data, confidence = self.modem.demodulate(modulated_audio)
+
+        if confidence < 0.5:
+            print(f"{YELLOW}[VOICE]{RESET} Low demodulation confidence: {confidence:.2f}")
+            return None
+
+        # Remove FEC
+        frame_data = self._remove_fec(data)
+        if not frame_data:
+            return None
+
+        # Decrypt
+        compressed_frame = self._decrypt_voice_frame(frame_data)
+        if not compressed_frame:
+            return None
+
+        # Decompress
+        return self.codec.decode(compressed_frame)
+
+    def _encrypt_voice_frame(self, frame: Codec2Frame) -> bytes:
+        """Encrypt a voice frame using ChaCha20-CTR."""
+        if not self.protocol.hkdf_key:
+            raise ValueError("No encryption key available")
+
+        # Prepend sequence number and IV hint so the receiver can
+        # reconstruct the nonce and detect replay
+        header = struct.pack('<IH', self.voice_sequence & 0xFFFFFFFF,
+                             self.voice_iv_counter & 0xFFFF)
+
+        # In production: ChaCha20-CTR keyed from hkdf_key with a nonce
+        # derived from the IV counter. The placeholder keystream below is
+        # a key/sequence XOR for simulation and is NOT cryptographically
+        # secure.
+        key = bytes(self.protocol.hkdf_key)
+        payload = bytes(
+            b ^ key[(i + self.voice_sequence) % len(key)]
+            for i, b in enumerate(frame.bits)
+        )
+
+        self.voice_sequence += 1
+        self.voice_iv_counter += 1
+
+        return header + payload
+
+    def _decrypt_voice_frame(self, data: bytes) -> Optional[Codec2Frame]:
+        """Decrypt a voice frame."""
+        if len(data) < 10:
+            return None
+
+        # Extract sequence and IV hint (iv_hint reserved for nonce checks)
+        sequence, iv_hint = struct.unpack('<IH', data[:6])
+
+        # Mirror of the placeholder keystream in _encrypt_voice_frame
+        key = bytes(self.protocol.hkdf_key)
+        bits = bytes(
+            b ^ key[(i + sequence) % len(key)]
+            for i, b in enumerate(data[6:])
+        )
+
+        return Codec2Frame(
+            mode=self.codec.mode,
+            bits=bits,
+            timestamp=0.0,
+            frame_number=sequence
+        )
+
+    def _add_fec(self, data: bytes) -> bytes:
+        """Add forward error correction."""
+        # Simple repetition code (3x) for testing
+        # In production: use a convolutional code or LDPC
+        fec_data = bytearray()
+
+        for byte in data:
+            # Repeat each byte 3 times
+            fec_data.extend([byte, byte, byte])
+
+        return bytes(fec_data)
+
+    def _remove_fec(self, data: bytes) -> Optional[bytes]:
+        """Remove FEC and correct errors by majority vote."""
+        if len(data) % 3 != 0:
+            return None
+
+        corrected = bytearray()
+
+        for i in range(0, len(data), 3):
+            # Majority voting across the three copies
+            votes = [data[i], data[i+1], data[i+2]]
+            corrected.append(max(set(votes), key=votes.count))
+
+        return bytes(corrected)
+
+
+# Example usage
+if __name__ == "__main__":
+    # Test the Codec2 wrapper
+    print(f"\n{BLUE}=== Testing Codec2 Wrapper ==={RESET}")
+    codec = Codec2Wrapper(Codec2Mode.MODE_1200)
+
+    # Generate test audio: 40 ms of a 440 Hz tone at 8 kHz
+    if HAS_NUMPY:
+        t = np.linspace(0, 0.04, 320)
+        test_audio = (np.sin(2 * np.pi * 440 * t) * 16384).astype(np.int16)
+    else:
+        test_audio = array.array('h')
+        for i in range(320):
+            t = i * 0.04 / 320
+            test_audio.append(int(math.sin(2 * math.pi * 440 * t) * 16384))
+
+    # Encode
+    frame = codec.encode(test_audio)
+    print(f"Encoded frame: {len(frame.bits)} bytes")
+
+    # Decode
+    decoded = codec.decode(frame)
+    print(f"Decoded audio: {len(decoded)} samples")
+
+    # Test the FSK modem
+    print(f"\n{BLUE}=== Testing FSK Modem ==={RESET}")
+    modem = FSKModem()
+
+    # Test data
+    test_data = b"Hello, secure voice!"
+
+    # Modulate
+    modulated = modem.modulate(test_data)
+    print(f"Modulated: {len(modulated)} samples ({len(modulated) / 8000:.2f}s)")
+
+    # Demodulate
+    demodulated, confidence = modem.demodulate(modulated)
+    print(f"Demodulated: {demodulated}")
+    print(f"Confidence: {confidence:.2%}")
+    print(f"Match: {demodulated == test_data}")
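+    # FEC round-trip check (illustrative; DummyProtocol is a stand-in
+    # exposing only the hkdf_key attribute that VoiceProtocol needs)
+    print(f"\n{BLUE}=== Testing Repetition FEC ==={RESET}")
+
+    class DummyProtocol:
+        hkdf_key = bytes(range(32))
+
+    voice = VoiceProtocol(DummyProtocol())
+    payload = b"\x01\x02\x03\x04"
+    protected = voice._add_fec(payload)
+
+    # Corrupt one of the three copies of every byte; majority voting
+    # in _remove_fec should still recover the original payload
+    damaged = bytearray(protected)
+    for i in range(0, len(damaged), 3):
+        damaged[i] ^= 0xFF
+    recovered = voice._remove_fec(bytes(damaged))
+    print(f"FEC recovered after 1-of-3 corruption: {recovered == payload}")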