Initial commit: Core Teletext Editor functionality

2025-12-28 21:38:21 +01:00
commit 6000897578
4494 changed files with 537255 additions and 0 deletions
--- a/src/teletext/charsets.py
+++ b/src/teletext/charsets.py
@@ -0,0 +1,60 @@
+
+"""
+Teletext Character Sets (G0).
+Maps the specific code points (0x23, 0x24, 0x40, 0x5B-0x60, 0x7B-0x7E)
+to Unicode characters based on the National Option (3 bits).
+"""
+
+# Option 000 - English (default).  Every national-option code point keeps its
+# plain ASCII character in this table.
+ENGLISH = {
+    code: chr(code)
+    for code in (0x23, 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60,
+                 0x7B, 0x7C, 0x7D, 0x7E)
+}
+
+# Option 010 (2) - shared by Swedish, Finnish and Hungarian.
+# Upper-case letters sit at 0x5B-0x5E, their lower-case forms at 0x7B-0x7E.
+SWEDISH_FINNISH = {
+    0x23: '#', 0x24: '¤', 0x40: 'É',
+    0x5B: 'Ä', 0x5C: 'Ö', 0x5D: 'Å', 0x5E: 'Ü', 0x5F: '_', 0x60: 'é',
+    0x7B: 'ä', 0x7C: 'ö', 0x7D: 'å', 0x7E: 'ü',
+}
+
+# German - Option 001 (1).  0x60 is the degree sign in the German subset,
+# not the ASCII backtick.
+GERMAN = {
+    0x23: '#', 0x24: '$', 0x40: '§', 0x5B: 'Ä', 0x5C: 'Ö', 0x5D: 'Ü',
+    0x5E: '^', 0x5F: '_', 0x60: '°', 0x7B: 'ä', 0x7C: 'ö', 0x7D: 'ü',
+    0x7E: 'ß',
+}
+
+# Lookup table indexed by the 3-bit national option value; get_char() reads it.
+# More subsets can be added by replacing the ENGLISH placeholders below.
+SETS = [
+    ENGLISH,          # 000 - English
+    GERMAN,           # 001 - German
+    SWEDISH_FINNISH,  # 010 - Swedish/Finnish/Hungarian
+    ENGLISH, # 011 - Italian: placeholder, reuses the English table
+    ENGLISH, # 100 - French: placeholder, reuses the English table
+    ENGLISH, # 101 - Portuguese/Spanish: placeholder, reuses the English table
+    ENGLISH, # 110 - Turkish: placeholder (TODO confirm language for this slot)
+    ENGLISH, # 111 - Romania: placeholder (TODO confirm language for this slot)
+]
+
+def get_char(byte_val, subset_idx):
+    """Return the character for a 7-bit G0 code under a national option.
+
+    byte_val is masked with 0x7F first, so a parity bit left in bit 7 of the
+    stored byte does not affect the lookup.  subset_idx selects an entry of
+    SETS; a value below 0 or at/above len(SETS) falls back to entry 0.
+
+    Codes present in the selected table yield its replacement character;
+    every other code yields chr() of the masked value.
+    """
+    out_of_range = subset_idx < 0 or subset_idx >= len(SETS)
+    table = SETS[0 if out_of_range else subset_idx]
+
+    # Look up on the low 7 bits only.
+    code = byte_val & 0x7F
+    return table.get(code, chr(code))