src/teletext/charsets.py


"""
Teletext Character Sets (G0).
Maps the national-option code points (0x23, 0x24, 0x40, 0x5B-0x60, 0x7B-0x7E)
to Unicode characters based on the National Option (3 bits).
"""

# National option sub-sets of the G0 character set.
#
# Each table maps the thirteen language-dependent code points to the Unicode
# character shown for that language. Codes that are not listed in a table
# are rendered as the character with the same code point (see get_char()).

# Default (English) - Option 000
# NOTE(review): this table is plain ASCII; the UK sub-set in the teletext
# specification uses different glyphs at these positions (e.g. a pound sign
# at 0x23). Confirm whether ASCII is the intended default here.
ENGLISH = {
    0x23: '#', 0x24: '$', 0x40: '@',
    0x5B: '[', 0x5C: '\\', 0x5D: ']', 0x5E: '^',
    0x5F: '_', 0x60: '`',
    0x7B: '{', 0x7C: '|', 0x7D: '}', 0x7E: '~',
}

# Swedish/Finnish/Hungarian - Option 010 (2)
SWEDISH_FINNISH = {
    0x23: '#', 0x24: '\u00A4', 0x40: '\u00C9',
    0x5B: '\u00C4', 0x5C: '\u00D6', 0x5D: '\u00C5', 0x5E: '\u00DC',
    0x5F: '_', 0x60: '\u00E9',
    0x7B: '\u00E4', 0x7C: '\u00F6', 0x7D: '\u00E5', 0x7E: '\u00FC',
}

# German - Option 001 (1)
# Position 0x60 is the degree sign in the German sub-set, not a backtick.
GERMAN = {
    0x23: '#', 0x24: '$', 0x40: '§',
    0x5B: 'Ä', 0x5C: 'Ö', 0x5D: 'Ü', 0x5E: '^',
    0x5F: '_', 0x60: '°',
    0x7B: 'ä', 0x7C: 'ö', 0x7D: 'ü', 0x7E: 'ß',
}

# We can add more as needed.

# Indexed by the 3-bit national option value. Languages without their own
# table yet reuse ENGLISH as a placeholder so every index 0-7 is valid.
SETS = [
    ENGLISH,          # 000
    GERMAN,           # 001
    SWEDISH_FINNISH,  # 010
    ENGLISH,          # Italian (011) - placeholder
    ENGLISH,          # French (100) - placeholder
    ENGLISH,          # Portuguese/Spanish (101) - placeholder
    ENGLISH,          # Turkish (110) - placeholder
    ENGLISH,          # Romanian (111) - placeholder
]

def get_char(byte_val, subset_idx):
    """Return the Unicode character shown for a G0 byte under a national option.

    ``subset_idx`` selects an entry of ``SETS``; an index outside the list
    falls back to entry 0. Only the low seven bits of ``byte_val`` are used,
    so a parity bit in bit 7 is ignored. Codes without an entry in the
    selected table are returned as the character with that code point.
    """
    out_of_range = subset_idx < 0 or subset_idx >= len(SETS)
    table = SETS[0] if out_of_range else SETS[subset_idx]

    # Stored packet bytes may still carry parity in bit 7; drop it first.
    code = byte_val & 0x7F

    try:
        return table[code]
    except KeyError:
        # Not a national-option position: plain 7-bit character.
        return chr(code)

import unicodedata

def get_byte_from_char(char, subset_idx):
    """Return the G0 code for a Unicode character under a national option.

    This is the reverse of ``get_char``: ``char`` is searched among the
    values of the ``SETS`` entry selected by ``subset_idx`` (an index outside
    the list falls back to entry 0). When a match is found its code point in
    the table is returned; otherwise the character's own Unicode code point
    is returned unchanged, which may be larger than 0x7F.

    ``char`` must be a ``str``. It is normalized to NFC before anything else,
    so a base letter followed by a combining mark is composed into the single
    character stored in the tables. Returns 0 if the normalized input is not
    exactly one character long.
    """
    # Normalize before the length check: a decomposed sequence is two code
    # points on input but one after composition, and a few single code
    # points expand under NFC and must not reach ord() below.
    char = unicodedata.normalize('NFC', char)
    if len(char) != 1:
        return 0

    if subset_idx < 0 or subset_idx >= len(SETS):
        subset_idx = 0

    for code, mapped_char in SETS[subset_idx].items():
        if mapped_char == char:
            return code

    # No national-option match: fall back to the character's code point.
    return ord(char)
Initial commit: Core Teletext Editor functionality 2025-12-28 21:38:21 +01:00
			`"""`
			`Teletext Character Sets (G0).`
			`Maps the specific code points (0x23, 0x24, 0x40, 0x5B-0x5E, 0x60, 0x7B-0x7E)`
			`to Unicode characters based on the National Option (3 bits).`
			`"""`

			`# Default (English) - Option 000`
			`ENGLISH = {`
			`0x23: '#', 0x24: '$', 0x40: '@',`
			`0x5B: '[', 0x5C: '\\', 0x5D: ']', 0x5E: '^',`
			0x5F: '_', 0x60: '`',
			`0x7B: '{', 0x7C: '\|', 0x7D: '}', 0x7E: '~'`
			`}`

			`# Swedish/Finnish/Hungarian - Option 010 (2)`
			`SWEDISH_FINNISH = {`
Fixed block rendering issues Fixed issues where graphic blocks had horizontal stripes in them 2026-01-02 00:20:28 +01:00			`0x23: '#', 0x24: '\u00A4', 0x40: '\u00C9',`
			`0x5B: '\u00C4', 0x5C: '\u00D6', 0x5D: '\u00C5', 0x5E: '\u00DC',`
			`0x5F: '_', 0x60: '\u00E9',`
			`0x7B: '\u00E4', 0x7C: '\u00F6', 0x7D: '\u00E5', 0x7E: '\u00FC'`
Initial commit: Core Teletext Editor functionality 2025-12-28 21:38:21 +01:00			`}`

			`# German - Option 001 (1)`
			`GERMAN = {`
			`0x23: '#', 0x24: '$', 0x40: '§',`
			`0x5B: 'Ä', 0x5C: 'Ö', 0x5D: 'Ü', 0x5E: '^',`
			0x5F: '_', 0x60: '`',
			`0x7B: 'ä', 0x7C: 'ö', 0x7D: 'ü', 0x7E: 'ß'`
			`}`

			`# We can add more as needed.`

			`SETS = [`
			`ENGLISH, # 000`
			`GERMAN, # 001`
			`SWEDISH_FINNISH, # 010`
			`ENGLISH, # Italian (011) - placeholder`
			`ENGLISH, # French (100) - placeholder`
			`ENGLISH, # Portuguese/Spanish (101) - placeholder`
			`ENGLISH, # Turkish (110) - placeholder`
			`ENGLISH, # Romania (111) - placeholder`
			`]`

			`def get_char(byte_val, subset_idx):`
			`if subset_idx < 0 or subset_idx >= len(SETS):`
			`subset_idx = 0`

			`mapping = SETS[subset_idx]`

			`# If byte is in mapping, return mapped char.`
			`# Else return ASCII equivalent (for basic chars)`

			`valid_byte = byte_val & 0x7F # Strip parity if present (though our packet data is 8-bit usually already stripping parity?)`
			`# Packet data we store is raw bytes. We should probably strip parity bit 7 before lookup.`

			`if valid_byte in mapping:`
			`return mapping[valid_byte]`

			`return chr(valid_byte)`
Fixed block rendering issues Fixed issues where graphic blocks had horizontal stripes in them 2026-01-02 00:20:28 +01:00
			`import unicodedata`

			`def get_byte_from_char(char, subset_idx):`
			`if len(char) != 1: return 0`

			`# Normalize input to NFC to match our map keys (if they are NFC, which python literals usually are)`
			`char = unicodedata.normalize('NFC', char)`

			`if subset_idx < 0 or subset_idx >= len(SETS):`
			`subset_idx = 0`

			`mapping = SETS[subset_idx]`

			`for code, mapped_char in mapping.items():`
			`if mapped_char == char:`
			`return code`

			`return ord(char)`