feat: implement Hamming 8/4 error correction and auto-alignment for T42 files
All checks were successful
Build Linux / Build Linux (push) Successful in 1m33s
Build Windows / Build Windows (push) Successful in 4m50s

This commit is contained in:
2026-02-21 11:35:55 +01:00
parent 18fef7b049
commit 62dc11fd40
2 changed files with 63 additions and 17 deletions

View File

@@ -1,22 +1,46 @@
import os
from typing import List, Callable, Optional
from .models import Packet, Page, TeletextService
from .models import Packet, Page, TeletextService, decode_hamming_8_4
def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], None]] = None) -> TeletextService:
service = TeletextService()
total_bytes = os.path.getsize(file_path)
# Auto-detect alignment.
# Read the first 4096 + 42 bytes, try each of the 42 possible byte offsets,
# and keep the first offset that yields the most Row 0 headers (default: 0).
best_offset = 0
max_headers = 0
with open(file_path, 'rb') as f:
head_data = f.read(4096 + 42)
for offset in range(42):
headers = 0
for i in range(offset, len(head_data) - 42, 42):
chunk = head_data[i:i+42]
try:
# Decode the magazine/row address from the first two bytes of the chunk.
# Only Row 0 headers are counted; the page number is not validated here.
# For now, just counting Row 0s is good enough.
mag, row = decode_packet_header(chunk[0], chunk[1])
if row == 0 and mag != 8: # Mag 8 is often just nulls
headers += 1
except:
pass
if headers > max_headers:
max_headers = headers
best_offset = offset
# Each packet is 42 bytes
total_packets = total_bytes // 42
total_packets = (total_bytes - best_offset) // 42
processed_packets = 0
with open(file_path, 'rb') as f:
f.seek(best_offset)
while True:
chunk = f.read(42)
if not chunk:
break
if len(chunk) < 42:
# A short read should not happen in a valid T42 stream; if it does, ignore the incomplete tail.
break
processed_packets += 1
@@ -27,9 +51,6 @@ def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], No
service.all_packets.append(packet)
# Logic to group into pages.
# This is non-trivial because packets for a page might be interleaved or sequential.
# Standard implementation: Packets arrive in order. Row 0 starts a new page/subpage.
if packet.row == 0:
# Start of a new page header.
# Byte 2-9 of header contain Page Number, Subcode, Control bits etc.
@@ -175,12 +196,6 @@ def save_t42(file_path: str, service: TeletextService, progress_callback: Option
f.write(header + packet.data)
def decode_hamming_8_4(byte_val):
    """Extract the four data bits from a Hamming 8/4 coded byte.

    The data bits are read from bit positions 1, 3, 5 and 7 of
    ``byte_val`` (bit 0 being the least significant bit) and packed into
    a nibble, with input bit 1 becoming the least significant output bit.

    The remaining bits (positions 0, 2, 4 and 6) are ignored, so this
    function performs no error detection or correction.

    Returns:
        An int in the range 0-15.
    """
    nibble = 0
    # Input bit positions holding data, listed from least to most
    # significant output bit.
    for out_pos, in_pos in enumerate((1, 3, 5, 7)):
        nibble |= ((byte_val >> in_pos) & 1) << out_pos
    return nibble
def parse_header(data: bytearray):
# Data is 40 bytes.
# Bytes 0-7 are Page Num (2), Subcode (4), Control (2) - ALL Hamming encoded.