feat: implement Hamming 8/4 error correction and auto-alignment for T42 files
This commit is contained in:
@@ -1,22 +1,46 @@
|
||||
import os
|
||||
from typing import List, Callable, Optional
|
||||
from .models import Packet, Page, TeletextService
|
||||
from .models import Packet, Page, TeletextService, decode_hamming_8_4
|
||||
|
||||
def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], None]] = None) -> TeletextService:
|
||||
service = TeletextService()
|
||||
|
||||
total_bytes = os.path.getsize(file_path)
|
||||
|
||||
# Auto-detect alignment
|
||||
# Scan first 4096 bytes for the offset that yields the most Row 0 headers
|
||||
best_offset = 0
|
||||
max_headers = 0
|
||||
with open(file_path, 'rb') as f:
|
||||
head_data = f.read(4096 + 42)
|
||||
for offset in range(42):
|
||||
headers = 0
|
||||
for i in range(offset, len(head_data) - 42, 42):
|
||||
chunk = head_data[i:i+42]
|
||||
try:
|
||||
# We check if Mag/Row decode to something sensible
|
||||
# AND if it's Row 0, we check if Page Num decodes without errors (ideally)
|
||||
# But for now, just counting Row 0s is good enough.
|
||||
mag, row = decode_packet_header(chunk[0], chunk[1])
|
||||
if row == 0 and mag != 8: # Mag 8 is often just nulls
|
||||
headers += 1
|
||||
except:
|
||||
pass
|
||||
if headers > max_headers:
|
||||
max_headers = headers
|
||||
best_offset = offset
|
||||
|
||||
# Each packet is 42 bytes
|
||||
total_packets = total_bytes // 42
|
||||
total_packets = (total_bytes - best_offset) // 42
|
||||
processed_packets = 0
|
||||
|
||||
with open(file_path, 'rb') as f:
|
||||
f.seek(best_offset)
|
||||
while True:
|
||||
chunk = f.read(42)
|
||||
if not chunk:
|
||||
break
|
||||
if len(chunk) < 42:
|
||||
# Should not happen in a valid T42 stream; if it does, ignore the incomplete tail.
|
||||
break
|
||||
|
||||
processed_packets += 1
|
||||
@@ -27,9 +51,6 @@ def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], No
|
||||
service.all_packets.append(packet)
|
||||
|
||||
# Logic to group into pages.
|
||||
# This is non-trivial because packets for a page might be interleaved or sequential.
|
||||
# Standard implementation: Packets arrive in order. Row 0 starts a new page/subpage.
|
||||
|
||||
if packet.row == 0:
|
||||
# Start of a new page header.
|
||||
# Byte 2-9 of header contain Page Number, Subcode, Control bits etc.
|
||||
@@ -175,12 +196,6 @@ def save_t42(file_path: str, service: TeletextService, progress_callback: Option
|
||||
|
||||
f.write(header + packet.data)
|
||||
|
||||
def decode_hamming_8_4(byte_val: int) -> int:
    """Decode one Hamming 8/4 protected byte into its 4-bit value.

    Only the low 8 bits of ``byte_val`` are examined. Counting from the
    least-significant bit, they are laid out as
    ``P1 D1 P2 D2 P3 D3 P4 D4``: data bits sit at the odd positions and
    parity bits at the even positions.

    A single flipped bit anywhere in the byte is corrected. When the parity
    checks indicate an uncorrectable (even-count) error, the raw data bits
    are returned unchanged, so the function always yields a value in 0..15
    and never raises.

    Args:
        byte_val: The received byte as an integer.

    Returns:
        The decoded nibble (0..15), with D1 as the least-significant bit.
    """
    p1, d1, p2, d2, p3, d3, p4, d4 = ((byte_val >> pos) & 1 for pos in range(8))
    nibble = d1 | (d2 << 1) | (d3 << 2) | (d4 << 3)

    # The code uses odd parity: every check XORs to 1 on a clean byte, so a
    # failing check is the complement of the XOR.
    fail_a = (p1 ^ d1 ^ d3 ^ d4) ^ 1
    fail_b = (p2 ^ d1 ^ d2 ^ d4) ^ 1
    fail_c = (p3 ^ d1 ^ d2 ^ d3) ^ 1
    overall_ok = p1 ^ d1 ^ p2 ^ d2 ^ p3 ^ d3 ^ p4 ^ d4

    if overall_ok:
        # Either the byte is clean, or an even number of bits flipped, which
        # this code cannot correct; in both cases keep the raw data bits.
        return nibble

    # Overall parity failed, so an odd number of bits flipped; treat it as a
    # single-bit error. The pattern of failing checks identifies the bit, and
    # only the four data-bit patterns require flipping something in the result.
    syndrome = fail_a | (fail_b << 1) | (fail_c << 2)
    data_bit_for_syndrome = {
        0b111: 0b0001,  # checks A, B and C fail -> D1
        0b110: 0b0010,  # checks B and C fail    -> D2
        0b101: 0b0100,  # checks A and C fail    -> D3
        0b011: 0b1000,  # checks A and B fail    -> D4
    }
    return nibble ^ data_bit_for_syndrome.get(syndrome, 0)
|
||||
|
||||
def parse_header(data: bytearray):
|
||||
# Data is 40 bytes.
|
||||
# Bytes 0-7 are Page Num (2), Subcode (4), Control (2) - ALL Hamming encoded.
|
||||
|
||||
Reference in New Issue
Block a user