Review notes (free-form preamble; patch tools skip text that precedes the
first "diff --git" header).

What this part of the patch does:
  * io.py / load_t42: returns an empty service when the path does not exist;
    keeps a per-magazine row buffer and an active-page lookup instead of
    scanning service.pages backwards for every non-header packet; every row-0
    packet creates a new Page snapshot built from cloned buffer packets; an
    erase-page flag (bit 0 of control_bits) resets the magazine buffer.
  * io.py / parse_header: now returns (page_num, sub_code, control_bits,
    language) and reads the control nibbles from header bytes 6 and 7.
  * models.py / Page: adds control_bits plus get_control_bit/set_control_bit.

Change made during review:
  * parse_header packed sub_code with S3 at "<< 7" and S4 at "<< 11". That
    contradicts the byte-layout comment added in the same function (S3 is
    bits 8-11, S4 is bits 12-13), the Page.sub_code comment (0000 to 3F7F hex)
    and the 4-hex-digit display in ui.py. The shifts are now 8 and 12, so the
    value is the nibble-aligned S4 S3 S2 S1 form. The number of added lines is
    unchanged.

NOTE(review): the line structure, indentation and blank context lines below
were reconstructed from whitespace-collapsed text. Hunk headers are carried
over unchanged and could not be reconciled with the visible lines (each io.py
hunk shows one more added line than its header allows), so verify against the
real tree, e.g. with "git apply --check --recount", before applying.

diff --git a/src/teletext/io.py b/src/teletext/io.py
index b8573e8..322f145 100644
--- a/src/teletext/io.py
+++ b/src/teletext/io.py
@@ -5,69 +5,82 @@ from .models import Packet, Page, TeletextService
 
 def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], None]] = None) -> TeletextService:
     service = TeletextService()
+    if not os.path.exists(file_path):
+        return service
+
+    total_bytes = os.path.getsize(file_path)
-    # Each packet is 42 bytes
     total_packets = total_bytes // 42
     processed_packets = 0
 
+    # Magazine buffers: magazine -> {row_num: Packet}
+    magazine_buffers = {m: {} for m in range(1, 9)}
+    # Active page lookup: magazine -> Page object (for O(1) access)
+    active_pages = {m: None for m in range(1, 9)}
+
     with open(file_path, 'rb') as f:
         while True:
             chunk = f.read(42)
-            if not chunk:
-                break
-            if len(chunk) < 42:
-                # Should not happen in a valid T42 stream, or we just ignore incomplete tail
-                break
+            if not chunk: break
+            if len(chunk) < 42: break
 
             processed_packets += 1
-            if progress_callback and processed_packets % 100 == 0:
+            if progress_callback and processed_packets % 500 == 0:
                 progress_callback(processed_packets, total_packets)
 
             packet = Packet(chunk)
             service.all_packets.append(packet)
 
-            # Logic to group into pages.
-            # This is non-trivial because packets for a page might be interleaved or sequential.
-            # Standard implementation: Packets arrive in order. Row 0 starts a new page/subpage.
+            mag = packet.magazine
+            buffer = magazine_buffers[mag]
 
             if packet.row == 0:
-                # Start of a new page header.
-                # Byte 2-9 of header contain Page Number, Subcode, Control bits etc.
-                # We need to parse the header to identify the page.
+                p_num, sub_code, control_bits, language = parse_header(packet.data)
 
-                # Header format (after Mag/Row):
-                # Bytes: P1 P2 S1 S2 S3 S4 C1 C2 ...
-                # All Hamming 8/4 encoded.
+                # Check Erase Page bit (C4 is bit 0 of control_bits)
+                erase_page = bool(control_bits & 1)
 
-                # For now, let's just create a new page entry for every Header we see,
-                # or find the existing one if we want to support updates (but T42 usually is a stream capture).
-                # If it's an editor file, it's likely sequential.
-
-                p_num, sub_code, language = parse_header(packet.data)
-
-                # Create new page
-                new_page = Page(magazine=packet.magazine, page_number=p_num, sub_code=sub_code, language=language)
-                new_page.packets.append(packet)
-                service.pages.append(new_page)
-            else:
-                # Add to the "current" page of this magazine.
-                # We need to track the current active page for each magazine.
-                # A simplistic approach: add to the last page added that matches the magazine ??
-                # Robust approach: Maintain a dict of current_pages_by_magazine.
-
-                # Let's find the last page in service that matches the packet's magazine
-                # This is O(N) but N (pages) is small.
-                target_page = None
-                for p in reversed(service.pages):
-                    if p.magazine == packet.magazine:
-                        target_page = p
-                        break
-
-                if target_page:
-                    target_page.packets.append(packet)
+                if erase_page:
+                    magazine_buffers[mag] = {0: packet}
+                    buffer = magazine_buffers[mag]
                 else:
-                    # Packet without a header? Orphaned. Just keep in all_packets
-                    pass
+                    buffer[0] = packet
+
+                # Create snapshot
+                new_page = Page(
+                    magazine=mag,
+                    page_number=p_num,
+                    sub_code=sub_code,
+                    control_bits=control_bits,
+                    language=language
+                )
+
+                # Efficient cloning: use the existing Packet objects where possible,
+                # but we MUST clone the data bytearray if we plan to edit it later.
+                for r_num, pkt in sorted(buffer.items()):
+                    # Create a new packet shell sharing the original_data but with its own data bytearray
+                    cloned_pkt = Packet(pkt.original_data)
+                    cloned_pkt.data = bytearray(pkt.data)
+                    new_page.packets.append(cloned_pkt)
+
+                service.pages.append(new_page)
+                active_pages[mag] = new_page # Update active page lookup
+
+            elif 1 <= packet.row <= 31:
+                # Update the running buffer
+                buffer[packet.row] = packet
+
+                # Update the active snapshot immediately
+                target_page = active_pages[mag]
+                if target_page:
+                    # Update row in the current active page
+                    found_row = False
+                    for i, p in enumerate(target_page.packets):
+                        if p.row == packet.row:
+                            target_page.packets[i] = packet
+                            found_row = True
+                            break
+                    if not found_row:
+                        target_page.packets.append(packet)
 
     return service
 
@@ -182,52 +195,51 @@ def decode_hamming_8_4(byte_val):
            (((byte_val >> 7) & 1) << 3)
 
 def parse_header(data: bytearray):
-    # Data is 40 bytes.
-    # Bytes 0-7 are Page Num (2), Subcode (4), Control (2) - ALL Hamming encoded.
-
-    # 0: Page Units (PU)
-    # 1: Page Tens (PT)
+    # Data is 40 bytes (after MRAG).
+    # Byte 0: Page Units (PU)
+    # Byte 1: Page Tens (PT)
+    # Byte 2: Subcode S1 (bits 0-3)
+    # Byte 3: Subcode S2 (bits 4-6), C4 (bit 7)
+    # Byte 4: Subcode S3 (bits 8-11)
+    # Byte 5: Subcode S4 (bits 12-13), C5 (bit 14), C6 (bit 15)
+    # Byte 6: C7-C10
+    # Byte 7: C11-C14 (C12-C14 are Language)
     pu = decode_hamming_8_4(data[0])
     pt = decode_hamming_8_4(data[1])
 
-    # Use BCD/Hex-like storage: High nibble is Tens, Low nibble is Units.
-    # This preserves Hex pages (A-F) without colliding with decimal pages.
-    # E.g. Page 1FF -> Tens=F(15), Units=F(15) -> 0xFF (255)
-    # Page 12E -> Tens=2, Units=E(14) -> 0x2E (46)
-    # Page 134 -> Tens=3, Units=4 -> 0x34 (52)
-    # 0x2E != 0x34. No collision.
+    # Page number: pt (tens), pu (units). 0x00 to 0xFF.
+    page_num = ((pt & 0xF) << 4) | (pu & 0xF)
 
-    # Subcode: S1, S2, S3, S4
-    # S1 (low), S2, S3, S4 (high)
-
+    # Subcode (13 significant bits, nibble-aligned S4 S3 S2 S1: 0x0000-0x3F7F)
     s1 = decode_hamming_8_4(data[2])
     s2 = decode_hamming_8_4(data[3])
     s3 = decode_hamming_8_4(data[4])
     s4 = decode_hamming_8_4(data[5])
 
-    # Subcode logic is a bit complex with specific bit mapping for "Time" vs "Subcode"
-    # But usually just combining them gives the raw subcode value.
-    # S1: bits 0-3
-    # S2: bits 4-6 (bit 4 is C4) -> actually S2 has 3 bits of subcode + 1 control bit usually?
-    # Let's simplify and just concat them for a unique identifier.
+    sub_code = (s1 & 0xF) | \
+               ((s2 & 0x7) << 4) | \
+               ((s3 & 0xF) << 8) | \
+               ((s4 & 0x3) << 12)
+
+    # Control bits C4-C14
+    c4 = (s2 >> 3) & 1
+    c5 = (s4 >> 2) & 1
+    c6 = (s4 >> 3) & 1
 
-    sub_code = s1 | (s2 << 4) | (s3 << 8) | (s4 << 12)
+    c_7_10 = decode_hamming_8_4(data[6])
+    c_11_14 = decode_hamming_8_4(data[7])
 
-    # Control bits C12, C13, C14 are in Byte 8 (index 8)
-    # They determine the National Option (Language)
-    c_bits_2 = decode_hamming_8_4(data[8])
+    # bitmask starting at index 0 for C4
+    control_bits = c4 | (c5 << 1) | (c6 << 2) | \
+                   ((c_7_10 & 0xF) << 3) | \
+                   ((c_11_14 & 0xF) << 7)
+
+    # Language (C12, C13, C14)
+    # c_11_14: bit 0:C11, bit 1:C12, bit 2:C13, bit 3:C14
+    language = (c_11_14 >> 1) & 0x7
 
-    # Fix for Language Detection:
-    # It seems C12 and C13 are swapped in the Hamming decoding or file format relative to expected values.
-    # C12 is bit 0, C13 is bit 1.
-    # We swap them so D1 maps to C13 (Swedish bit) and D2 maps to C12 (German bit).
-    # Original: language = c_bits_2 & 0b111
-
-    language = ((c_bits_2 & 1) << 1) | ((c_bits_2 & 2) >> 1) | (c_bits_2 & 4)
-
-    return page_num, sub_code, language
+    return page_num, sub_code, control_bits, language
 
 def save_tti(file_path: str, page: Page):
     """
diff --git a/src/teletext/models.py b/src/teletext/models.py
index 5cb7c48..cd1ec13 100644
--- a/src/teletext/models.py
+++ b/src/teletext/models.py
@@ -65,9 +65,13 @@ class Page:
     Can have multiple subpages.
     """
     magazine: int
-    page_number: int # 00-99
-    sub_code: int = 0 # Subpage code (0000 to 3F7F hex usually, simplest is 0-99 equivalent)
-    language: int = 0 # National Option (0-7)
+    page_number: int # 00-99 (Hex storage: 0x00-0xFF)
+    sub_code: int = 0 # 13-bit subcode (0000 to 3F7F hex)
+
+    # Control bits C4-C14
+    control_bits: int = 0
+
+    language: int = 0 # National Option (0-7, from C12-C14)
     packets: List[Packet] = field(default_factory=list)
 
     @property
@@ -75,6 +79,20 @@ class Page:
         # Format as Hex to support A-F pages
         return f"{self.magazine}{self.page_number:02X}"
 
+    def get_control_bit(self, n: int) -> bool:
+        """ Returns value of control bit Cn (4-14) """
+        if 4 <= n <= 14:
+            return bool((self.control_bits >> (n - 4)) & 1)
+        return False
+
+    def set_control_bit(self, n: int, value: bool):
+        """ Sets value of control bit Cn (4-14) """
+        if 4 <= n <= 14:
+            if value:
+                self.control_bits |= (1 << (n - 4))
+            else:
+                self.control_bits &= ~(1 << (n - 4))
+
     def calculate_crc(self) -> int:
         """
         Calculates the CRC-16 checksum for the page.
diff --git a/src/teletext/renderer.py b/src/teletext/renderer.py index a6219d3..4668363 100755 --- a/src/teletext/renderer.py +++ b/src/teletext/renderer.py @@ -214,10 +214,18 @@ class TeletextCanvas(QWidget): painter.end() return - # Draw each packet - # Initialize a grid of empty chars + # Check Control Bits for "Inhibit Display" (C10) + # In our bitmask (from parse_header): + # C4:0, C5:1, C6:2, C7:3, C8:4, C9:5, C10:6, C11:7, C12:8, C13:9, C14:10 + inhibit_display = bool((self.page.control_bits >> 6) & 1) + if inhibit_display: + painter.setPen(Qt.GlobalColor.gray) + painter.drawText(10, 20, f"Page {self.page.full_page_number} - INHIBIT DISPLAY (C10 set)") + painter.end() + return + + # Organize each packet by row grid = [None] * 26 # 0-25 - for p in self.page.packets: if 0 <= p.row <= 25: grid[p.row] = p @@ -243,6 +251,10 @@ class TeletextCanvas(QWidget): # Output mask for the next row next_occlusion_mask = [False] * 40 + # Check for Suppress Header (C7) + # C7:3, so bit 3 of control_bits + suppress_header = bool((self.page.control_bits >> 3) & 1) + # Default State at start of row fg = COLORS[7] # White bg = COLORS[0] # Black @@ -272,29 +284,18 @@ class TeletextCanvas(QWidget): for c in range(40): x = c * self.cell_w - - # If this cell is occluded by the row above, skip drawing and attribute processing? - # Spec says "The characters in the row below are ignored." - # Ideally we shouldn't even process attributes, but for simple renderer we just skip draw. - # However, if we skip attribute processing, state (fg/bg) won't update. - # Teletext attributes are serial. - # BUT, if the row above covers it, the viewer sees the row above. - # Does the hidden content affect the *rest* of the row? - # Likely yes, attributes usually propagate. - # But the spec says "ignored". Let's assume we skip *everything* for this cell visually, - # but maybe we should technically maintain state? - # For "Double Height" visual correctness, skipping drawing is the key. 
- # We will Process attributes (to keep state consistent) but Skip Drawing if occluded. - - # Wait, if we process attributes, we might set double_height=True for the NEXT row? - # If this cell is occluded, it shouldn't trigger DH for the next row. - is_occluded = occlusion_mask[c] # Decide byte value - if row == 0 and c < 8: - # Use generated header prefix - byte_val = ord(header_prefix[c]) + if row == 0: + if c < 8: + # Column 0-7: Header prefix + byte_val = ord(header_prefix[c]) + elif suppress_header and c < 32: + # Column 8-31: Hide header if C7 set + byte_val = 0x20 + else: + byte_val = data[c] if c < len(data) else 0x20 else: byte_val = data[c] if c < len(data) else 0x20 diff --git a/src/teletext/ui.py b/src/teletext/ui.py index f1294c8..c0c3bef 100644 --- a/src/teletext/ui.py +++ b/src/teletext/ui.py @@ -524,8 +524,9 @@ class MainWindow(QMainWindow): self.language_overrides[key] = idx # Patch Row 0 packet data to persist language selection to file - # Language bits are in Byte 8 (Control Bits 2): C12, C13, C14 - # We need to preserve C11 (bit 3 of encoded 4-bit val) which is "Inhibit Display" usually 0 + # Language bits are in Byte 7 (Control Bits C11-C14) + # Byte 7 encoded structure: bit 0:C11, bit 1:C12, bit 2:C13, bit 3:C14 + # National Option index corresponds to (C14 C13 C12) # Find Row 0 packet header_packet = None @@ -534,36 +535,23 @@ class MainWindow(QMainWindow): header_packet = p break - if header_packet and len(header_packet.data) > 8: + if header_packet and len(header_packet.data) >= 8: try: - old_val = decode_hamming_8_4(header_packet.data[8]) - # Encoded nibble structure: D1(b0), D2(b1), D3(b2), D4(b3) - # D1 maps to C12 - # D2 maps to C13 - # D3 maps to C14 - # D4 maps to C11 + # Byte 7 contains C11, C12, C13, C14 + old_val = decode_hamming_8_4(header_packet.data[7]) - # io.py logic for reading: - # language = ((c_bits_2 & 1) << 1) | ((c_bits_2 & 2) >> 1) | (c_bits_2 & 4) - # i.e. 
Lang Bit 0 comes from D2, Lang Bit 1 comes from D1, Lang Bit 2 comes from D3 + l0 = (idx >> 0) & 1 # C12 + l1 = (idx >> 1) & 1 # C13 + l2 = (idx >> 2) & 1 # C14 - # So for writing: - # D1 = Lang Bit 1 - # D2 = Lang Bit 0 - # D3 = Lang Bit 2 - - l0 = (idx >> 0) & 1 - l1 = (idx >> 1) & 1 - l2 = (idx >> 2) & 1 - - d1 = l1 + d1 = (old_val >> 0) & 1 # Preserve C11 d2 = l0 - d3 = l2 - d4 = (old_val >> 3) & 1 # Preserve C11 + d3 = l1 + d4 = l2 new_val = d1 | (d2 << 1) | (d3 << 2) | (d4 << 3) - header_packet.data[8] = encode_hamming_8_4(new_val) + header_packet.data[7] = encode_hamming_8_4(new_val) self.set_modified(True) self.status_label.setText(f"Language set to {self.language_names[idx]} (saved to header).") except Exception as e: @@ -884,9 +872,21 @@ class MainWindow(QMainWindow): self.subpage_combo.clear() for i, p in enumerate(pages): - # Display format: Index or Subcode? - # Subcode is often 0000. Index 1/N is clearer for editing. - label = f"{i+1}/{len(pages)} (Sub {p.sub_code:04X})" + # Try to find the clock in Row 0 (last 8 characters) + clock_str = "" + for pkt in p.packets: + if pkt.row == 0: + # Bytes 32-39 of the 40-byte data are the clock + raw_clock = pkt.data[32:40].decode('latin-1', errors='replace') + # Strip parity from each char and filter non-printables + clock_str = "".join([chr(ord(c) & 0x7F) if 32 <= (ord(c) & 0x7F) <= 126 else " " for c in raw_clock]) + break + + label = f"{i+1}/{len(pages)} " + if clock_str.strip(): + label += f"[{clock_str.strip()}] " + label += f"(Sub {p.sub_code:04X})" + self.subpage_combo.addItem(label, p) self.subpage_combo.blockSignals(False)