From fe4253c8df5f5a973098e7b5feb94d1532370961 Mon Sep 17 00:00:00 2001 From: Daniel Dybing Date: Sat, 21 Feb 2026 12:17:14 +0100 Subject: [PATCH] fix: prevent data leakage between pages by splitting on row sequence reversal --- src/teletext/io.py | 55 ++++++++++++++++++++++++++-------------------- src/teletext/ui.py | 5 ++++- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/teletext/io.py b/src/teletext/io.py index 1aa3b09..abebcfb 100644 --- a/src/teletext/io.py +++ b/src/teletext/io.py @@ -33,7 +33,11 @@ def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], No # Each packet is 42 bytes total_packets = (total_bytes - best_offset) // 42 processed_packets = 0 - + + # Track current active page for each magazine to handle interleaved packets + current_pages_by_mag = {} # mag -> Page object + last_row_by_mag = {} # mag -> int + with open(file_path, 'rb') as f: f.seek(best_offset) while True: @@ -50,42 +54,45 @@ def load_t42(file_path: str, progress_callback: Optional[Callable[[int, int], No packet = Packet(chunk) service.all_packets.append(packet) + mag = packet.magazine + row = packet.row + # Logic to group into pages. - if packet.row == 0: + if row == 0: # Start of a new page header. - # Byte 2-9 of header contain Page Number, Subcode, Control bits etc. - # We need to parse the header to identify the page. - - # Header format (after Mag/Row): - # Bytes: P1 P2 S1 S2 S3 S4 C1 C2 ... - # All Hamming 8/4 encoded. - - # For now, let's just create a new page entry for every Header we see, - # or find the existing one if we want to support updates (but T42 usually is a stream capture). - # If it's an editor file, it's likely sequential. - p_num, sub_code, language = parse_header(packet.data) # Create new page - new_page = Page(magazine=packet.magazine, page_number=p_num, sub_code=sub_code, language=language) + new_page = Page(magazine=mag, page_number=p_num, sub_code=sub_code, language=language) new_page.packets.append(packet) service.pages.append(new_page) + + # Update tracking + current_pages_by_mag[mag] = new_page + last_row_by_mag[mag] = 0 else: # Add to the "current" page of this magazine. - # We need to track the current active page for each magazine. - # A simplistic approach: add to the last page added that matches the magazine ?? - # Robust approach: Maintain a dict of current_pages_by_magazine. + target_page = current_pages_by_mag.get(mag) + prev_row = last_row_by_mag.get(mag, -1) - # Let's find the last page in service that matches the packet's magazine - # This is O(N) but N (pages) is small. - target_page = None - for p in reversed(service.pages): - if p.magazine == packet.magazine: - target_page = p - break + # Robustness check for VHS captures: + # If we see a row number that has already passed (e.g. Row 1 after Row 25) + # AND we didn't see a Row 0, it means a new page started but we missed the header. + # We should split into a new Page object to avoid data corruption. + if target_page and row <= prev_row and row != prev_row: # Strictly less than (or handle duplicate rows?) + # In some captures, we might see the same row twice (Field 1/2). + # If it's the SAME row number, we just append (overwrites in renderer). + # If it's a LOWER row number, it's definitely a new cycle. + + # Create a "Lost Header" page + # We use page_number=0xFF to indicate unknown, but we keep mag. + target_page = Page(magazine=mag, page_number=0xFF, sub_code=0, language=0) + service.pages.append(target_page) + current_pages_by_mag[mag] = target_page if target_page: target_page.packets.append(packet) + last_row_by_mag[mag] = row else: # Packet without a header? Orphaned. Just keep in all_packets pass diff --git a/src/teletext/ui.py b/src/teletext/ui.py index f1294c8..4b633eb 100644 --- a/src/teletext/ui.py +++ b/src/teletext/ui.py @@ -870,7 +870,10 @@ class MainWindow(QMainWindow): for mag, pnum in sorted_keys: # Display as Hex - label = f"{mag}{pnum:02X}" + if pnum == 0xFF: + label = f"{mag}?? (Lost Header)" + else: + label = f"{mag}{pnum:02X}" item = QListWidgetItem(label) item.setData(Qt.ItemDataRole.UserRole, (mag, pnum)) self.page_list.addItem(item)