#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extracts all images that are embedded in a .sla file into individual image files.

© 2019 MIT license, Ale Rimoldi
"""

import os
import sys
import argparse
import re
import zlib
from pathlib import Path
import base64

# Name of the images directory created next to the input file when -i is not given.
IMAGES_RELATIVE_PATH = 'images'

# A qCompress()ed byte sequence starts with 4 bytes of size information
# that must be dropped before handing the rest to zlib.
QCOMPRESS_HEADER_SIZE = 4

p_image_data = re.compile(r'ImageData="([^"]*)"')
p_image_ext = re.compile(r'inlineImageExt="(.+?)"')
p_item_id = re.compile(r'ItemID="(.+?)"')
p_page = re.compile(r'OwnPage="(.+?)"')


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser(description='Extracts all images that are embedded in a .sla file into individual image files.')
    parser.add_argument('in_file', help='.sla file to be processed.')
    parser.add_argument('-o', dest='out_file', action='store', help='Destination .sla file.')
    parser.add_argument('-i', dest='out_images', action='store', help='Destination directory for the images.')
    return parser.parse_args(argv)


def _images_reference(images_dir, output_file):
    """Return the images directory as it must be referenced from *output_file*.

    The result is relative to the directory of the output document, so the
    PFILE attributes stay valid when -o or -i point somewhere else than the
    defaults. When no relative path exists (e.g. different drives on
    Windows) the absolute path of the images directory is returned.
    """
    try:
        return Path(os.path.relpath(images_dir, Path(output_file).parent))
    except ValueError:
        return Path(images_dir).resolve()


def _extract_inline_image(line, images_dir, images_ref):
    """Write the image embedded in *line* to *images_dir* and return the rewritten line.

    A line without an ``ImageData="..."`` attribute is returned unchanged.
    Otherwise the image is stored as ``<page>-<item id>.<ext>``, the
    ``ImageData``, ``isInlineImage`` and ``inlineImageExt`` attributes are
    removed and an empty ``PFILE`` is pointed at the new file (below
    *images_ref*).

    If the line cannot be processed (missing attributes, undecodable data,
    unsafe file name) a warning is printed to stderr and the line is returned
    unchanged, so the image stays embedded instead of being lost.
    """
    data_match = p_image_data.search(line)
    if data_match is None:
        return line

    ext_match = p_image_ext.search(line)
    id_match = p_item_id.search(line)
    page_match = p_page.search(line)
    if ext_match is None or id_match is None or page_match is None:
        print('skipping inline image: inlineImageExt, ItemID or OwnPage is missing', file=sys.stderr)
        return line

    image_type = ext_match[1]
    item_id = id_match[1]

    # images are qcompress(ed) and stored as base64.
    # to decompress a qcompress bytes sequence one needs to remove the first 4 bytes (size information)
    # see https://doc.qt.io/qt-5/qbytearray.html#qUncompress
    # see Scribus150Format::pasteItem
    # Everything is decoded before the image file is opened, so that a
    # failure does not leave an empty file behind.
    try:
        # OwnPage is negative for items that are not on a page: file them under page 0.
        page_number = max(int(page_match[1]), 0)
        decoded = base64.standard_b64decode(data_match[1])
        image_bytes = zlib.decompress(decoded[QCOMPRESS_HEADER_SIZE:])
    except (ValueError, zlib.error) as error:
        print(f'skipping inline image of item {item_id}: {error}', file=sys.stderr)
        return line

    image_file = f'{page_number:03d}-{item_id}.{image_type}'
    # The name is built from values read in the document: refuse anything
    # that would be written outside of the images directory.
    if Path(image_file).name != image_file:
        print(f'skipping inline image: unsafe file name {image_file}', file=sys.stderr)
        return line

    print(image_file)
    with open(Path(images_dir) / image_file, 'wb') as image_output:
        image_output.write(image_bytes)

    # replace: PFILE="relative url"
    # remove: isInlineImage="1" inlineImageExt="*", ImageData="*"
    image_reference = (Path(images_ref) / image_file).as_posix()
    line = line[:data_match.start()] + line[data_match.end():]
    line = line.replace('isInlineImage="1"', '')
    line = line.replace(f'inlineImageExt="{image_type}"', '')
    line = line.replace('PFILE=""', f'PFILE="{image_reference}"')
    return line


def main(argv=None):
    """Extract the inline images of a .sla file and write the relinked document.

    *argv* is the list of command line arguments (``sys.argv[1:]`` when None).
    Without -o the output is written next to the input as ``<stem>-out.sla``;
    without -i the images go into the ``images`` directory next to the input.
    Exits with a non-zero status when the images directory does not exist.
    """
    args = _parse_args(argv)

    sla_input_file = args.in_file
    parent_path = Path(sla_input_file).parent
    if args.out_file is not None:
        sla_output_file = args.out_file
    else:
        sla_output_file = parent_path / (Path(sla_input_file).stem + '-out.sla')
    if args.out_images is not None:
        images_path = args.out_images
    else:
        images_path = parent_path / IMAGES_RELATIVE_PATH

    if not Path(images_path).is_dir():
        # sys.exit() with a message prints it to stderr and exits with status 1.
        sys.exit(f'cannot store images {images_path} does not exist')

    images_ref = _images_reference(images_path, sla_output_file)

    # .sla files are UTF-8 encoded XML: do not depend on the locale's default encoding.
    with open(sla_input_file, encoding='utf-8') as sla_input, \
            open(sla_output_file, 'w', encoding='utf-8') as sla_output:
        for line in sla_input:
            sla_output.write(_extract_inline_image(line, images_path, images_ref))


if __name__ == '__main__':
    main()

# A linked image:
#
#
# An embedded image (with ImageData emptied)
#