#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extracts all images that are embedded in a .sla file into individual image files
and writes a copy of the .sla file that links to the extracted images instead.
© 2019 MIT license, Ale Rimoldi <a.l.e@graphicslab.org>
"""
import os
import sys
import argparse
import re
import zlib
from pathlib import Path
import base64

# Command-line interface: one mandatory input document plus two optional
# destinations. Both optional values are None when the flag is omitted; the
# code that reads `args` substitutes defaults derived from the input path.
parser = argparse.ArgumentParser(
    description='Extracts all images that are embedded in a .sla file into individual image files.',
)
parser.add_argument(
    'in_file',
    help='.sla file to be processed.',
)
parser.add_argument(
    '-o',
    dest='out_file',
    action='store',
    help='Destination .sla file.',
)
parser.add_argument(
    '-i',
    dest='out_images',
    action='store',
    help='Destination directory for the images.',
)

# Parsed once at start-up; argparse terminates the process on invalid usage.
args = parser.parse_args()

# Input document and the directory it lives in; the defaults below are
# derived from that directory.
SLA_INPUT_FILE = args.in_file
PARENT_PATH = Path(SLA_INPUT_FILE).parent

# Rewritten document: the -o value if given, otherwise "<stem>-out.sla" next
# to the input file.
SLA_OUTPUT_FILE = (
    args.out_file
    if args.out_file is not None
    else PARENT_PATH / (Path(SLA_INPUT_FILE).stem + '-out.sla')
)

# Directory receiving the extracted images: the -i value if given, otherwise
# an "images" directory next to the input file.
IMAGES_ABSOLUTE_PATH = (
    args.out_images
    if args.out_images is not None
    else PARENT_PATH / 'images'
)

# Path prefix written into the PFILE attribute of every extracted image.
# It is computed from the directory of the *output* document to the actual
# image directory, so the links stay valid when -o and/or -i point somewhere
# else. With the default locations this still evaluates to 'images'.
# NOTE(review): assumes Scribus resolves relative PFILE values against the
# directory of the .sla file -- confirm.
try:
    IMAGES_RELATIVE_PATH = os.path.relpath(IMAGES_ABSOLUTE_PATH, Path(SLA_OUTPUT_FILE).parent)
except ValueError:
    # os.path.relpath raises ValueError on Windows when both paths are on
    # different drives; fall back to an absolute reference in that case.
    IMAGES_RELATIVE_PATH = os.path.abspath(IMAGES_ABSOLUTE_PATH)

# Abort early when the image destination is not usable. The directory is not
# created automatically (a makedirs call was left disabled here), so the user
# has to provide it. A path that exists but is a regular file is rejected as
# well, since images could not be written into it.
if not os.path.isdir(IMAGES_ABSOLUTE_PATH):
    # Report on stderr and exit with a non-zero status so that calling
    # scripts can detect the failure (a bare sys.exit() reports success).
    print('cannot store images:', IMAGES_ABSOLUTE_PATH, 'is not an existing directory', file=sys.stderr)
    sys.exit(1)

# Pre-compiled attribute matchers applied to each line that carries an
# embedded image. Group 1 of every pattern is the (non-greedy) attribute
# value between the double quotes:
#   p_image_ext -> inlineImageExt (file extension of the embedded image)
#   p_item_id   -> ItemID
#   p_page      -> OwnPage
p_image_ext, p_item_id, p_page = (
    re.compile(attribute_pattern)
    for attribute_pattern in (
        r'inlineImageExt="(.+?)"',
        r'ItemID="(.+?)"',
        r'OwnPage="(.+?)"',
    )
)

# Text that opens the base64 payload of an embedded image inside a line.
_IMAGE_DATA_ATTR = 'ImageData="'


def _extract_inline_image(line):
    """Save the image embedded in *line* and return the line rewritten as a link.

    Lines without an ``ImageData="..."`` attribute are returned unchanged.
    Otherwise the payload is decoded and written to IMAGES_ABSOLUTE_PATH as
    ``<page>-<ItemID>.<inlineImageExt>`` (page is zero-padded to three
    digits), the ``ImageData``, ``isInlineImage="1"`` and ``inlineImageExt``
    attributes are removed, and an empty ``PFILE=""`` is pointed at the new
    file below IMAGES_RELATIVE_PATH.

    A line whose payload is empty or unterminated, or that lacks one of the
    attributes needed to name the file, is reported on stderr and returned
    unchanged instead of aborting the whole conversion.

    Raises:
        binascii.Error, zlib.error: if the payload is not valid
            base64 / zlib data.
        OSError: if the image file cannot be written.
    """
    attr_start = line.find(_IMAGE_DATA_ATTR)
    # str.find() reports "not found" as -1; position 0 is a valid match.
    if attr_start == -1:
        return line

    data_start = attr_start + len(_IMAGE_DATA_ATTR)
    data_end = line.find('"', data_start)
    ext_match = p_image_ext.search(line)
    id_match = p_item_id.search(line)
    page_match = p_page.search(line)
    # data_end == -1: no closing quote on this line;
    # data_end == data_start: the attribute is present but empty.
    if data_end <= data_start or not (ext_match and id_match and page_match):
        print('skipping incomplete inline image:', line.strip()[:80], file=sys.stderr)
        return line

    image_type = ext_match[1]
    item_id = id_match[1]
    # Negative OwnPage values are clamped to 0 before being used in the name.
    page_number = max(int(page_match[1]), 0)

    image_file = f'{page_number:03d}-{item_id}.{image_type}'
    print(image_file)

    # Images are qCompress()ed and stored as base64. To decompress a
    # qCompress byte sequence one needs to remove the first 4 bytes (size
    # information).
    # see https://doc.qt.io/qt-5/qbytearray.html#qUncompress
    # see Scribus150Format::pasteItem
    # Decode before opening the file so that a corrupt payload does not
    # leave an empty image file behind.
    decoded = base64.standard_b64decode(line[data_start:data_end])
    image_bytes = zlib.decompress(decoded[4:])
    with open(Path(IMAGES_ABSOLUTE_PATH) / image_file, 'wb') as image_output:
        image_output.write(image_bytes)

    # Drop the whole ImageData="..." attribute, then the two inline-image
    # markers, and finally link the frame to the extracted file.
    line = line[:attr_start] + line[data_end + 1:]
    line = line.replace('isInlineImage="1"', '')
    line = line.replace(f'inlineImageExt="{image_type}"', '')
    return line.replace('PFILE=""', f'PFILE="{Path(IMAGES_RELATIVE_PATH) / image_file}"')


# Copy the document line by line, extracting embedded images on the way.
# The encoding is set explicitly to UTF-8 so the result does not depend on
# the platform's default encoding.
# NOTE(review): assumes .sla documents are UTF-8 encoded XML -- confirm.
with open(SLA_INPUT_FILE, encoding='utf-8') as sla_input, \
        open(SLA_OUTPUT_FILE, 'w', encoding='utf-8') as sla_output:
    for line in sla_input:
        sla_output.write(_extract_inline_image(line))



# A linked image:
#         <PAGEOBJECT XPOS="280.5" YPOS="260.945881889764" OwnPage="0" ItemID="28811912" PTYPE="2" WIDTH="159" HEIGHT="120.804118110236" FRTYPE="0" CLIPEDIT="0" PWIDTH="1" PLINEART="1" LOCALSCX="0.125837623031496" LOCALSCY="0.125837623031496" LOCALX="0" LOCALY="0" LOCALROT="0" PICART="1" SCALETYPE="0" RATIO="1" Pagenumber="0" PFILE="bleiben.jpg" PRFILE="Embedded c2" EPROF="Embedded c2" IRENDER="0" path="M0 0 L159 0 L159 120.804 L0 120.804 L0 0 Z" copath="M0 0 L159 0 L159 120.804 L0 120.804 L0 0 Z" gXpos="280.5" gYpos="260.945881889764" gWidth="0" gHeight="0" LAYER="0" NEXTITEM="-1" BACKITEM="-1"/>
#
# An embedded image (with ImageData emptied)
#         <PAGEOBJECT XPOS="406.0008" YPOS="59202.2288" OwnPage="141" ItemID="1365294098" PTYPE="2" WIDTH="234" HEIGHT="231.72" FRTYPE="0" CLIPEDIT="0" PWIDTH="1" PLINEART="1" LOCALSCX="0.12" LOCALSCY="0.12" LOCALX="0" LOCALY="0" LOCALROT="0" PICART="1" SCALETYPE="0" RATIO="1" Pagenumber="0" PFILE="" isInlineImage="1" inlineImageExt="tiff" ImageData="" PRFILE="sRGB IEC61966-2.1" IRENDER="0" EMBEDDED="0" path="M0 0 L234 0 L234 231.72 L0 231.72 L0 0 Z" copath="M0 0 L234 0 L234 231.72 L0 231.72 L0 0 Z" gXpos="406.0008" gYpos="59202.2288" gWidth="0" gHeight="0" LAYER="0" NEXTITEM="-1" BACKITEM="-1"/>