79 lines
2.3 KiB
Python
Executable file
79 lines
2.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
# dedup.py
|
|
# Removes duplicate login entries from a Bitwarden .csv export
|
|
# 2019-02-09 5erif
|
|
|
|
import csv
import hashlib
import os
import sys
from urllib.parse import urlparse
|
|
|
|
# Zero-based position of each field within a row of the Bitwarden CSV
(
    FOLDER,
    FAVORITE,
    TYPE,
    NAME,
    NOTES,
    FIELDS,
    URI,
    USERNAME,
    PASSWORD,
    TOTP,
) = range(10)
|
|
|
|
def main(argv):
    """Split a Bitwarden CSV export into unique and duplicate entries.

    The file named by ``argv[0]`` is read line by line. Two files are written
    next to it: ``<base>_out.csv`` receives the first line (the header row)
    and the first occurrence of every entry, ``<base>_rem.csv`` receives
    later duplicates and any incomplete fragments. ``<base>`` is the input
    path without its extension.

    Two entries are duplicates when URI, username and password all match.
    A URI that has a network location is compared by that location only, so
    different pages on the same host count as the same site.

    Args:
        argv: Command-line arguments without the program name; ``argv[0]``
            is the path of the CSV file to process.

    Raises:
        SystemExit: With status 1 when ``argv`` is empty.
    """
    if len(argv) < 1:
        print('Missing input file path')
        sys.exit(1)

    in_file_path = argv[0]
    # splitext instead of chopping four characters, so names without a
    # ".csv"-length extension still produce sensible output paths.
    base_path = os.path.splitext(in_file_path)[0]
    out_file_path = base_path + '_out.csv'
    rem_file_path = base_path + '_rem.csv'

    expected_fields = TOTP + 1
    seen_keys = set()
    write_count = 0
    dup_count = 0
    pending = ''

    # The input is opened first so that a bad path fails before any output
    # file is created; the with-block closes all three files even on error.
    with open(in_file_path, 'r', encoding='utf8') as in_file, \
            open(out_file_path, 'w', encoding='utf8') as out_file, \
            open(rem_file_path, 'w', encoding='utf8') as rem_file:
        for line_number, line in enumerate(in_file):
            fields = _split_fields(line)
            if len(fields) < expected_fields:
                # A short line is presumably one piece of a record whose
                # quoted field spans several physical lines. Glue it onto
                # what has accumulated so far; note that the pieces are
                # joined without the newline that separated them.
                line = pending.strip('\n') + line
                pending = line
                fields = _split_fields(line)
                if len(fields) < expected_fields:
                    # Still incomplete: report it and keep the text in the
                    # removed-entries file in case it never completes.
                    print(f'Missing fields in line {line_number}:\n{line}')
                    rem_file.write(line)
                    continue
                print(f'Recovered with line {line_number}:\n{line}')
            pending = ''

            is_header = line_number == 0
            uri = fields[URI]
            if not is_header:
                try:
                    host = urlparse(uri).netloc
                except ValueError:
                    # Malformed URI (e.g. a broken IPv6 literal): compare
                    # the raw text instead of aborting the whole run.
                    host = ''
                if host:
                    uri = host

            # A tuple keeps the three values separate, so differently split
            # URI/username/password combinations can never collide the way
            # a plain string concatenation could.
            key = (uri, fields[USERNAME], fields[PASSWORD])
            if key in seen_keys:
                rem_file.write(line)
                dup_count += 1
                # Uncomment for verbose mode
                # print(f'Skipping duplicate on line {line_number}:\n{line}')
                continue

            seen_keys.add(key)
            out_file.write(line)
            if not is_header:
                # The header is copied through but is not an entry.
                write_count += 1

    print(f'\nOutput file: {out_file_path}\n{write_count} unique entries saved')
    print(f'\n{dup_count} duplicates saved to {rem_file_path}')


def _split_fields(line):
    """Return the CSV fields of one line, honouring quoted commas."""
    try:
        return next(csv.reader([line]), [])
    except csv.Error:
        # Unparseable for the csv module (e.g. oversized field): fall back
        # to a plain comma split rather than stopping.
        return line.split(',')
|
|
|
|
if __name__ == "__main__":
    # Run as a script: hand over every argument after the program name.
    cli_args = sys.argv[1:]
    main(cli_args)
|