1313# See the License for the specific language governing permissions and
1414# limitations under the License.
1515#
16- """This module contains Crypt4GH based encryption functionality """
16+ """Functionality to encrypt files in chunks with Crypt4GH before upload. """
1717
1818import base64
1919import os
2020from collections .abc import Generator
2121from io import BufferedReader
22- from pathlib import Path
22+ from typing import Any
2323
2424import crypt4gh .header
25- import crypt4gh .keys
2625import crypt4gh .lib
2726from nacl .bindings import crypto_aead_chacha20poly1305_ietf_encrypt
27+ from pydantic import SecretBytes
2828
2929from ghga_connector .config import get_ghga_pubkey
30- from ghga_connector .core import get_segments , read_file_parts
30+ from ghga_connector .core .crypt .checksums import Checksums
31+ from ghga_connector .core .file_operations import get_segments , read_file_parts
3132
32- from .abstract_bases import Encryptor
33- from .checksums import Checksums
3433
35-
36- class Crypt4GHEncryptor (Encryptor ):
34+ class Crypt4GHEncryptor :
3735 """Handles on the fly encryption and checksum calculation"""
3836
39- def __init__ (
40- self ,
41- part_size : int ,
42- private_key_path : Path ,
43- passphrase : str | None ,
44- checksums : Checksums = Checksums (),
45- file_secret : bytes | None = None ,
46- ):
47- self ._encrypted_file_size = 0
48- self ._checksums = checksums
37+ def __init__ (self , part_size : int , my_private_key : SecretBytes ):
4938 self ._part_size = part_size
50- self ._private_key_path = private_key_path
39+ self ._my_private_key = my_private_key
5140 self ._server_public_key = base64 .b64decode (get_ghga_pubkey ())
52- self ._passphrase = passphrase
53- if file_secret is None :
54- file_secret = os .urandom (32 )
55- self ._file_secret = file_secret
41+ self ._file_secret = os .urandom (32 )
42+ self .checksums = Checksums () # Updated as encryption takes place
43+ self ._encrypted_file_size = 0 # Updated as encryption takes place
5644
5745 def _encrypt (self , part : bytes ):
5846 """Encrypt file part using secret"""
@@ -64,7 +52,7 @@ def _encrypt(self, part: bytes):
6452
6553 return b"" .join (encrypted_segments ), incomplete_segment
6654
67- def _encrypt_segment (self , segment : bytes ):
55+ def _encrypt_segment (self , segment : bytes ) -> bytes :
6856 """Encrypt one single segment"""
6957 nonce = os .urandom (12 )
7058 encrypted_data = crypto_aead_chacha20poly1305_ietf_encrypt (
@@ -77,38 +65,31 @@ def _create_envelope(self) -> bytes:
7765 Gather file encryption/decryption secret and assemble a crypt4gh envelope using the
7866 client's private and the server's public key
7967 """
80- if self ._passphrase :
81- private_key = crypt4gh .keys .get_private_key (
82- filepath = self ._private_key_path , callback = lambda : self ._passphrase
83- )
84- else :
85- private_key = crypt4gh .keys .get_private_key (
86- filepath = self ._private_key_path , callback = None
87- )
88-
89- keys = [(0 , private_key , self ._server_public_key )]
68+ keys = [(0 , self ._my_private_key .get_secret_value (), self ._server_public_key )]
9069 header_content = crypt4gh .header .make_packet_data_enc (0 , self ._file_secret )
9170 header_packets = crypt4gh .header .encrypt (header_content , keys )
9271 header_bytes = crypt4gh .header .serialize (header_packets )
93-
9472 return header_bytes
9573
9674 def get_encrypted_size (self ) -> int :
9775 """Get file size after encryption, excluding envelope"""
9876 return self ._encrypted_file_size
9977
100- def process_file (self , file : BufferedReader ) -> Generator [bytes , None , None ]:
101- """Encrypt file parts and prepare for upload."""
78+ def process_file (
79+ self , file : BufferedReader
80+ ) -> Generator [tuple [int , bytes ], Any , None ]:
81+ """Encrypt file parts for upload, yielding a tuple of the part number and content."""
10282 unprocessed_bytes = b""
10383 upload_buffer = self ._create_envelope ()
104- update_encrypted = self ._checksums .update_encrypted
10584
10685 # get envelope size to adjust checksum buffers and encrypted content size
10786 envelope_size = len (upload_buffer )
10887
109- for file_part in read_file_parts (file = file , part_size = self ._part_size ):
88+ for part_number , file_part in enumerate (
89+ read_file_parts (file = file , part_size = self ._part_size ), start = 1
90+ ):
11091 # process unencrypted
111- self ._checksums .update_unencrypted (file_part )
92+ self .checksums .update_unencrypted (file_part )
11293 unprocessed_bytes += file_part
11394
11495 # encrypt in chunks
@@ -118,12 +99,12 @@ def process_file(self, file: BufferedReader) -> Generator[bytes, None, None]:
11899 # update checksums and yield if part size
119100 if len (upload_buffer ) >= self ._part_size :
120101 current_part = upload_buffer [: self ._part_size ]
121- if self ._checksums .encrypted_is_empty ():
122- update_encrypted (current_part [envelope_size :])
102+ if self .checksums .encrypted_is_empty ():
103+ self . checksums . update_encrypted (current_part [envelope_size :])
123104 else :
124- update_encrypted (current_part )
105+ self . checksums . update_encrypted (current_part )
125106 self ._encrypted_file_size += self ._part_size
126- yield current_part
107+ yield part_number , current_part
127108 upload_buffer = upload_buffer [self ._part_size :]
128109
129110 # process dangling bytes
@@ -132,14 +113,16 @@ def process_file(self, file: BufferedReader) -> Generator[bytes, None, None]:
132113
133114 while len (upload_buffer ) >= self ._part_size :
134115 current_part = upload_buffer [: self ._part_size ]
135- update_encrypted (current_part )
116+ self . checksums . update_encrypted (current_part )
136117 self ._encrypted_file_size += self ._part_size
137- yield current_part
118+ part_number += 1
119+ yield part_number , current_part
138120 upload_buffer = upload_buffer [self ._part_size :]
139121
140122 if upload_buffer :
141- update_encrypted (upload_buffer )
123+ self . checksums . update_encrypted (upload_buffer )
142124 self ._encrypted_file_size += len (upload_buffer )
143- yield upload_buffer
125+ part_number += 1
126+ yield part_number , upload_buffer
144127
145128 self ._encrypted_file_size -= envelope_size
0 commit comments