Add WinZip AES encryption (AE-1 and AE-2) to Zstream.

What this patch contains:

* .github/workflows/ci.yml: installs the `7zip` apt package before `mix deps.get`;
  the new tests shell out to the `7zz` binary.
* README.md, lib/zstream.ex: document the new coder and its options
  (`:password`, `:key_size`, `:ae_version`).
* lib/zstream/encryption_coder.ex: declares four optional callbacks
  (`compression_method/0`, `extra_field_data/1`, `version_needed_to_extract/0`,
  `crc_exposed?/1`).
* lib/zstream/encryption_coder/aes.ex (new): `Zstream.EncryptionCoder.AES`.
  `init/1` derives the encryption key, HMAC key and 2-byte password verifier
  with PBKDF2-HMAC-SHA1 (1000 iterations) over a random salt. `encode/2`
  emits salt + verifier once, then encrypts whole 16-byte blocks by XOR-ing
  the plaintext with AES-ECB-encrypted little-endian 128-bit counter blocks,
  buffering any remainder. `close/1` encrypts the buffered remainder and
  appends the first 10 bytes of the HMAC-SHA1 over the ciphertext.
* lib/zstream/protocol.ex: when the encryption coder exports the optional
  callbacks, the local and central headers use its extra field, version
  needed to extract and compression method, and the CRC is written as 0 in
  the data descriptor and central header when `crc_exposed?/1` returns false.
* lib/zstream/zip.ex: `close_entry/1` now passes the state returned by the
  final `encode/2` call to `close/1` instead of the stale pre-encode state.
* test/zstream_test.exs: round-trips AES archives through `7zz` for every
  key size / AE version combination, with and without `zip64: true`.

NOTE(review): indentation and blank context lines in this patch were
restored by hand; run `git apply --check` against the base tree to confirm.
NOTE(review): in `central_directory_header` the encryption coder is looked up
from `options`, while the flag and compression method in the same header use
`entry.options`. The binding of `options` is outside the hunks shown here;
confirm both refer to the per-entry options.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b6684f3..cbc3a41 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,6 +27,10 @@ jobs:
         with:
           otp-version: ${{matrix.otp}}
           elixir-version: ${{matrix.elixir}}
+      - name: Install 7zip
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y 7zip
       - run: mix deps.get
       - run: mix format --check-formatted
         if: ${{ matrix.check_format }}
diff --git a/README.md b/README.md
index 1d3168f..cc8b558 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ end)
 ### zip
 
 * compression (deflate, stored)
-* encryption (traditional)
+* encryption (traditional, [WinZip AES](https://www.winzip.com/en/support/aes-encryption/))
 * zip64
 
 ### unzip
diff --git a/lib/zstream.ex b/lib/zstream.ex
index b2bae51..bad7025 100644
--- a/lib/zstream.ex
+++ b/lib/zstream.ex
@@ -41,6 +41,12 @@ defmodule Zstream do
         options. Example `{Zstream.EncryptionCoder.Traditional, password:
         "secret"}`
 
+      - `Zstream.EncryptionCoder.AES` - use AES encryption (128, 192, or 256-bit).
+        `:password` key should be present in the options. Supports both AE-1 and AE-2
+        formats. Additional options: `:key_size` (128, 192, or 256, defaults to 256),
+        `:ae_version` (1 or 2, defaults to 1). Example `{Zstream.EncryptionCoder.AES,
+        password: "secret", key_size: 256, ae_version: 1}`
+
       - `Zstream.EncryptionCoder.None` - no encryption
 
       - Defaults to `Zstream.EncryptionCoder.None`
diff --git a/lib/zstream/encryption_coder.ex b/lib/zstream/encryption_coder.ex
index 0d490d3..cee1739 100644
--- a/lib/zstream/encryption_coder.ex
+++ b/lib/zstream/encryption_coder.ex
@@ -8,4 +8,19 @@ defmodule Zstream.EncryptionCoder do
   @callback close(state :: term) :: iodata
 
   @callback general_purpose_flag() :: integer
+
+  @optional_callbacks [
+    compression_method: 0,
+    extra_field_data: 1,
+    version_needed_to_extract: 0,
+    crc_exposed?: 1
+  ]
+
+  @callback compression_method() :: integer
+
+  @callback extra_field_data(options :: Keyword.t()) :: binary
+
+  @callback version_needed_to_extract() :: integer
+
+  @callback crc_exposed?(options :: Keyword.t()) :: boolean
 end
diff --git a/lib/zstream/encryption_coder/aes.ex b/lib/zstream/encryption_coder/aes.ex
new file mode 100644
index 0000000..790dea3
--- /dev/null
+++ b/lib/zstream/encryption_coder/aes.ex
@@ -0,0 +1,197 @@
+defmodule Zstream.EncryptionCoder.AES do
+  @moduledoc """
+  Implements AES encryption (128, 192, 256) as described in https://www.winzip.com/en/support/aes-encryption
+
+  Supports both AE-1 and AE-2 formats:
+  - AE-1: Exposes the CRC-32 in the zip file, WinZip itself encrypts most files using the AE-1 format
+  - AE-2: Does not expose the CRC-32 in the zip file
+
+  ## Options
+
+  * `:key_size` - The AES key size in bits. Valid values are 128, 192, or 256. Defaults to 256.
+  * `:ae_version` - The AES encryption format version. Valid values are 1 or 2. Defaults to 1.
+  """
+  @behaviour Zstream.EncryptionCoder
+
+  @aes_block_size 16
+
+  # https://www.winzip.com/en/support/aes-encryption/#key-generation
+  @pbkdf2_iterations 1000
+  # https://www.winzip.com/en/support/aes-encryption/#salt
+  @pbkdf2_salt_lengths %{128 => 8, 192 => 12, 256 => 16}
+  # https://www.winzip.com/en/support/aes-encryption/#pwd-verify
+  @password_verify_length 2
+
+  # AES key sizes in bytes
+  @aes_key_sizes %{128 => 16, 192 => 24, 256 => 32}
+  # AES mode indicators for extra field
+  @aes_mode_indicators %{128 => 0x01, 192 => 0x02, 256 => 0x03}
+  # AES algorithm names for :crypto
+  @aes_algorithms %{128 => :aes_128_ecb, 192 => :aes_192_ecb, 256 => :aes_256_ecb}
+
+  defmodule State do
+    defstruct mac_state: nil,
+              crypto_state: nil,
+              encrypted_file_header: <<>>,
+              counter: 1,
+              buffer: <<>>,
+              key_size: 256
+  end
+
+  @impl true
+  def init(opts) do
+    password = Keyword.fetch!(opts, :password)
+    key_size = Keyword.get(opts, :key_size, 256)
+    ae_version = Keyword.get(opts, :ae_version, 1)
+
+    if key_size not in [128, 192, 256] do
+      raise ArgumentError, "Invalid key_size: #{key_size}. Must be 128, 192, or 256."
+    end
+
+    if ae_version not in [1, 2] do
+      raise ArgumentError, "Invalid ae_version: #{ae_version}. Must be 1 or 2."
+    end
+
+    aes_key_length = @aes_key_sizes[key_size]
+    salt = :crypto.strong_rand_bytes(@pbkdf2_salt_lengths[key_size])
+
+    <<
+      encryption_key::binary-size(aes_key_length),
+      hmac_key::binary-size(aes_key_length),
+      password_verify::binary-size(@password_verify_length)
+    >> =
+      :crypto.pbkdf2_hmac(
+        :sha,
+        password,
+        salt,
+        @pbkdf2_iterations,
+        aes_key_length + aes_key_length + @password_verify_length
+      )
+
+    %State{
+      encrypted_file_header: salt <> password_verify,
+      crypto_state: :crypto.crypto_init(@aes_algorithms[key_size], encryption_key, true),
+      mac_state: :crypto.mac_init(:hmac, :sha, hmac_key),
+      counter: 1,
+      key_size: key_size
+    }
+  end
+
+  @impl true
+  def encode(chunk, state) do
+    if state.encrypted_file_header != <<>> do
+      {encrypted, updated_state} = encrypt_chunk(chunk, %{state | encrypted_file_header: <<>>})
+      {[state.encrypted_file_header, encrypted], updated_state}
+    else
+      encrypt_chunk(chunk, state)
+    end
+  end
+
+  defp encrypt_chunk(
+         chunk,
+         %State{buffer: buffer, counter: counter, crypto_state: crypto_state} = state
+       ) do
+    input = IO.iodata_to_binary([buffer, chunk])
+    input_size = byte_size(input)
+
+    if input_size < @aes_block_size do
+      {<<>>, %{state | buffer: input}}
+    else
+      block_count = div(input_size, @aes_block_size)
+      blocks = Enum.map(counter..(counter + block_count - 1), &<<&1::unsigned-little-128>>)
+      plaintext_size = block_count * @aes_block_size
+      <<plaintext::binary-size(plaintext_size), new_buffer::binary>> = input
+      cipher = :crypto.exor(plaintext, :crypto.crypto_update(crypto_state, blocks))
+
+      {
+        cipher,
+        %{
+          state
+          | mac_state: :crypto.mac_update(state.mac_state, cipher),
+            counter: counter + block_count,
+            buffer: new_buffer
+        }
+      }
+    end
+  end
+
+  @impl true
+  def close(%State{buffer: buffer, counter: counter, crypto_state: crypto_state} = state) do
+    buffer_size = byte_size(buffer)
+
+    {final_encrypted, final_state} =
+      if buffer_size > 0 do
+        last_block = <<counter::unsigned-little-128>>
+
+        cipher =
+          :crypto.exor(
+            buffer,
+            binary_part(:crypto.crypto_update(crypto_state, last_block), 0, buffer_size)
+          )
+
+        final_state = %{
+          state
+          | mac_state: :crypto.mac_update(state.mac_state, cipher),
+            counter: counter + 1,
+            buffer: <<>>
+        }
+
+        {cipher, final_state}
+      else
+        {<<>>, state}
+      end
+
+    # https://www.winzip.com/win/en/aes_info.html#auth-faq
+    auth_code = binary_part(:crypto.mac_final(final_state.mac_state), 0, 10)
+    <<>> = :crypto.crypto_final(final_state.crypto_state)
+
+    [final_encrypted, auth_code]
+  end
+
+  # https://www.winzip.com/en/support/aes-encryption/#comp-method
+  @impl true
+  def general_purpose_flag do
+    # which means encrypted (0x0001)
+    0x0001
+  end
+
+  # https://www.winzip.com/en/support/aes-encryption/#comp-method
+  # a compression method of 99 is used to indicate the presence of an AES-encrypted file
+  @impl true
+  def compression_method, do: 99
+
+  # https://www.winzip.com/en/support/aes-encryption/#extra-data
+  @impl true
+  def extra_field_data(options) do
+    {coder, _options} = Keyword.fetch!(options, :coder)
+    {_encryption_coder, encryption_options} = Keyword.fetch!(options, :encryption_coder)
+    key_size = Keyword.get(encryption_options, :key_size, 256)
+    ae_version = Keyword.get(encryption_options, :ae_version, 1)
+
+    <<
+      # Extra field header ID
+      0x9901::little-size(16),
+      # Data size
+      7::little-size(16),
+      # Integer version number specific to the zip vendor, 0x0001 ae-1, 0x0002 ae-2
+      ae_version::little-size(16),
+      # 2-character vendor ID
+      "AE"::binary,
+      # Integer mode value indicating AES encryption strength
+      @aes_mode_indicators[key_size]::little-size(8),
+      # Actual compression method used (8=deflate, 0=stored)
+      coder.compression_method()::little-size(16)
+    >>
+  end
+
+  # https://github.com/zlib-ng/minizip-ng/blob/636cba8643/doc/zip/appnote.iz.txt#L386
+  @impl true
+  def version_needed_to_extract, do: 51
+
+  @impl true
+  def crc_exposed?(options) do
+    {_encryption_coder, encryption_options} = Keyword.fetch!(options, :encryption_coder)
+    ae_version = Keyword.get(encryption_options, :ae_version, 1)
+    ae_version == 1
+  end
+end
diff --git a/lib/zstream/protocol.ex b/lib/zstream/protocol.ex
index f5e8c8f..44b55ec 100644
--- a/lib/zstream/protocol.ex
+++ b/lib/zstream/protocol.ex
@@ -9,6 +9,8 @@ defmodule Zstream.Protocol do
 
   @comment "Created by Zstream"
 
+  defp get_encryption_coder(options), do: get_in(options, [:encryption_coder, Access.elem(0)])
+
   def local_file_header(name, local_file_header_offset, options) do
     extra_field =
       zip64?(
@@ -17,12 +19,28 @@ defmodule Zstream.Protocol do
         Extra.zip64_extended_info(0, 0, local_file_header_offset)
       )
 
+    encryption_coder = get_encryption_coder(options)
+
+    final_extra_field =
+      if encryption_coder && function_exported?(encryption_coder, :extra_field_data, 1) do
+        [extra_field, encryption_coder.extra_field_data(options)]
+      else
+        extra_field
+      end
+
+    version_needed_to_extract =
+      if encryption_coder && function_exported?(encryption_coder, :version_needed_to_extract, 0) do
+        encryption_coder.version_needed_to_extract()
+      else
+        zip64?(options, 20, 45)
+      end
+
     [
       <<
         # local file header signature
         0x04034B50::little-size(32),
         # version needed to extract
-        zip64?(options, 20, 45)::little-size(16),
+        version_needed_to_extract::little-size(16),
         general_purpose_bit_flag(options)::little-size(16),
         # compression method
         compression_method(options)::little-size(16),
@@ -39,21 +57,32 @@ defmodule Zstream.Protocol do
         # file name length
         byte_size(name)::little-size(16),
         # extra field length
-        IO.iodata_length(extra_field)::little-size(16)
+        IO.iodata_length(final_extra_field)::little-size(16)
       >>,
       name,
-      extra_field
+      final_extra_field
     ]
   end
 
   def data_descriptor(crc32, compressed_size, uncompressed_size, options) do
+    encryption_coder = get_encryption_coder(options)
+
+    crc =
+      if encryption_coder &&
+           function_exported?(encryption_coder, :crc_exposed?, 1) &&
+           !encryption_coder.crc_exposed?(options) do
+        0
+      else
+        crc32
+      end
+
     if Keyword.fetch!(options, :zip64) do
       # signature
-      <<0x08074B50::little-size(32), crc32::little-size(32), compressed_size::little-size(64),
+      <<0x08074B50::little-size(32), crc::little-size(32), compressed_size::little-size(64),
         uncompressed_size::little-size(64)>>
     else
       # signature
-      <<0x08074B50::little-size(32), crc32::little-size(32), compressed_size::little-size(32),
+      <<0x08074B50::little-size(32), crc::little-size(32), compressed_size::little-size(32),
         uncompressed_size::little-size(32)>>
     end
   end
@@ -68,6 +97,29 @@ defmodule Zstream.Protocol do
         Extra.zip64_extended_info(entry.size, entry.c_size, entry.local_file_header_offset)
       )
 
+    encryption_coder = get_encryption_coder(options)
+
+    final_extra_field =
+      if encryption_coder && function_exported?(encryption_coder, :extra_field_data, 1) do
+        [extra_field, encryption_coder.extra_field_data(options)]
+      else
+        extra_field
+      end
+
+    version_needed_to_extract =
+      if encryption_coder && function_exported?(encryption_coder, :version_needed_to_extract, 0) do
+        encryption_coder.version_needed_to_extract()
+      else
+        zip64?(options, 20, 45)
+      end
+
+    crc_exposed? =
+      if encryption_coder && function_exported?(encryption_coder, :crc_exposed?, 1) do
+        encryption_coder.crc_exposed?(options)
+      else
+        true
+      end
+
     [
       <<
         # central file header signature
@@ -75,7 +127,7 @@ defmodule Zstream.Protocol do
         # version made by
         52::little-size(16),
         # version needed to extract
-        zip64?(options, 20, 45)::little-size(16),
+        version_needed_to_extract::little-size(16),
         general_purpose_bit_flag(entry.options)::little-size(16),
         # compression method
         compression_method(entry.options)::little-size(16),
@@ -84,7 +136,7 @@ defmodule Zstream.Protocol do
         # last mod file date
         dos_date(Keyword.fetch!(entry.options, :mtime))::little-size(16),
         # crc-32
-        entry.crc::little-size(32),
+        if(crc_exposed?, do: entry.crc, else: 0)::little-size(32),
         # compressed size
         zip64?(options, entry.c_size, 0xFFFFFFFF)::little-size(32),
         # uncompressed size
@@ -92,7 +144,7 @@ defmodule Zstream.Protocol do
         # file name length
         byte_size(entry.name)::little-size(16),
         # extra field length
-        IO.iodata_length(extra_field)::little-size(16),
+        IO.iodata_length(final_extra_field)::little-size(16),
         # file comment length
         0::little-size(16),
         # disk number start
@@ -104,7 +156,7 @@ defmodule Zstream.Protocol do
       >>,
       # file name
       entry.name,
-      extra_field
+      final_extra_field
     ]
   end
 
@@ -193,7 +245,14 @@ defmodule Zstream.Protocol do
 
   defp compression_method(options) do
     {coder, _opts} = Keyword.fetch!(options, :coder)
-    coder.compression_method()
+
+    encryption_coder = get_encryption_coder(options)
+
+    if encryption_coder && function_exported?(encryption_coder, :compression_method, 0) do
+      encryption_coder.compression_method()
+    else
+      coder.compression_method()
+    end
   end
 
   defp dos_time(t) do
diff --git a/lib/zstream/zip.ex b/lib/zstream/zip.ex
index f221a4e..2040046 100644
--- a/lib/zstream/zip.ex
+++ b/lib/zstream/zip.ex
@@ -165,11 +165,11 @@ defmodule Zstream.Zip do
 
   defp close_entry(state) do
     if state.coder do
-      {encrypted, _encryption_coder_state} =
+      {encrypted, encryption_coder_state} =
         state.coder.close(state.coder_state)
         |> state.encryption_coder.encode(state.encryption_coder_state)
 
-      encrypted = [encrypted, state.encryption_coder.close(state.encryption_coder_state)]
+      encrypted = [encrypted, state.encryption_coder.close(encryption_coder_state)]
       c_size = IO.iodata_length(encrypted)
       state = put_in(state.coder, nil)
       state = put_in(state.coder_state, nil)
diff --git a/test/zstream_test.exs b/test/zstream_test.exs
index c6ff934..d893810 100644
--- a/test/zstream_test.exs
+++ b/test/zstream_test.exs
@@ -190,6 +190,86 @@ defmodule ZstreamTest do
     assert_memory()
   end
 
+  for ae_version <- [1, 2], key_size <- [256, 192, 128] do
+    test "aes key_size: #{key_size}, ae_version: #{ae_version} encryption" do
+      password = Base.encode64(:crypto.strong_rand_bytes(12))
+
+      verify_aes_password(
+        [
+          Zstream.entry("kafan", file("kafan.txt"),
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          ),
+          Zstream.entry("kafka_uncompressed", file("kafan.txt"),
+            coder: Zstream.Coder.Stored,
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          ),
+          Zstream.entry("कफ़न", file("kafan.txt"),
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          )
+        ],
+        password
+      )
+
+      # Test AES with empty files
+      verify_aes_password(
+        [
+          Zstream.entry("empty_file", [],
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          ),
+          Zstream.entry("empty_file_1", [],
+            coder: Zstream.Coder.Stored,
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          )
+        ],
+        password
+      )
+
+      # Test AES with larger files
+      verify_aes_password(
+        [
+          Zstream.entry("moby.txt", file("moby_dick.txt"),
+            coder: Zstream.Coder.Stored,
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          ),
+          Zstream.entry("deep/moby.txt", file("moby_dick.txt"),
+            encryption_coder:
+              {Zstream.EncryptionCoder.AES,
+               password: password, key_size: unquote(key_size), ae_version: unquote(ae_version)}
+          )
+        ],
+        password
+      )
+    end
+  end
+
+  test "aes stream encryption" do
+    big_file = Stream.repeatedly(&random_bytes/0) |> Stream.take(50)
+
+    assert_memory()
+
+    # Test AES encryption with streaming
+    Zstream.zip([
+      Zstream.entry("big_file_aes", big_file,
+        encryption_coder: {Zstream.EncryptionCoder.AES, password: "test123", key_size: 256}
+      )
+    ])
+    |> Stream.run()
+
+    assert_memory()
+  end
+
   defmodule MockCoder do
     @behaviour Zstream.Coder
     def init(_opts), do: nil
@@ -349,4 +429,54 @@ defmodule ZstreamTest do
     Logger.debug("Total memory: #{total}")
     assert total < 150
   end
+
+  defp verify_aes_password(entries, password) do
+    verify_aes_password_with_options(entries, password)
+    verify_aes_password_with_options(entries, password, zip64: true)
+  end
+
+  defp verify_aes_password_with_options(entries, password, options \\ []) do
+    Temp.track!()
+    path = Temp.path!(%{suffix: ".zip"})
+
+    Zstream.zip(entries, options)
+    |> Stream.into(File.stream!(path))
+    |> Stream.run()
+
+    # Use 7zz to verify the AES encrypted archive
+    {response, exit_code} = System.cmd("7zz", ["l", path])
+    Logger.debug("7zz list output: #{response}")
+    assert exit_code == 0
+
+    # Test the archive integrity and password
+    {response, exit_code} = System.cmd("7zz", ["t", "-p#{password}", path])
+    Logger.debug("7zz test output: #{response}")
+    assert exit_code == 0
+
+    # Extract and verify file contents
+    temp_dir = Temp.mkdir!()
+
+    {response, exit_code} =
+      System.cmd("7zz", ["x", "-p#{password}", "-o#{temp_dir}", path, "-y"])
+
+    Logger.debug("7zz extract output: #{response}")
+    assert exit_code == 0
+
+    # Verify extracted file contents match original
+    entries = Enum.reject(entries, fn e -> String.ends_with?(e.name, "/") end)
+
+    Enum.each(entries, fn entry ->
+      extracted_path = Path.join(temp_dir, entry.name)
+
+      assert File.exists?(extracted_path)
+      extracted_content = File.read!(extracted_path)
+      original_content = as_binary(entry.stream)
+
+      assert extracted_content == original_content,
+             "Content mismatch for #{entry.name}"
+    end)
+
+    File.rm_rf!(temp_dir)
+    File.rm!(path)
+  end
 end