Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add filename sanitizing to adapters #113

Merged
merged 4 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/active_encode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
require 'active_encode/version'
require 'active_encode/base'
require 'active_encode/engine'
require 'active_encode/filename_sanitizer'

module ActiveEncode
extend ActiveEncode::FilenameSanitizer
end
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ def check_s3_bucket(input_url, source_bucket)
# logger.info("Already in bucket `#{source_bucket}'")
s3_object.key
else
s3_key = File.join(SecureRandom.uuid, s3_object.key)
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying to `#{source_bucket}/#{input_url}'")
target = Aws::S3::Object.new(bucket_name: source_bucket, key: input_url)
target = Aws::S3::Object.new(bucket_name: source_bucket, key: s3_key)
target.copy_from(s3_object, multipart_copy: s3_object.size > 15_728_640) # 15.megabytes
s3_key
end
Expand All @@ -152,7 +153,8 @@ def upload_to_s3(input_url, source_bucket)
# original_input = input_url
bucket = Aws::S3::Resource.new(client: s3client).bucket(source_bucket)
filename = FileLocator.new(input_url).location
s3_key = File.join(SecureRandom.uuid, File.basename(filename))
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying `#{original_input}' to `#{source_bucket}/#{input_url}'")
obj = bucket.object(s3_key)
obj.upload_file filename
Expand Down
13 changes: 3 additions & 10 deletions lib/active_encode/engine_adapters/ffmpeg_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def create(input_url, options = {})
case input_url
when /^file\:\/\/\//
input_url = Addressable::URI.unencode(input_url)
input_url = ActiveEncode.sanitize_input(input_url)
when /^s3\:\/\//
require 'file_locator'

Expand Down Expand Up @@ -200,7 +201,7 @@ def build_outputs(encode)
Dir["#{File.absolute_path(working_path('outputs', id))}/*"].each do |file_path|
output = ActiveEncode::Output.new
output.url = "file://#{file_path}"
sanitized_filename = sanitize_base encode.input.url
sanitized_filename = ActiveEncode.sanitize_base encode.input.url
output.label = file_path[/#{Regexp.quote(sanitized_filename)}\-(.*?)#{Regexp.quote(File.extname(file_path))}$/, 1]
output.id = "#{encode.input.id}-#{output.label}"
output.created_at = encode.created_at
Expand All @@ -219,7 +220,7 @@ def build_outputs(encode)

def ffmpeg_command(input_url, id, opts)
output_opt = opts[:outputs].collect do |output|
sanitized_filename = sanitize_base input_url
sanitized_filename = ActiveEncode.sanitize_base input_url
file_name = "outputs/#{sanitized_filename}-#{output[:label]}.#{output[:extension]}"
" #{output[:ffmpeg_opt]} #{working_path(file_name, id)}"
end.join(" ")
Expand All @@ -230,14 +231,6 @@ def ffmpeg_command(input_url, id, opts)
"#{FFMPEG_PATH} #{header_opt} -y -loglevel level+fatal -progress #{working_path('progress', id)} -i \"#{input_url}\" #{output_opt}"
end

def sanitize_base(input_url)
if input_url.is_a? URI::HTTP
File.basename(input_url.path, File.extname(input_url.path))
else
File.basename(input_url, File.extname(input_url)).gsub(/[^0-9A-Za-z.\-]/, '_')
end
end

def get_pid(id)
File.read(working_path("pid", id)).remove("\n") if File.file? working_path("pid", id)
end
Expand Down
12 changes: 9 additions & 3 deletions lib/active_encode/engine_adapters/media_convert_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ def mediaconvert
end
end

def s3client
Aws::S3::Client.new
end

cjcolvar marked this conversation as resolved.
Show resolved Hide resolved
def s3_uri(url, options = {})
bucket = options[:masterfile_bucket]

Expand All @@ -503,9 +507,10 @@ def check_s3_bucket(input_url, source_bucket)
# logger.info("Already in bucket `#{source_bucket}'")
s3_object.key
else
s3_key = File.join(SecureRandom.uuid, s3_object.key)
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying to `#{source_bucket}/#{input_url}'")
target = Aws::S3::Object.new(bucket_name: source_bucket, key: input_url)
target = Aws::S3::Object.new(bucket_name: source_bucket, key: s3_key)
target.copy_from(s3_object, multipart_copy: s3_object.size > 15_728_640) # 15.megabytes
s3_key
end
Expand All @@ -515,7 +520,8 @@ def upload_to_s3(input_url, source_bucket)
# original_input = input_url
bucket = Aws::S3::Resource.new(client: s3client).bucket(source_bucket)
filename = FileLocator.new(input_url).location
s3_key = File.join(SecureRandom.uuid, File.basename(filename))
cleaned_url = ActiveEncode.sanitize_filename input_url
s3_key = File.join(SecureRandom.uuid, File.basename(cleaned_url))
# logger.info("Copying `#{original_input}' to `#{source_bucket}/#{input_url}'")
obj = bucket.object(s3_key)
obj.upload_file filename
Expand Down
6 changes: 1 addition & 5 deletions lib/active_encode/engine_adapters/pass_through_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def create(input_url, options = {})
# Copy derivatives to work directory
options[:outputs].each do |opt|
url = opt[:url]
output_path = working_path("outputs/#{sanitize_base opt[:url]}#{File.extname opt[:url]}", new_encode.id)
output_path = working_path("outputs/#{ActiveEncode.sanitize_base opt[:url]}#{File.extname opt[:url]}", new_encode.id)
FileUtils.cp FileLocator.new(url).location, output_path
filename_label_hash[output_path] = opt[:label]
end
Expand Down Expand Up @@ -206,10 +206,6 @@ def build_outputs(encode)
outputs
end

def sanitize_base(input_url)
File.basename(input_url, File.extname(input_url)).gsub(/[^0-9A-Za-z.\-]/, '_')
end

def working_path(path, id)
File.join(WORK_DIR, id, path)
end
Expand Down
22 changes: 22 additions & 0 deletions lib/active_encode/filename_sanitizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true
module ActiveEncode
module FilenameSanitizer
# ffmpeg has trouble with double quotes in file names. Escape them to get ffmpeg to find the file.
def sanitize_input(input_url)
input_url.gsub(/["]/, '\\\\\0')
end

def sanitize_base(input_url)
filepath = input_url.is_a?(URI::HTTP) ? input_url.path : input_url
# Replace special characters with underscores and remove excess periods.
# This removes the extension before processing so it is safe to delete all detected periods.
File.basename(filepath, File.extname(filepath)).gsub(/[^0-9A-Za-z.\-\/]/, '_').delete('.')
end

def sanitize_filename(input_url)
filepath = input_url.is_a?(URI::HTTP) ? input_url.path : input_url
# Replace special characters with underscores and remove excess periods.
File.basename(filepath).gsub(/[^0-9A-Za-z.\-\/]/, '_').gsub(/\.(?=.*\.)/, '')
end
end
end
Binary file added spec/fixtures/"file_with_double_quote".low.mp4
Binary file not shown.
Binary file added spec/fixtures/"file_with_double_quote".mp4
Binary file not shown.
Binary file added spec/fixtures/'file_with_single_quote'.low.mp4
Binary file not shown.
Binary file added spec/fixtures/'file_with_single_quote'.mp4
Binary file not shown.
Binary file added spec/fixtures/file.with :=+%sp3c!l-ch4cts().mp4
Binary file not shown.
Binary file added spec/fixtures/file.with...periods.mp4
Binary file not shown.
48 changes: 48 additions & 0 deletions spec/integration/elastic_transcoder_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,54 @@
end
end

describe "#copy_to_input_bucket" do
context "when filename has no special characters" do
context "non-s3 file" do
let(:input_url) { "spec/fixtures/fireworks.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
expect(described_class.new.send(:copy_to_input_bucket, input_url, source_bucket)).to eq "randomstring/fireworks.mp4"
end
end
context "s3 file" do
let(:input_url) { "s3://bucket1/file.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #check_s3_bucket method" do
expect(described_class.new.send(:copy_to_input_bucket, input_url, source_bucket)).to eq "file.mp4"
end
end
end
context "when filename has special characters" do
context "non-s3 file" do
let(:input) { ["'file_with_single_quote'.mp4", '"file_with_double_quote".mp4', "file with space.mp4", "file.with...periods.mp4", "file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input.each_with_index do |url, index|
expect(described_class.new.send(:copy_to_input_bucket, "spec/fixtures/#{url}", source_bucket)).to eq "randomstring/#{clean[index]}"
end
end
end
context "s3 file" do
let(:input_urls) { ["s3://bucket1/'file_with_single_quote'.mp4", 's3://bucket1/"file_with_double_quote".mp4', "s3://bucket1/file with space.mp4", "s3://bucket1/file.with...periods.mp4", "s3://bucket1/file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket2" }

it "calls the #check_s3_bucket method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input_urls.each_with_index do |url, index|
expect(described_class.new.send(:copy_to_input_bucket, url, source_bucket)).to eq "randomstring/#{clean[index]}"
end
end
end
end
end

describe "#check_s3_bucket" do
context "when file exists in masterfile_bucket" do
let(:input_url) { "s3://bucket1/file.mp4" }
Expand Down
108 changes: 108 additions & 0 deletions spec/integration/ffmpeg_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,114 @@ def find_encode(id)
end
end

context "input filename with single quotes" do
let(:file_with_single_quote) { "file://" + Rails.root.join('..', 'spec', 'fixtures', "'file_with_single_quote'.mp4").to_s }
let!(:create_single_quote_job) { ActiveEncode::Base.create(file_with_single_quote, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_single_quote_job) { ActiveEncode::Base.find create_single_quote_job.id }

it "does not have errors" do
sleep 2
expect(find_single_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/pid")).not_to be_empty
end

context 'when uri encoded' do
let(:file_with_single_quote) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', "'file_with_single_quote'.mp4").to_s) }

it "does not have errors" do
sleep 2
expect(find_single_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_single_quote_job.id}/pid")).not_to be_empty
end
end
end

context "input filename with double quotes" do
let(:file_with_double_quote) { "file://" + Rails.root.join('..', 'spec', 'fixtures', '"file_with_double_quote".mp4').to_s }
let!(:create_double_quote_job) { ActiveEncode::Base.create(file_with_double_quote, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_double_quote_job) { ActiveEncode::Base.find create_double_quote_job.id }

it "does not have errors" do
sleep 2
expect(find_double_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/pid")).not_to be_empty
end

context 'when uri encoded' do
let(:file_with_double_quote) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', '"file_with_double_quote".mp4').to_s) }

it "does not have errors" do
sleep 2
expect(find_double_quote_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_double_quote_job.id}/pid")).not_to be_empty
end
end
end

context "input filename with other special characters" do
let(:file_with_special_characters) { "file://" + Rails.root.join('..', 'spec', 'fixtures', 'file.with :=+%sp3c!l-ch4cts().mp4').to_s }
let!(:create_special_characters_job) { ActiveEncode::Base.create(file_with_special_characters, outputs: [{ label: "low", ffmpeg_opt: "-s 640x480", extension: 'mp4' }]) }
let(:find_special_characters_job) { ActiveEncode::Base.find create_special_characters_job.id }

it "does not have errors" do
sleep 2
expect(find_special_characters_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/pid")).not_to be_empty
end

context 'when uri encoded' do
let(:file_with_special_characters) { Addressable::URI.encode("file://" + Rails.root.join('..', 'spec', 'fixtures', 'file.with :=+%sp3c!l-ch4cts().mp4').to_s) }

it "does not have errors" do
sleep 2
expect(find_special_characters_job.errors).to be_empty
end

it "has the input technical metadata in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/input_metadata")).not_to be_empty
end

it "has the pid in a file" do
expect(File.read("#{work_dir}/#{create_special_characters_job.id}/pid")).not_to be_empty
end
end
end

context 'when failed' do
subject { created_job }

Expand Down
51 changes: 51 additions & 0 deletions spec/integration/media_convert_adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,15 @@ def reconstitute_response(fixture_path)
let(:cloudwatch_events) { Aws::CloudWatchEvents::Client.new(stub_responses: true) }
let(:cloudwatch_logs) { Aws::CloudWatchLogs::Client.new(stub_responses: true) }

let(:s3client) { Aws::S3::Client.new(stub_responses: true) }

before do
mediaconvert.stub_responses(:describe_endpoints, reconstitute_response("media_convert/endpoints.json"))

allow(Aws::MediaConvert::Client).to receive(:new).and_return(mediaconvert)
allow(Aws::CloudWatchEvents::Client).to receive(:new).and_return(cloudwatch_events)
allow(Aws::CloudWatchLogs::Client).to receive(:new).and_return(cloudwatch_logs)
allow(Aws::S3::Client).to receive(:new).and_return(s3client)
end

let(:created_job) do
Expand Down Expand Up @@ -267,4 +270,52 @@ def reconstitute_response(fixture_path)
completed_job
end
end

describe "#s3_uri" do
context "when filename has no special characters" do
context "non-s3 file" do
let(:input_url) { "spec/fixtures/fireworks.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
expect(described_class.new.send(:s3_uri, input_url, { masterfile_bucket: source_bucket })).to eq "randomstring/fireworks.mp4"
end
end
context "s3 file" do
let(:input_url) { "s3://bucket1/file.mp4" }
let(:source_bucket) { "bucket1" }

it "calls the #check_s3_bucket method" do
expect(described_class.new.send(:s3_uri, input_url, { masterfile_bucket: source_bucket })).to eq "file.mp4"
end
end
end
context "when filename has special characters" do
context "non-s3 file" do
let(:input) { ["'file_with_single_quote'.mp4", '"file_with_double_quote".mp4', "file with space.mp4", "file.with...periods.mp4", "file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket1" }

it "calls the #upload_to_s3 method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input.each_with_index do |url, index|
expect(described_class.new.send(:s3_uri, "spec/fixtures/#{url}", { masterfile_bucket: source_bucket })).to eq "randomstring/#{clean[index]}"
end
end
end
context "s3 file" do
let(:input_urls) { ["s3://bucket1/'file_with_single_quote'.mp4", 's3://bucket1/"file_with_double_quote".mp4', "s3://bucket1/file with space.mp4", "s3://bucket1/file.with...periods.mp4", "s3://bucket1/file.with :=+%sp3c!l-ch4cts().mp4"] }
let(:clean) { ["_file_with_single_quote_.mp4", "_file_with_double_quote_.mp4", "file_with_space.mp4", "filewithperiods.mp4", "filewith_____sp3c_l-ch4cts__.mp4"] }
let(:source_bucket) { "bucket2" }

it "calls the #check_s3_bucket method" do
allow(SecureRandom).to receive(:uuid).and_return("randomstring")
input_urls.each_with_index do |url, index|
expect(described_class.new.send(:s3_uri, url, { masterfile_bucket: source_bucket })).to eq "randomstring/#{clean[index]}"
end
end
end
end
end
end
Loading