Skip to content

Commit

Permalink
Fixed bug with WAV file wrongly parsed as MP3
Browse files Browse the repository at this point in the history
- Changed MP3 priority from 99 to 101
- Added a preventive check against WAV headers in the MP3 parser
- Added a new test that ensures we correctly parse files over HTTP even when not providing a filename hint
- Added a test verifying that MP3 is always the last parser used when not providing a filename hint
- Updated some fixture names
  • Loading branch information
CostinTanasoiu committed Aug 22, 2023
1 parent 46e328f commit d585078
Show file tree
Hide file tree
Showing 16 changed files with 86 additions and 10 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
### WAV
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
- c_M1F1-Alaw-AFsp.wav and invalid_d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)

### WEBP
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
Expand Down
8 changes: 7 additions & 1 deletion lib/parsers/mp3_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ def call(raw_io)
io.seek(0)
return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))

# Prevention against parsing WAV files.
io.seek(0)
wav_chunk_id, _wav_size, wav_riff_type = safe_read(io, 12).unpack('a4la4')
return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE'

# Read all the ID3 tags (or at least attempt to)
io.seek(0)
id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
Expand All @@ -90,6 +95,7 @@ def call(raw_io)

io.seek(ignore_bytes_at_head)


maybe_xing_header, initial_frames = parse_mpeg_frames(io)

return if initial_frames.empty?
Expand Down Expand Up @@ -315,5 +321,5 @@ def with_id3tag_local_configs
end
end

FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 101
end
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added spec/fixtures/WAV/c_SCAM_MIC_SOL001_RUN001.wav
Binary file not shown.
File renamed without changes.
16 changes: 14 additions & 2 deletions spec/format_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,24 @@
'FormatParser::CR3Parser',
'FormatParser::DPXParser',
'FormatParser::FLACParser',
'FormatParser::MP3Parser',
'FormatParser::OggParser',
'FormatParser::TIFFParser',
'FormatParser::WAVParser'
'FormatParser::WAVParser',
'FormatParser::MP3Parser'
])
end

it 'ensures that MP3 parser is the last one among all' do
parsers = FormatParser.parsers_for(
[:audio, :image, :document, :text, :video, :archive],
[:aac, :aiff, :arw, :bmp, :cr2, :cr3, :dpx, :fdx, :flac, :gif, :heif, :heic,
:jpg, :json, :m3u, :mov, :mp3, :mp4, :m4a, :m4b, :m4p, :m4r, :m4v, :mpg,
:mpeg, :nef, :ogg, :pdf, :png, :psd, :rw2, :tif, :wav, :webp, :zip]
)

parser_class_names = parsers.map { |parser| parser.class.name }
expect(parser_class_names.last).to eq 'FormatParser::MP3Parser'
end
end

describe '.register_parser and .deregister_parser' do
Expand Down
2 changes: 1 addition & 1 deletion spec/parsers/flac_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
end

it 'raises an error when sample rate is 0' do
fpath = fixtures_dir + 'FLAC/sample_rate_0.flac'
fpath = fixtures_dir + 'FLAC/invalid_sample_rate_0.flac'

expect {
subject.call(File.open(fpath, 'rb'))
Expand Down
6 changes: 6 additions & 0 deletions spec/parsers/mp3_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
expect(parsed).to be_nil
end

it 'does not misdetect a WAV' do
fpath = fixtures_dir + '/WAV/c_SCAM_MIC_SOL001_RUN001.wav'
parsed = subject.call(File.open(fpath, 'rb'))
expect(parsed).to be_nil
end

describe 'title/artist/album attributes' do
let(:parsed) { subject.call(File.open(fpath, 'rb')) }

Expand Down
6 changes: 3 additions & 3 deletions spec/parsers/pdf_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ def parse_pdf(pdf_filename)

describe 'broken PDF files should not parse' do
it 'PDF with missing version header' do
parsed_pdf = parse_pdf 'not_a.pdf'
parsed_pdf = parse_pdf 'invalid_not_a.pdf'
expect(parsed_pdf).to be_nil
end

it 'PDF 2.0 with offset start' do
parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
parsed_pdf = parse_pdf 'invalid PDF 2.0 with offset start.pdf'
expect(parsed_pdf).to be_nil
end

it 'exceeds the PDF read limit' do
parsed_pdf = parse_pdf 'exceed_PDF_read_limit.pdf'
parsed_pdf = parse_pdf 'invalid_exceed_PDF_read_limit.pdf'
expect(parsed_pdf).to be_nil
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/parsers/wav_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
expect {
subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
}.to raise_error(FormatParser::IOUtils::InvalidRead)
end
end
2 changes: 1 addition & 1 deletion spec/parsers/webp_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
end

it 'does not parse files with an unrecognised variant' do
result = subject.call(File.open(fixtures_dir + 'WEBP/unrecognised-variant.webp', 'rb'))
result = subject.call(File.open(fixtures_dir + 'WEBP/invalid-unrecognised-variant.webp', 'rb'))
expect(result).to be_nil
end

Expand Down
52 changes: 52 additions & 0 deletions spec/remote_fetching_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,58 @@
expect(file_information.format).to eq(:png)
end

describe 'correctly parses WAV files without falling back to another filetype' do
['c_8kmp316.wav', 'c_SCAM_MIC_SOL001_RUN001.wav'].each do |filename|
it "parses WAV file #{filename}" do
remote_url = 'http://localhost:9399/WAV/' + filename
file_information = FormatParser.parse_http(remote_url)
expect(file_information).not_to be_nil
expect(file_information.format).to eq(:wav)
end
end
end

describe "correctly parses files over HTTP without filename hint" do
nature_fixture_dirs = {
:document => ['PDF'],
:audio => ['AAC', 'AIFF', 'FLAC', 'MP3', 'WAV'],
:video => ['MOV', 'MP4'],
:image => ['ARW', 'CR2', 'CR3', 'GIF', 'JPG', 'NEF', 'PNG', 'PSD', 'RW2', 'TIF', 'WEBP']
}
nature_fixture_dirs.each { |nature, dirs|
dirs.each do |file_type_dir|
Dir.glob(fixtures_dir + "/#{file_type_dir}/*.*").each do |file_path|
file_name = File.basename(file_path)
next if file_name.include? "invalid"

expected_format = file_type_dir.downcase.to_sym
if file_type_dir == 'HEIF'
expected_format = File.extname(file_name).delete('.').downcase.to_sym
end

it "parses #{file_type_dir} file: #{file_name}" do
url = "http://localhost:9399/#{file_type_dir}/#{file_name}?some_param=test".gsub(' ', '%20')
file_information = FormatParser.parse_http(url)
expect(file_information).not_to be_nil
expect(file_information.nature).to eq(nature)
expect(file_information.format == expected_format).to be_truthy
end

end
end
}

# Dir.glob(fixtures_dir + "/HEIF/*.*").each do |file_path|
# file_name = File.basename(file_path)
# next if file_name.include? "invalid"
#
# expected_format = file_type_dir.downcase.to_sym
# it "parses #{file_type_dir} file: #{file_name}" do
# parseFile(file_type_dir, file_name, nature, expected_format)
# end
# end
end

describe 'when parsing remote fixtures' do
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
filename = File.basename(fixture_path)
Expand Down

0 comments on commit d585078

Please sign in to comment.