Skip to content

Commit ede0477

Browse files
committed
[SP-2655] Fix tests and optimize performance
1 parent 8966848 commit ede0477

File tree

2 files changed

+6
-25
lines changed

2 files changed

+6
-25
lines changed

src/scanoss/winnowing.py

Lines changed: 4 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -353,7 +353,7 @@ def __strip_snippets(self, file: str, wfp: str) -> str:
353353
self.print_debug(f'Stripped snippet ids from {file}')
354354
return wfp
355355

356-
def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool, bool]:
356+
def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
357357
"""Detect the types of line endings present in file contents.
358358
359359
Args:
@@ -369,11 +369,7 @@ def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool, bool
369369
# For CR detection, we need to find CR that's not part of CRLF
370370
has_standalone_cr = b'\r' in content_without_crlf
371371

372-
# Check if we have mixed line endings
373-
line_ending_count = sum([has_crlf, has_standalone_lf, has_standalone_cr])
374-
has_mixed = line_ending_count > 1
375-
376-
return has_crlf, has_standalone_lf, has_standalone_cr, has_mixed
372+
return has_crlf, has_standalone_lf, has_standalone_cr
377373

378374
def __calculate_opposite_line_ending_hash(self, contents: bytes):
379375
"""Calculate hash for contents with opposite line endings.
@@ -387,7 +383,7 @@ def __calculate_opposite_line_ending_hash(self, contents: bytes):
387383
Returns:
388384
Hash with opposite line endings as hex string, or None if no line endings detected.
389385
"""
390-
has_crlf, has_standalone_lf, has_standalone_cr, has_mixed = self.__detect_line_endings(contents)
386+
has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
391387

392388
if not has_crlf and not has_standalone_lf and not has_standalone_cr:
393389
return None
@@ -405,21 +401,6 @@ def __calculate_opposite_line_ending_hash(self, contents: bytes):
405401

406402
return hashlib.md5(opposite_contents).hexdigest()
407403

408-
def __should_generate_opposite_hash(self, contents: bytes) -> bool:
409-
"""Determine if an opposite line ending hash (fh2) should be generated.
410-
411-
Args:
412-
contents: File contents as bytes.
413-
414-
Returns:
415-
True if fh2 hash should be generated, False otherwise.
416-
"""
417-
has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
418-
419-
# Generate fh2 hash when file has any line endings (CRLF, LF, or CR)
420-
# This allows us to always produce the opposite hash
421-
return has_crlf or has_standalone_lf or has_standalone_cr
422-
423404
def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str: # noqa: PLR0912, PLR0915
424405
"""
425406
Generate a Winnowing fingerprint (WFP) for the given file contents
@@ -451,7 +432,7 @@ def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:
451432
wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
452433

453434
# Add opposite line ending hash based on line ending analysis
454-
if not bin_file and self.__should_generate_opposite_hash(contents):
435+
if not bin_file:
455436
opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
456437
if opposite_hash is not None:
457438
wfp += f'fh2={opposite_hash}\n'

tests/test_winnowing.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -275,8 +275,8 @@ def test_binary_file_with_line_endings(self):
275275

276276
print(f'Binary file WFP:\n{wfp}')
277277

278-
# Binary files should still generate fh2 if they have line endings (platform independent)
279-
self.assertIn('fh2=', wfp)
278+
# Binary files should not generate fh2
279+
self.assertNotIn('fh2=', wfp)
280280

281281
def test_cr_only_line_endings(self):
282282
"""Test classic Mac CR-only line endings."""

0 commit comments

Comments
 (0)