@@ -355,10 +355,10 @@ def __strip_snippets(self, file: str, wfp: str) -> str:
355
355
356
356
def __detect_line_endings (self , contents : bytes ) -> Tuple [bool , bool , bool , bool ]:
357
357
"""Detect the types of line endings present in file contents.
358
-
358
+
359
359
Args:
360
360
contents: File contents as bytes.
361
-
361
+
362
362
Returns:
363
363
Tuple of (has_crlf, has_lf_only, has_cr_only, has_mixed) indicating which line ending types are present.
364
364
"""
@@ -368,51 +368,54 @@ def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool, bool
368
368
has_standalone_lf = b'\n ' in content_without_crlf
369
369
# For CR detection, we need to find CR that's not part of CRLF
370
370
has_standalone_cr = b'\r ' in content_without_crlf
371
-
371
+
372
372
# Check if we have mixed line endings
373
373
line_ending_count = sum ([has_crlf , has_standalone_lf , has_standalone_cr ])
374
374
has_mixed = line_ending_count > 1
375
-
375
+
376
376
return has_crlf , has_standalone_lf , has_standalone_cr , has_mixed
377
377
378
def __calculate_opposite_line_ending_hash(self, contents: bytes):
    """Hash the contents as they would look with the opposite line-ending convention.

    A file that uses only Windows (CRLF) endings is hashed in its Unix (LF)
    form. Any other file that has line endings (LF, CR, or a mixture) is
    hashed in its Windows (CRLF) form.

    Args:
        contents: File contents as bytes.

    Returns:
        MD5 hex digest of the converted contents, or None when the contents
        hold no line endings at all.
    """
    found_crlf, found_lf, found_cr, _ = self.__detect_line_endings(contents)

    # Nothing to flip: without any line ending the "opposite" form equals the input.
    if not (found_crlf or found_lf or found_cr):
        return None

    # Collapse every convention to LF first so the conversion below is uniform.
    unix_form = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')

    # Pure CRLF input -> Unix hash; everything else (LF, CR, mixed) -> Windows hash.
    pure_crlf = found_crlf and not (found_lf or found_cr)
    flipped = unix_form if pure_crlf else unix_form.replace(b'\n', b'\r\n')

    return hashlib.md5(flipped).hexdigest()
404
407
405
408
def __should_generate_opposite_hash(self, contents: bytes) -> bool:
    """Determine if an opposite line ending hash (fh2) should be generated.

    Args:
        contents: File contents as bytes.

    Returns:
        True if the contents hold at least one line ending (CRLF, LF, or CR),
        so an opposite-line-ending hash can be produced; False otherwise.
    """
    # __detect_line_endings returns four flags
    # (has_crlf, has_lf_only, has_cr_only, has_mixed). All four must be
    # unpacked - binding only three names raises ValueError at runtime.
    # The "mixed" flag is irrelevant here, so it is discarded.
    has_crlf, has_standalone_lf, has_standalone_cr, _ = self.__detect_line_endings(contents)

    # Generate fh2 hash when file has any line endings (CRLF, LF, or CR)
    # This allows us to always produce the opposite hash
    return has_crlf or has_standalone_lf or has_standalone_cr
@@ -448,9 +451,10 @@ def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:
448
451
wfp = 'file={0},{1},{2}\n ' .format (file_md5 , content_length , wfp_filename )
449
452
450
453
# Add opposite line ending hash based on line ending analysis
451
- if self .__should_generate_opposite_hash (contents ):
454
+ if not bin_file and self .__should_generate_opposite_hash (contents ):
452
455
opposite_hash = self .__calculate_opposite_line_ending_hash (contents )
453
- wfp += f'fh2={ opposite_hash } \n '
456
+ if opposite_hash is not None :
457
+ wfp += f'fh2={ opposite_hash } \n '
454
458
455
459
# We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
456
460
if bin_file or self .skip_snippets or self .__skip_snippets (file , contents .decode ('utf-8' , 'ignore' )):
0 commit comments