From 5748e9975ec15a77e2af23fc97339225a5bad1b5 Mon Sep 17 00:00:00 2001
From: User name <user@example.com>
Date: Sat, 23 May 2020 15:22:32 +0200
Subject: [PATCH 1/2] Metadata UnicodeDecodeError Fix

Some movies fail to decode solely because their metadata contains bytes that are not valid UTF-8.

Traceback (most recent call last):
  File "av\utils.pyx", line 14, in av.utils._decode
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte

Example:
Ignoring UnicodeDecodeError bytes from : b'AVI-Mux GUI 1.17.8, Aug 30 2008  12:36:58\xff'
-> 'AVI-Mux GUI 1.17.8, Aug 30 2008  12:36:58'

Proposed solution:
When decoding raises UnicodeDecodeError, retry with the errors flag overridden
(from 'strict' to 'ignore') so that the undecodable bytes are simply dropped.
The original metadata bytes are also printed as a warning.
---
 av/utils.pyx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/av/utils.pyx b/av/utils.pyx
index f7db60b..1f4831e 100644
--- a/av/utils.pyx
+++ b/av/utils.pyx
@@ -11,7 +11,12 @@ cimport libav as lib
 # ====================
 
 cdef _decode(char *s, encoding, errors):
-    return (<bytes>s).decode(encoding, errors)
+    try:
+      decoded = (<bytes>s).decode(encoding, errors)
+    except UnicodeDecodeError:
+      print("Ignoring UnicodeDecodeError bytes from : %s" % (str(<bytes>s)))
+      decoded = (<bytes>s).decode(encoding, errors='ignore')
+    return decoded
 
 cdef bytes _encode(s, encoding, errors):
     return s.encode(encoding, errors)
-- 
GitLab


From 6c46ce72ba99b501b39876d877917adbe345a016 Mon Sep 17 00:00:00 2001
From: User name <user@example.com>
Date: Sat, 23 May 2020 15:34:56 +0200
Subject: [PATCH 2/2] indentation fix

---
 av/utils.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/av/utils.pyx b/av/utils.pyx
index 1f4831e..35f796f 100644
--- a/av/utils.pyx
+++ b/av/utils.pyx
@@ -12,10 +12,10 @@ cimport libav as lib
 
 cdef _decode(char *s, encoding, errors):
     try:
-      decoded = (<bytes>s).decode(encoding, errors)
+        decoded = (<bytes>s).decode(encoding, errors)
     except UnicodeDecodeError:
-      print("Ignoring UnicodeDecodeError bytes from : %s" % (str(<bytes>s)))
-      decoded = (<bytes>s).decode(encoding, errors='ignore')
+        print("Ignoring UnicodeDecodeError bytes from : %s" % (str(<bytes>s)))
+        decoded = (<bytes>s).decode(encoding, errors='ignore')
     return decoded
 
 cdef bytes _encode(s, encoding, errors):
-- 
GitLab