From 5748e9975ec15a77e2af23fc97339225a5bad1b5 Mon Sep 17 00:00:00 2001 From: User name <user@example.com> Date: Sat, 23 May 2020 15:22:32 +0200 Subject: [PATCH 1/2] Metadata UnicodeDecodeError Fix Some movies fail to decode just because of bad metadata. Traceback (most recent call last): File "av\utils.pyx", line 14, in av.utils._decode UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte Example: Ignoring UnicodeDecodeError bytes from : b'AVI-Mux GUI 1.17.8, Aug 30 2008 12:36:58\xff' -> 'AVI-Mux GUI 1.17.8, Aug 30 2008 12:36:58' Proposed solution: when decoding with the requested errors mode raises UnicodeDecodeError, retry with errors='ignore' so the bad bytes are dropped, and print the original metadata bytes as a warning. --- av/utils.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/av/utils.pyx b/av/utils.pyx index f7db60b..1f4831e 100644 --- a/av/utils.pyx +++ b/av/utils.pyx @@ -11,7 +11,12 @@ cimport libav as lib # ==================== cdef _decode(char *s, encoding, errors): - return (<bytes>s).decode(encoding, errors) + try: + decoded = (<bytes>s).decode(encoding, errors) + except UnicodeDecodeError: + print("Ignoring UnicodeDecodeError bytes from : %s" % (str(<bytes>s))) + decoded = (<bytes>s).decode(encoding, errors='ignore') + return decoded cdef bytes _encode(s, encoding, errors): return s.encode(encoding, errors) -- GitLab From 6c46ce72ba99b501b39876d877917adbe345a016 Mon Sep 17 00:00:00 2001 From: User name <user@example.com> Date: Sat, 23 May 2020 15:34:56 +0200 Subject: [PATCH 2/2] indentation fix --- av/utils.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/av/utils.pyx b/av/utils.pyx index 1f4831e..35f796f 100644 --- a/av/utils.pyx +++ b/av/utils.pyx @@ -12,10 +12,10 @@ cimport libav as lib cdef _decode(char *s, encoding, errors): try: - decoded = (<bytes>s).decode(encoding, errors) + decoded = (<bytes>s).decode(encoding, errors) except UnicodeDecodeError: - print("Ignoring UnicodeDecodeError bytes from 
: %s" % (str(<bytes>s))) - decoded = (<bytes>s).decode(encoding, errors='ignore') + print("Ignoring UnicodeDecodeError bytes from : %s" % (str(<bytes>s))) + decoded = (<bytes>s).decode(encoding, errors='ignore') return decoded cdef bytes _encode(s, encoding, errors): -- GitLab