Skip to content
GitLab
    • Explore Projects Groups Snippets
Projects Groups Snippets
  • /
  • Help
    • Help
    • Support
    • Community forum
    • Submit feedback
    • Contribute to GitLab
  • Sign in / Register
  • A ArduinoJson
  • Project information
    • Project information
    • Activity
    • Labels
    • Members
  • Repository
    • Repository
    • Files
    • Commits
    • Branches
    • Tags
    • Contributors
    • Graph
    • Compare
  • Issues 24
    • Issues 24
    • List
    • Boards
    • Service Desk
    • Milestones
  • Merge requests 0
    • Merge requests 0
  • CI/CD
    • CI/CD
    • Pipelines
    • Jobs
    • Schedules
  • Deployments
    • Deployments
    • Environments
    • Releases
  • Packages and registries
    • Packages and registries
    • Package Registry
    • Infrastructure Registry
  • Monitor
    • Monitor
    • Incidents
  • Analytics
    • Analytics
    • Value stream
    • CI/CD
    • Repository
  • Wiki
    • Wiki
  • Snippets
    • Snippets
  • Activity
  • Graph
  • Create a new issue
  • Jobs
  • Commits
  • Issue Boards
Collapse sidebar
  • Benoît Blanchon
  • ArduinoJson
  • Merge requests
  • !1157

Decode escaped UTF-16 surrogates to UTF-8

  • Review changes

  • Download
  • Email patches
  • Plain diff
Closed Administrator requested to merge github/fork/versioduo/decode-unicode into 6.x 5 years ago
  • Overview 6
  • Commits 1
  • Pipelines 0
  • Changes 4

Created by: kaysievers

This adds the missing decoding of UTF-16 surrogates.

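We use plain JSON over a MIDI (Musical Instrument Digital Interface) transport, which can carry only 7-bit byte streams. Non-ASCII Unicode characters in the JSON therefore have to be escaped as \uXXXX sequences to satisfy this requirement, and characters outside the Basic Multilingual Plane arrive as escaped UTF-16 surrogate pairs that the deserializer has to decode.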

Compare
  • 6.x (base)

and
  • latest version
    5d8b16be
    1 commit, 2 years ago

4 files
+ 31
- 9

    Preferences

    File browser
    Compare changes
extras/tests/JsonDeserializer
invalid_input.cpp +1 -0
string.cpp +4 -4
src/ArduinoJson/Json
JsonDeserializer.hpp +18 -1
Utf8.hpp +8 -4
extras/tests/JsonDeserializer/invalid_input.cpp
+ 1
- 0
  • View file @ 5d8b16be


@@ -8,6 +8,7 @@
TEST_CASE("Invalid JSON input") {
const char* testCases[] = {"'\\u'", "'\\u000g'", "'\\u000'", "'\\u000G'",
"'\\ud83d\\ud83d'", "'\\udda4'", "'\\ud83d_'",
"'\\u000/'", "\\x1234", "6a9", "1,",
"2]", "3}"};
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
extras/tests/JsonDeserializer/string.cpp
+ 4
- 4
  • View file @ 5d8b16be


@@ -17,10 +17,10 @@ TEST_CASE("Valid JSON strings value") {
{"\'hello world\'", "hello world"},
{"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
{"'\\u0041'", "A"},
{"'\\u00e4'", "\xc3\xa4"}, // ä
{"'\\u00E4'", "\xc3\xa4"}, // ä
{"'\\u3042'", "\xe3\x81\x82"}, // あ
{"'\\u00e4'", "\xc3\xa4"}, // ä
{"'\\u00E4'", "\xc3\xa4"}, // ä
{"'\\u3042'", "\xe3\x81\x82"}, // あ
{"'\\ud83d\\udda4'", "\xf0\x9f\x96\xa4"}, // 🖤
};
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
src/ArduinoJson/Json/JsonDeserializer.hpp
+ 18
- 1
  • View file @ 5d8b16be


@@ -189,6 +189,7 @@ class JsonDeserializer {
DeserializationError parseQuotedString(const char *&result) {
StringBuilder builder = _stringStorage.startString();
uint16_t surrogate1 = 0;
const char stopChar = current();
move();
@@ -208,7 +209,20 @@ class JsonDeserializer {
move();
DeserializationError err = parseCodepoint(codepoint);
if (err) return err;
Utf8::encodeCodepoint(codepoint, builder);
if (codepoint >= 0xd800 && codepoint <= 0xdbff) {
if (surrogate1 > 0)
return DeserializationError::InvalidInput;
    • Benoît Blanchon
      Benoît Blanchon @bblanchon · 5 years ago
      Owner

      Line not covered in unit tests.

      • Please register or sign in to reply
Please register or sign in to reply
surrogate1 = codepoint;
} else if (codepoint >= 0xdc00 && codepoint <= 0xdfff) {
if (surrogate1 == 0)
return DeserializationError::InvalidInput;
    • Benoît Blanchon
      Benoît Blanchon @bblanchon · 5 years ago
      Owner

      Line not covered in unit tests.

      • Please register or sign in to reply
Please register or sign in to reply
uint32_t codepoint32 = 0x10000;
codepoint32 += static_cast<uint32_t>(surrogate1 - 0xd800) << 10;
codepoint32 += codepoint - 0xdc00;
Utf8::encodeCodepoint(codepoint32, builder);
surrogate1 = 0;
} else
Utf8::encodeCodepoint(codepoint, builder);
continue;
#else
return DeserializationError::NotSupported;
@@ -220,6 +234,9 @@ class JsonDeserializer {
move();
}
if (surrogate1 > 0)
return DeserializationError::InvalidInput;
    • Benoît Blanchon
      Benoît Blanchon @bblanchon · 5 years ago
      Owner

      Line not covered in unit tests.

      • Please register or sign in to reply
Please register or sign in to reply
builder.append(c);
}
src/ArduinoJson/Json/Utf8.hpp
+ 8
- 4
  • View file @ 5d8b16be


@@ -10,17 +10,21 @@ namespace ARDUINOJSON_NAMESPACE {
namespace Utf8 {
template <typename TStringBuilder>
inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
inline void encodeCodepoint(uint32_t codepoint, TStringBuilder &str) {
if (codepoint < 0x80) {
str.append(char(codepoint));
return;
}
if (codepoint >= 0x00000800) {
if (codepoint < 0x00000800) {
str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
} else if (codepoint < 0x00010000) {
str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
} else {
str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
} else if (codepoint < 0x00110000) {
str.append(char(0xf0 /*0b11110000*/ | (codepoint >> 18)));
str.append(char(((codepoint >> 12) & 0x3f /*0b00111111*/) | 0x80));
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
}
str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
}
0 Assignees
None
Assign to
Reviewer
Benoît Blanchon's avatar
Benoît Blanchon
Request review from
Labels
0
None
0
None
    Assign labels
  • Manage project labels

Milestone
No milestone
None
None
Time tracking
No estimate or time spent
Lock merge request
Unlocked
2
2 participants
Benoît Blanchon
Administrator
Reference: bblanchon/ArduinoJson!1157
Source branch: github/fork/versioduo/decode-unicode

Menu

Explore Projects Groups Snippets