Source: main/webapp/modules/UTF8Parser.js

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. var Guacamole = Guacamole || {};
  20. /**
  21. * Parser that decodes UTF-8 text from a series of provided ArrayBuffers.
  22. * Multi-byte characters that continue from one buffer to the next are handled
  23. * correctly.
  24. *
  25. * @constructor
  26. */
  27. Guacamole.UTF8Parser = function UTF8Parser() {
  28. /**
  29. * The number of bytes remaining for the current codepoint.
  30. *
  31. * @private
  32. * @type {!number}
  33. */
  34. var bytesRemaining = 0;
  35. /**
  36. * The current codepoint value, as calculated from bytes read so far.
  37. *
  38. * @private
  39. * @type {!number}
  40. */
  41. var codepoint = 0;
  42. /**
  43. * Decodes the given UTF-8 data into a Unicode string, returning a string
  44. * containing all complete UTF-8 characters within the provided data. The
  45. * data may end in the middle of a multi-byte character, in which case the
  46. * complete character will be returned from a later call to decode() after
  47. * enough bytes have been provided.
  48. *
  49. * @private
  50. * @param {!ArrayBuffer} buffer
  51. * Arbitrary UTF-8 data.
  52. *
  53. * @return {!string}
  54. * The decoded Unicode string.
  55. */
  56. this.decode = function decode(buffer) {
  57. var text = '';
  58. var bytes = new Uint8Array(buffer);
  59. for (var i=0; i<bytes.length; i++) {
  60. // Get current byte
  61. var value = bytes[i];
  62. // Start new codepoint if nothing yet read
  63. if (bytesRemaining === 0) {
  64. // 1 byte (0xxxxxxx)
  65. if ((value | 0x7F) === 0x7F)
  66. text += String.fromCharCode(value);
  67. // 2 byte (110xxxxx)
  68. else if ((value | 0x1F) === 0xDF) {
  69. codepoint = value & 0x1F;
  70. bytesRemaining = 1;
  71. }
  72. // 3 byte (1110xxxx)
  73. else if ((value | 0x0F )=== 0xEF) {
  74. codepoint = value & 0x0F;
  75. bytesRemaining = 2;
  76. }
  77. // 4 byte (11110xxx)
  78. else if ((value | 0x07) === 0xF7) {
  79. codepoint = value & 0x07;
  80. bytesRemaining = 3;
  81. }
  82. // Invalid byte
  83. else
  84. text += '\uFFFD';
  85. }
  86. // Continue existing codepoint (10xxxxxx)
  87. else if ((value | 0x3F) === 0xBF) {
  88. codepoint = (codepoint << 6) | (value & 0x3F);
  89. bytesRemaining--;
  90. // Write codepoint if finished
  91. if (bytesRemaining === 0)
  92. text += String.fromCharCode(codepoint);
  93. }
  94. // Invalid byte
  95. else {
  96. bytesRemaining = 0;
  97. text += '\uFFFD';
  98. }
  99. }
  100. return text;
  101. };
  102. };