Browse Source

src: support UTF-8 in compiled-in JS source files

Detect when source files in lib/ are not ASCII.  Decode them as UTF-8
and store them as UTF-16 in the binary so they can be used as external
string resources without non-ASCII characters getting mangled.

Fixes: https://github.com/nodejs/node/issues/10673
PR-URL: https://github.com/nodejs/node/pull/11129
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
v7.x
Ben Noordhuis 8 years ago
committed by Italo A. Casas
parent
commit
3380cd5fdb
No known key found for this signature in database GPG Key ID: 23EFEFE93C4CFFFE
  1. 8
      node.gyp
  2. 51
      src/node_javascript.cc
  3. 117
      tools/js2c.py

8
node.gyp

@ -145,7 +145,7 @@
'src', 'src',
'tools/msvs/genfiles', 'tools/msvs/genfiles',
'deps/uv/src/ares', 'deps/uv/src/ares',
'<(SHARED_INTERMEDIATE_DIR)', # for node_natives.h '<(SHARED_INTERMEDIATE_DIR)',
], ],
'sources': [ 'sources': [
@ -166,7 +166,6 @@
'src/node_debug_options.cc', 'src/node_debug_options.cc',
'src/node_file.cc', 'src/node_file.cc',
'src/node_http_parser.cc', 'src/node_http_parser.cc',
'src/node_javascript.cc',
'src/node_main.cc', 'src/node_main.cc',
'src/node_os.cc', 'src/node_os.cc',
'src/node_revert.cc', 'src/node_revert.cc',
@ -234,11 +233,11 @@
'deps/http_parser/http_parser.h', 'deps/http_parser/http_parser.h',
'deps/v8/include/v8.h', 'deps/v8/include/v8.h',
'deps/v8/include/v8-debug.h', 'deps/v8/include/v8-debug.h',
'<(SHARED_INTERMEDIATE_DIR)/node_natives.h',
# javascript files to make for an even more pleasant IDE experience # javascript files to make for an even more pleasant IDE experience
'<@(library_files)', '<@(library_files)',
# node.gyp is added to the project by default. # node.gyp is added to the project by default.
'common.gypi', 'common.gypi',
'<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
], ],
'defines': [ 'defines': [
@ -711,12 +710,13 @@
'actions': [ 'actions': [
{ {
'action_name': 'node_js2c', 'action_name': 'node_js2c',
'process_outputs_as_sources': 1,
'inputs': [ 'inputs': [
'<@(library_files)', '<@(library_files)',
'./config.gypi', './config.gypi',
], ],
'outputs': [ 'outputs': [
'<(SHARED_INTERMEDIATE_DIR)/node_natives.h', '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
], ],
'conditions': [ 'conditions': [
[ 'node_use_dtrace=="false" and node_use_etw=="false"', { [ 'node_use_dtrace=="false" and node_use_etw=="false"', {

51
src/node_javascript.cc

@ -1,51 +0,0 @@
#include "node.h"
#include "node_natives.h"
#include "v8.h"
#include "env.h"
#include "env-inl.h"
namespace node {
using v8::Local;
using v8::NewStringType;
using v8::Object;
using v8::String;
// For every entry of NODE_NATIVES_MAP, V(id) defines one file-local object
// named id##_external_data.  Its type is an anonymous struct derived from
// String::ExternalOneByteStringResource whose data()/length() expose the
// id##_data array.
//
// id##_data is defined in node_natives.h.
//
// length() uses sizeof on the array itself, so it is the array's size in
// bytes.  Dispose() is overridden with an empty body because the objects
// have static storage duration and must not be deleted.
#define V(id) \
static struct : public String::ExternalOneByteStringResource { \
const char* data() const override { \
return reinterpret_cast<const char*>(id##_data); \
} \
size_t length() const override { return sizeof(id##_data); } \
void Dispose() override { /* Default calls `delete this`. */ } \
} id##_external_data;
// Instantiate one resource object per built-in source listed in the map.
NODE_NATIVES_MAP(V)
#undef V
// Returns a V8 string for the bootstrap script.  The string is created as an
// external one-byte string on top of internal_bootstrap_node_external_data.
// The MaybeLocal is unwrapped with ToLocalChecked(), so a failed creation is
// not handled here.
Local<String> MainSource(Environment* env) {
  return String::NewExternalOneByte(
             env->isolate(), &internal_bootstrap_node_external_data)
      .ToLocalChecked();
}
// Sets one property on `target` for every entry of NODE_NATIVES_MAP.
//
// env:    supplies the isolate and the context used for the Set() calls.
// target: object that receives the properties.
void DefineJavaScript(Environment* env, Local<Object> target) {
auto context = env->context();
// V(id) expands to a single statement that:
//   - builds the property key from id##_name as a one-byte string, passing
//     sizeof(id##_name) as the length;
//   - builds the property value as an external one-byte string on top of
//     id##_external_data;
//   - CHECKs that target->Set() reported success.
// The trailing `;` after `while (0)` is part of the macro because the
// expansions below are not followed by one at the use site.
#define V(id) \
do { \
auto key = \
String::NewFromOneByte( \
env->isolate(), id##_name, NewStringType::kNormal, \
sizeof(id##_name)).ToLocalChecked(); \
auto value = \
String::NewExternalOneByte( \
env->isolate(), &id##_external_data).ToLocalChecked(); \
CHECK(target->Set(context, key, value).FromJust()); \
} while (0);
NODE_NATIVES_MAP(V)
#undef V
}
} // namespace node

117
tools/js2c.py

@ -37,13 +37,16 @@ import sys
import string import string
def ToCString(contents): def ToCArray(elements, step=10):
step = 20 slices = (elements[i:i+step] for i in xrange(0, len(elements), step))
slices = (contents[i:i+step] for i in xrange(0, len(contents), step)) slices = map(lambda s: ','.join(str(x) for x in s), slices)
slices = map(lambda s: ','.join(str(ord(c)) for c in s), slices)
return ',\n'.join(slices) return ',\n'.join(slices)
def ToCString(contents):
return ToCArray(map(ord, contents), step=20)
def ReadFile(filename): def ReadFile(filename):
file = open(filename, "rt") file = open(filename, "rt")
try: try:
@ -161,34 +164,72 @@ def ReadMacros(lines):
return (constants, macros) return (constants, macros)
HEADER_TEMPLATE = """\ TEMPLATE = """
#ifndef NODE_NATIVES_H_ #include "node.h"
#define NODE_NATIVES_H_ #include "node_javascript.h"
#include "v8.h"
#include "env.h"
#include "env-inl.h"
#include <stdint.h> namespace node {{
#define NODE_NATIVES_MAP(V) \\ {definitions}
{node_natives_map}
v8::Local<v8::String> MainSource(Environment* env) {{
return internal_bootstrap_node_value.ToStringChecked(env->isolate());
}}
void DefineJavaScript(Environment* env, v8::Local<v8::Object> target) {{
{initializers}
}}
namespace node {{
{sources}
}} // namespace node }} // namespace node
"""
#endif // NODE_NATIVES_H_ ONE_BYTE_STRING = """
static const uint8_t raw_{var}[] = {{ {data} }};
static struct : public v8::String::ExternalOneByteStringResource {{
const char* data() const override {{
return reinterpret_cast<const char*>(raw_{var});
}}
size_t length() const override {{ return arraysize(raw_{var}); }}
void Dispose() override {{ /* Default calls `delete this`. */ }}
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {{
return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
}}
}} {var};
""" """
TWO_BYTE_STRING = """
static const uint16_t raw_{var}[] = {{ {data} }};
static struct : public v8::String::ExternalStringResource {{
const uint16_t* data() const override {{ return raw_{var}; }}
size_t length() const override {{ return arraysize(raw_{var}); }}
void Dispose() override {{ /* Default calls `delete this`. */ }}
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {{
return v8::String::NewExternalTwoByte(isolate, this).ToLocalChecked();
}}
}} {var};
"""
NODE_NATIVES_MAP = """\ INITIALIZER = """\
V({escaped_id}) \\ CHECK(target->Set(env->context(),
{key}.ToStringChecked(env->isolate()),
{value}.ToStringChecked(env->isolate())).FromJust());
""" """
SOURCES = """\ def Render(var, data):
static const uint8_t {escaped_id}_name[] = {{ # Treat non-ASCII as UTF-8 and convert it to UTF-16.
{name}}}; if any(ord(c) > 127 for c in data):
static const uint8_t {escaped_id}_data[] = {{ template = TWO_BYTE_STRING
{data}}}; data = map(ord, data.decode('utf-8').encode('utf-16be'))
""" data = [data[i] * 256 + data[i+1] for i in xrange(0, len(data), 2)]
data = ToCArray(data)
else:
template = ONE_BYTE_STRING
data = ToCString(data)
return template.format(var=var, data=data)
def JS2C(source, target): def JS2C(source, target):
@ -207,36 +248,32 @@ def JS2C(source, target):
(consts, macros) = ReadMacros(macro_lines) (consts, macros) = ReadMacros(macro_lines)
# Build source code lines # Build source code lines
node_natives_map = [] definitions = []
sources = [] initializers = []
for s in modules: for name in modules:
lines = ReadFile(str(s)) lines = ReadFile(str(name))
lines = ExpandConstants(lines, consts) lines = ExpandConstants(lines, consts)
lines = ExpandMacros(lines, macros) lines = ExpandMacros(lines, macros)
data = ToCString(lines)
# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar" # On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
# so don't assume there is always a slash in the file path. # so don't assume there is always a slash in the file path.
if '/' in s or '\\' in s: if '/' in name or '\\' in name:
id = '/'.join(re.split('/|\\\\', s)[1:]) name = '/'.join(re.split('/|\\\\', name)[1:])
else:
id = s
if '.' in id:
id = id.split('.', 1)[0]
name = ToCString(id) name = name.split('.', 1)[0]
escaped_id = id.replace('-', '_').replace('/', '_') var = name.replace('-', '_').replace('/', '_')
node_natives_map.append(NODE_NATIVES_MAP.format(**locals())) key = '%s_key' % var
sources.append(SOURCES.format(**locals())) value = '%s_value' % var
node_natives_map = ''.join(node_natives_map) definitions.append(Render(key, name))
sources = ''.join(sources) definitions.append(Render(value, lines))
initializers.append(INITIALIZER.format(key=key, value=value))
# Emit result # Emit result
output = open(str(target[0]), "w") output = open(str(target[0]), "w")
output.write(HEADER_TEMPLATE.format(**locals())) output.write(TEMPLATE.format(definitions=''.join(definitions),
initializers=''.join(initializers)))
output.close() output.close()
def main(): def main():

Loading…
Cancel
Save