diff options
Diffstat (limited to 'init')
-rw-r--r-- | init/Android.bp | 1 | ||||
-rw-r--r-- | init/tokenizer_test.cpp | 163 |
2 files changed, 164 insertions, 0 deletions
diff --git a/init/Android.bp b/init/Android.bp index cf7637f96..625fb949a 100644 --- a/init/Android.bp +++ b/init/Android.bp | |||
@@ -179,6 +179,7 @@ cc_test { | |||
179 | "rlimit_parser_test.cpp", | 179 | "rlimit_parser_test.cpp", |
180 | "service_test.cpp", | 180 | "service_test.cpp", |
181 | "subcontext_test.cpp", | 181 | "subcontext_test.cpp", |
182 | "tokenizer_test.cpp", | ||
182 | "ueventd_test.cpp", | 183 | "ueventd_test.cpp", |
183 | "util_test.cpp", | 184 | "util_test.cpp", |
184 | ], | 185 | ], |
diff --git a/init/tokenizer_test.cpp b/init/tokenizer_test.cpp new file mode 100644 index 000000000..acfc7c7d7 --- /dev/null +++ b/init/tokenizer_test.cpp | |||
@@ -0,0 +1,163 @@ | |||
1 | // | ||
2 | // Copyright (C) 2018 The Android Open Source Project | ||
3 | // | ||
4 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
5 | // you may not use this file except in compliance with the License. | ||
6 | // You may obtain a copy of the License at | ||
7 | // | ||
8 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
9 | // | ||
10 | // Unless required by applicable law or agreed to in writing, software | ||
11 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
13 | // See the License for the specific language governing permissions and | ||
14 | // limitations under the License. | ||
15 | // | ||
16 | |||
17 | #include "tokenizer.h" | ||
18 | |||
19 | #include <string> | ||
20 | #include <vector> | ||
21 | |||
22 | #include <gtest/gtest.h> | ||
23 | |||
24 | namespace android { | ||
25 | namespace init { | ||
26 | |||
27 | namespace { | ||
28 | |||
29 | void RunTest(const std::string& data, const std::vector<std::vector<std::string>>& expected_tokens) { | ||
30 | auto data_copy = std::string{data}; | ||
31 | data_copy.push_back('\n'); // TODO: fix tokenizer | ||
32 | data_copy.push_back('\0'); | ||
33 | |||
34 | parse_state state; | ||
35 | state.line = 0; | ||
36 | state.ptr = data_copy.data(); | ||
37 | state.nexttoken = 0; | ||
38 | |||
39 | std::vector<std::string> current_line; | ||
40 | std::vector<std::vector<std::string>> tokens; | ||
41 | |||
42 | while (true) { | ||
43 | switch (next_token(&state)) { | ||
44 | case T_EOF: | ||
45 | EXPECT_EQ(expected_tokens, tokens) << data; | ||
46 | return; | ||
47 | case T_NEWLINE: | ||
48 | tokens.emplace_back(std::move(current_line)); | ||
49 | break; | ||
50 | case T_TEXT: | ||
51 | current_line.emplace_back(state.text); | ||
52 | break; | ||
53 | } | ||
54 | } | ||
55 | } | ||
56 | |||
57 | } // namespace | ||
58 | |||
59 | TEST(tokenizer, null) { | ||
60 | RunTest("", {{}}); | ||
61 | } | ||
62 | |||
63 | TEST(tokenizer, simple_oneline) { | ||
64 | RunTest("one two\tthree\rfour", {{"one", "two", "three", "four"}}); | ||
65 | } | ||
66 | |||
67 | TEST(tokenizer, simple_multiline) { | ||
68 | RunTest("1 2 3\n4 5 6\n7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}}); | ||
69 | } | ||
70 | |||
71 | TEST(tokenizer, preceding_space) { | ||
72 | // Preceding spaces are ignored. | ||
73 | RunTest(" 1 2 3\n\t\t\t\t4 5 6\n\r\r\r\r7 8 9", | ||
74 | {{"1", "2", "3"}, {"4", "5", "6"}, {"7", "8", "9"}}); | ||
75 | } | ||
76 | |||
77 | TEST(tokenizer, comments) { | ||
78 | // Entirely commented lines still produce a T_NEWLINE token for tracking line count. | ||
79 | RunTest("1 2 3\n#4 5 6\n7 8 9", {{"1", "2", "3"}, {}, {"7", "8", "9"}}); | ||
80 | |||
81 | RunTest("#1 2 3\n4 5 6\n7 8 9", {{}, {"4", "5", "6"}, {"7", "8", "9"}}); | ||
82 | |||
83 | RunTest("1 2 3\n4 5 6\n#7 8 9", {{"1", "2", "3"}, {"4", "5", "6"}, {}}); | ||
84 | |||
85 | RunTest("1 2 #3\n4 #5 6\n#7 8 9", {{"1", "2"}, {"4"}, {}}); | ||
86 | } | ||
87 | |||
88 | TEST(tokenizer, control_chars) { | ||
89 | // Literal \n, \r, \t, and \\ produce the control characters \n, \r, \t, and \\ respectively. | ||
90 | // Literal \? produces ? for all other character '?' | ||
91 | |||
92 | RunTest(R"(1 token\ntoken 2)", {{"1", "token\ntoken", "2"}}); | ||
93 | RunTest(R"(1 token\rtoken 2)", {{"1", "token\rtoken", "2"}}); | ||
94 | RunTest(R"(1 token\ttoken 2)", {{"1", "token\ttoken", "2"}}); | ||
95 | RunTest(R"(1 token\\token 2)", {{"1", "token\\token", "2"}}); | ||
96 | RunTest(R"(1 token\btoken 2)", {{"1", "tokenbtoken", "2"}}); | ||
97 | |||
98 | RunTest(R"(1 token\n 2)", {{"1", "token\n", "2"}}); | ||
99 | RunTest(R"(1 token\r 2)", {{"1", "token\r", "2"}}); | ||
100 | RunTest(R"(1 token\t 2)", {{"1", "token\t", "2"}}); | ||
101 | RunTest(R"(1 token\\ 2)", {{"1", "token\\", "2"}}); | ||
102 | RunTest(R"(1 token\b 2)", {{"1", "tokenb", "2"}}); | ||
103 | |||
104 | RunTest(R"(1 \ntoken 2)", {{"1", "\ntoken", "2"}}); | ||
105 | RunTest(R"(1 \rtoken 2)", {{"1", "\rtoken", "2"}}); | ||
106 | RunTest(R"(1 \ttoken 2)", {{"1", "\ttoken", "2"}}); | ||
107 | RunTest(R"(1 \\token 2)", {{"1", "\\token", "2"}}); | ||
108 | RunTest(R"(1 \btoken 2)", {{"1", "btoken", "2"}}); | ||
109 | |||
110 | RunTest(R"(1 \n 2)", {{"1", "\n", "2"}}); | ||
111 | RunTest(R"(1 \r 2)", {{"1", "\r", "2"}}); | ||
112 | RunTest(R"(1 \t 2)", {{"1", "\t", "2"}}); | ||
113 | RunTest(R"(1 \\ 2)", {{"1", "\\", "2"}}); | ||
114 | RunTest(R"(1 \b 2)", {{"1", "b", "2"}}); | ||
115 | } | ||
116 | |||
117 | TEST(tokenizer, cr_lf) { | ||
118 | // \ before \n, \r, or \r\n is interpreted as a line continuation | ||
119 | // Extra whitespace on the next line is eaten, except \r unlike in the above tests. | ||
120 | |||
121 | RunTest("lf\\\ncont", {{"lfcont"}}); | ||
122 | RunTest("lf\\\n \t\t\t\tcont", {{"lfcont"}}); | ||
123 | |||
124 | RunTest("crlf\\\r\ncont", {{"crlfcont"}}); | ||
125 | RunTest("crlf\\\r\n \t\t\t\tcont", {{"crlfcont"}}); | ||
126 | |||
127 | RunTest("cr\\\rcont", {{"crcont"}}); | ||
128 | |||
129 | RunTest("lfspace \\\ncont", {{"lfspace", "cont"}}); | ||
130 | RunTest("lfspace \\\n \t\t\t\tcont", {{"lfspace", "cont"}}); | ||
131 | |||
132 | RunTest("crlfspace \\\r\ncont", {{"crlfspace", "cont"}}); | ||
133 | RunTest("crlfspace \\\r\n \t\t\t\tcont", {{"crlfspace", "cont"}}); | ||
134 | |||
135 | RunTest("crspace \\\rcont", {{"crspace", "cont"}}); | ||
136 | } | ||
137 | |||
138 | TEST(tokenizer, quoted) { | ||
139 | RunTest("\"quoted simple string\"", {{"quoted simple string"}}); | ||
140 | |||
141 | // Unterminated quotes just return T_EOF without any T_NEWLINE. | ||
142 | RunTest("\"unterminated quoted string", {}); | ||
143 | |||
144 | RunTest("\"1 2 3\"\n \"unterminated quoted string", {{"1 2 3"}}); | ||
145 | |||
146 | // Escaping quotes is not allowed and are treated as an unterminated quoted string. | ||
147 | RunTest("\"quoted escaped quote\\\"\"", {}); | ||
148 | RunTest("\"quoted escaped\\\" quote\"", {}); | ||
149 | RunTest("\"\\\"quoted escaped quote\"", {}); | ||
150 | |||
151 | RunTest("\"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n\"", | ||
152 | {{"quoted control characters \\n \\r \\t \\\\ \\b \\\r \\\n \r \n"}}); | ||
153 | |||
154 | RunTest("\"quoted simple string\" \"second quoted string\"", | ||
155 | {{"quoted simple string", "second quoted string"}}); | ||
156 | |||
157 | RunTest("\"# comment quoted string\"", {{"# comment quoted string"}}); | ||
158 | |||
159 | RunTest("\"Adjacent \"\"quoted strings\"", {{"Adjacent quoted strings"}}); | ||
160 | } | ||
161 | |||
162 | } // namespace init | ||
163 | } // namespace android | ||