Skip to content

Commit

Permalink
lexer: allow TABS for indentation
Browse files Browse the repository at this point in the history
Allow TAB character to be used for indentation.

This is useful to have TAB character used as indentation, especially when
parts of build.ninja are hand-written as HEREDOCs in otherwise
TAB-indented file (either mandated by style for other part of project, or
required by language itself).

Changing lexer is easy thanks to the use of re2c, syntax is perhaps a bit
too permissive now, but that is job of the parser to reject use of mixed
indentation.

Let's stop complaining that:
ninja: error: build.ninja:3: expected 'command =' line
when it is exactly:
	command = cc $cflags -c $in -o $out

tests: print string instead of int for Lexer::Token

Extend C++ tests by a wrapper for Lexer::Token printing, to have string
value reported instead of numeric value, that gives "newline" instead of
"8" in test results.
Original line numbers are kept in error messages as they were before.

Extend python functional test to have TABs used in both variable and build
statements.

Closes #1598
Signed-of-by: Przemek Kitszel <[email protected]>
  • Loading branch information
pkitszel committed Mar 22, 2024
1 parent 9279832 commit fa7a486
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 63 deletions.
8 changes: 7 additions & 1 deletion doc/manual.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,13 @@ one_word_with_no_space = foo$
Other whitespace is only significant if it's at the beginning of a
line. If a line is indented more than the previous one, it's
considered part of its parent's scope; if it is indented less than the
previous one, it closes the previous scope.
previous one, it closes the previous scope. Note that the physical line that is
a line continuation is not considered as a separate line for the scope
determination purposes.

_Available since Ninja 1.12._
Horizontal Tab character (0x09) could be used for indentation. Earlier Ninja
versions allow only space (0x20) character to be used for such purpose.

[[ref_toplevel]]
Top-level variables
Expand Down
24 changes: 24 additions & 0 deletions misc/output_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,30 @@ def test_tool_inputs(self):
out2
''')

def test_tabs_indent(self):
content = '''
rule exec
<TAB>command = $cmd
var_hello = hell$
<TAB>o
build foo: exec
<TAB>cmd = touch foo
build bar: exec $
<TAB>foo
<TAB>cmd = touch bar
build $var_hello: exec
<TAB>cmd = touch $var_hello
build baz: exec $
<TAB>bar $var_hello
<TAB>cmd = touch baz
'''.replace('<TAB>', '\t')
run(content)


if __name__ == '__main__':
unittest.main()
70 changes: 36 additions & 34 deletions src/lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <last token:"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
Expand All @@ -130,7 +127,7 @@ Lexer::Token Lexer::ReadToken() {
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
0, 128, 128, 128, 128, 128, 128, 128,
128, 128, 0, 128, 128, 128, 128, 128,
128, 160, 0, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
160, 128, 128, 128, 128, 128, 128, 128,
Expand Down Expand Up @@ -164,16 +161,17 @@ Lexer::Token Lexer::ReadToken() {
};
yych = *p;
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '^') {
if (yych <= ',') {
if (yych <= '\f') {
if (yych <= 0x00) goto yy2;
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy4;
if (yych <= '\n') goto yy9;
goto yy4;
} else {
if (yych <= '\r') goto yy8;
if (yych <= '\r') goto yy11;
if (yych == '#') goto yy12;
goto yy4;
}
Expand Down Expand Up @@ -228,31 +226,32 @@ Lexer::Token Lexer::ReadToken() {
yy5:
{ token = ERROR; break; }
yy6:
++p;
{ token = NEWLINE; break; }
yy8:
yych = *++p;
if (yych == '\n') goto yy28;
goto yy5;
yy9:
yyaccept = 0;
yych = *(q = ++p);
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '\f') {
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy8;
if (yych <= '\n') goto yy9;
} else {
if (yych <= '\r') goto yy30;
if (yych == '#') goto yy32;
if (yych <= '\r') goto yy28;
if (yych == '#') goto yy30;
}
yy11:
yy8:
{ token = INDENT; break; }
yy9:
++p;
{ token = NEWLINE; break; }
yy11:
yych = *++p;
if (yych == '\n') goto yy32;
goto yy5;
yy12:
yyaccept = 1;
yych = *(q = ++p);
if (yych <= 0x00) goto yy5;
goto yy33;
goto yy31;
yy13:
yych = *++p;
yy14:
Expand Down Expand Up @@ -296,25 +295,27 @@ Lexer::Token Lexer::ReadToken() {
if (yych == '|') goto yy44;
{ token = PIPE; break; }
yy28:
++p;
{ token = NEWLINE; break; }
yy30:
yych = *++p;
if (yych == '\n') goto yy28;
yy31:
if (yych == '\n') goto yy32;
yy29:
p = q;
if (yyaccept == 0) {
goto yy11;
goto yy8;
} else {
goto yy5;
}
yy32:
yy30:
yych = *++p;
yy33:
yy31:
if (yybm[0+yych] & 128) {
goto yy32;
goto yy30;
}
if (yych <= 0x00) goto yy31;
if (yych <= 0x00) goto yy29;
goto yy34;
yy32:
++p;
{ token = NEWLINE; break; }
yy34:
++p;
{ continue; }
yy36:
Expand Down Expand Up @@ -478,7 +479,7 @@ void Lexer::EatWhitespace() {
unsigned char yych;
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 128, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
128, 0, 0, 0, 0, 0, 0, 0,
Expand Down Expand Up @@ -631,7 +632,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
unsigned char yych;
static const unsigned char yybm[] = {
0, 16, 16, 16, 16, 16, 16, 16,
16, 16, 0, 16, 16, 0, 16, 16,
16, 48, 0, 16, 16, 0, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
32, 16, 16, 16, 0, 16, 16, 16,
Expand Down Expand Up @@ -797,6 +798,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
goto yy113;
yy128:
yych = *++p;
if (yych == '\t') goto yy128;
if (yych == ' ') goto yy128;
{
continue;
Expand Down
21 changes: 9 additions & 12 deletions src/lexer.in.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <last token:"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
Expand All @@ -133,10 +130,10 @@ Lexer::Token Lexer::ReadToken() {
simple_varname = [a-zA-Z0-9_-]+;
varname = [a-zA-Z0-9_.-]+;
[ ]*"#"[^\000\n]*"\n" { continue; }
[ ]*"\r\n" { token = NEWLINE; break; }
[ ]*"\n" { token = NEWLINE; break; }
[ ]+ { token = INDENT; break; }
[ \t]*"#"[^\000\n]*"\n" { continue; }
[ \t]*"\r\n" { token = NEWLINE; break; }
[ \t]*"\n" { token = NEWLINE; break; }
[ \t]+ { token = INDENT; break; }
"build" { token = BUILD; break; }
"pool" { token = POOL; break; }
"rule" { token = RULE; break; }
Expand Down Expand Up @@ -175,7 +172,7 @@ void Lexer::EatWhitespace() {
for (;;) {
ofs_ = p;
/*!re2c
[ ]+ { continue; }
[ \t]+ { continue; }
"$\r\n" { continue; }
"$\n" { continue; }
nul { break; }
Expand Down Expand Up @@ -241,10 +238,10 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
eval->AddText(StringPiece(" ", 1));
continue;
}
"$\r\n"[ ]* {
"$\r\n"[ \t]* {
continue;
}
"$\n"[ ]* {
"$\n"[ \t]* {
continue;
}
"${"varname"}" {
Expand Down
52 changes: 36 additions & 16 deletions src/lexer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,20 @@
#include "eval_env.h"
#include "test.h"

using namespace std;
std::string tok(Lexer::Token t) {
const char *str = Lexer::TokenName(t);
if (!str)
return "TokenOutOfRange: " + std::to_string(t);
return str;
}

#define EXPECT_EQ_TOK(t1, t2) \
EXPECT_EQ(tok(t1), tok(t2))

TEST(Lexer, ReadVarValue) {
Lexer lexer("plain text $var $VaR ${x}\n");
EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[plain text ][$var][ ][$VaR][ ][$x]",
Expand All @@ -32,7 +40,7 @@ TEST(Lexer, ReadVarValue) {
TEST(Lexer, ReadEvalStringEscapes) {
Lexer lexer("$ $$ab c$: $\ncde\n");
EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[ $ab c: cde]",
Expand All @@ -41,7 +49,7 @@ TEST(Lexer, ReadEvalStringEscapes) {

TEST(Lexer, ReadIdent) {
Lexer lexer("foo baR baz_123 foo-bar");
string ident;
std::string ident;
EXPECT_TRUE(lexer.ReadIdent(&ident));
EXPECT_EQ("foo", ident);
EXPECT_TRUE(lexer.ReadIdent(&ident));
Expand All @@ -56,12 +64,12 @@ TEST(Lexer, ReadIdentCurlies) {
// Verify that ReadIdent includes dots in the name,
// but in an expansion $bar.dots stops at the dot.
Lexer lexer("foo.dots $bar.dots ${bar.dots}\n");
string ident;
std::string ident;
EXPECT_TRUE(lexer.ReadIdent(&ident));
EXPECT_EQ("foo.dots", ident);

EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[$bar][.dots ][$bar.dots]",
Expand All @@ -71,7 +79,7 @@ TEST(Lexer, ReadIdentCurlies) {
TEST(Lexer, Error) {
Lexer lexer("foo$\nbad $");
EvalString eval;
string err;
std::string err;
ASSERT_FALSE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n"
"bad $\n"
Expand All @@ -83,16 +91,28 @@ TEST(Lexer, CommentEOF) {
// Verify we don't run off the end of the string when the EOF is
// mid-comment.
Lexer lexer("# foo");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ_TOK(Lexer::ERROR, lexer.ReadToken());
}

TEST(Lexer, Tabs) {
// Verify we print a useful error on a disallowed character.
Lexer lexer(" \tfoobar");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::INDENT, token);
token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ("tabs are not allowed, use spaces", lexer.DescribeLastError());
Lexer lexer("rule foo\n"
"\tcommand = foobin $in");

EXPECT_EQ_TOK(Lexer::RULE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken());
}

TEST(Lexer, TabsInVars) {
Lexer lexer("cflags =\n"
"\t-std=c11");

EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
}

0 comments on commit fa7a486

Please sign in to comment.