Tests: Improve coverage for `File::get_csv_line()`
Adds a few more complex edge cases which are supported. Also adds some documentation, simplifies the code a bit and forbids using double quotes as a delimiter.
This commit is contained in:
parent
7eedc2c348
commit
b8c08ba5ad
|
@ -316,52 +316,53 @@ String FileAccess::get_line() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector<String> FileAccess::get_csv_line(const String &p_delim) const {
|
Vector<String> FileAccess::get_csv_line(const String &p_delim) const {
|
||||||
ERR_FAIL_COND_V(p_delim.length() != 1, Vector<String>());
|
ERR_FAIL_COND_V_MSG(p_delim.length() != 1, Vector<String>(), "Only single character delimiters are supported to parse CSV lines.");
|
||||||
|
ERR_FAIL_COND_V_MSG(p_delim[0] == '"', Vector<String>(), "The double quotation mark character (\") is not supported as a delimiter for CSV lines.");
|
||||||
|
|
||||||
String l;
|
String line;
|
||||||
|
|
||||||
|
// CSV can support entries with line breaks as long as they are enclosed
|
||||||
|
// in double quotes. So our "line" might be more than a single line in the
|
||||||
|
// text file.
|
||||||
int qc = 0;
|
int qc = 0;
|
||||||
do {
|
do {
|
||||||
if (eof_reached()) {
|
if (eof_reached()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
line += get_line() + "\n";
|
||||||
l += get_line() + "\n";
|
|
||||||
qc = 0;
|
qc = 0;
|
||||||
for (int i = 0; i < l.length(); i++) {
|
for (int i = 0; i < line.length(); i++) {
|
||||||
if (l[i] == '"') {
|
if (line[i] == '"') {
|
||||||
qc++;
|
qc++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} while (qc % 2);
|
} while (qc % 2);
|
||||||
|
|
||||||
l = l.substr(0, l.length() - 1);
|
// Remove the extraneous newline we've added above.
|
||||||
|
line = line.substr(0, line.length() - 1);
|
||||||
|
|
||||||
Vector<String> strings;
|
Vector<String> strings;
|
||||||
|
|
||||||
bool in_quote = false;
|
bool in_quote = false;
|
||||||
String current;
|
String current;
|
||||||
for (int i = 0; i < l.length(); i++) {
|
for (int i = 0; i < line.length(); i++) {
|
||||||
char32_t c = l[i];
|
char32_t c = line[i];
|
||||||
char32_t s[2] = { 0, 0 };
|
// A delimiter ends the current entry, unless it's in a quoted string.
|
||||||
|
|
||||||
if (!in_quote && c == p_delim[0]) {
|
if (!in_quote && c == p_delim[0]) {
|
||||||
strings.push_back(current);
|
strings.push_back(current);
|
||||||
current = String();
|
current = String();
|
||||||
} else if (c == '"') {
|
} else if (c == '"') {
|
||||||
if (l[i + 1] == '"' && in_quote) {
|
// Doubled quotes are escapes for intentional quotes in the string.
|
||||||
s[0] = '"';
|
if (line[i + 1] == '"' && in_quote) {
|
||||||
current += s;
|
current += '"';
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
in_quote = !in_quote;
|
in_quote = !in_quote;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s[0] = c;
|
current += c;
|
||||||
current += s;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
strings.push_back(current);
|
strings.push_back(current);
|
||||||
|
|
||||||
return strings;
|
return strings;
|
||||||
|
|
|
@ -119,8 +119,15 @@
|
||||||
<return type="PackedStringArray" />
|
<return type="PackedStringArray" />
|
||||||
<argument index="0" name="delim" type="String" default="","" />
|
<argument index="0" name="delim" type="String" default="","" />
|
||||||
<description>
|
<description>
|
||||||
Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [code]delim[/code] to use other than the default [code]","[/code] (comma). This delimiter must be one-character long.
|
Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [code]delim[/code] to use other than the default [code]","[/code] (comma). This delimiter must be one-character long, and cannot be a double quotation mark.
|
||||||
Text is interpreted as being UTF-8 encoded.
|
Text is interpreted as being UTF-8 encoded. Text values must be enclosed in double quotes if they include the delimiter character. Double quotes within a text value can be escaped by doubling their occurrence.
|
||||||
|
For example, the following CSV lines are valid and will be properly parsed as two strings each:
|
||||||
|
[codeblock]
|
||||||
|
Alice,"Hello, Bob!"
|
||||||
|
Bob,Alice! What a surprise!
|
||||||
|
Alice,"I thought you'd reply with ""Hello, world""."
|
||||||
|
[/codeblock]
|
||||||
|
Note how the second line can omit the enclosing quotes as it does not include the delimiter. However it [i]could[/i] very well use quotes, it was only written without for demonstration purposes. The third line must use [code]""[/code] for each quotation mark that needs to be interpreted as such instead of the end of a text value.
|
||||||
</description>
|
</description>
|
||||||
</method>
|
</method>
|
||||||
<method name="get_double" qualifiers="const">
|
<method name="get_double" qualifiers="const">
|
||||||
|
|
|
@ -1,3 +1,8 @@
|
||||||
keys,en,de
|
keys,en,de
|
||||||
GOOD_MORNING,"Good Morning","Guten Morgen"
|
GOOD_MORNING,"Good Morning","Guten Morgen"
|
||||||
GOOD_EVENING,"Good Evening",""
|
GOOD_EVENING,"Good Evening",""
|
||||||
|
Without quotes,"With, comma","With ""inner"" quotes","With ""inner"", quotes"","" and comma","With ""inner
|
||||||
|
split"" quotes and
|
||||||
|
line breaks","With \nnewline chars"
|
||||||
|
Some other~delimiter~should still work, shouldn't it?
|
||||||
|
What about tab separated lines, good?
|
||||||
|
|
Can't render this file because it has a wrong number of fields in line 4.
|
|
@ -37,12 +37,12 @@
|
||||||
namespace TestFileAccess {
|
namespace TestFileAccess {
|
||||||
|
|
||||||
TEST_CASE("[FileAccess] CSV read") {
|
TEST_CASE("[FileAccess] CSV read") {
|
||||||
FileAccess *f = FileAccess::open(TestUtils::get_data_path("translations.csv"), FileAccess::READ);
|
FileAccessRef f = FileAccess::open(TestUtils::get_data_path("translations.csv"), FileAccess::READ);
|
||||||
|
|
||||||
Vector<String> header = f->get_csv_line(); // Default delimiter: ","
|
Vector<String> header = f->get_csv_line(); // Default delimiter: ",".
|
||||||
REQUIRE(header.size() == 3);
|
REQUIRE(header.size() == 3);
|
||||||
|
|
||||||
Vector<String> row1 = f->get_csv_line(",");
|
Vector<String> row1 = f->get_csv_line(","); // Explicit delimiter, should be the same.
|
||||||
REQUIRE(row1.size() == 3);
|
REQUIRE(row1.size() == 3);
|
||||||
CHECK(row1[0] == "GOOD_MORNING");
|
CHECK(row1[0] == "GOOD_MORNING");
|
||||||
CHECK(row1[1] == "Good Morning");
|
CHECK(row1[1] == "Good Morning");
|
||||||
|
@ -53,12 +53,31 @@ TEST_CASE("[FileAccess] CSV read") {
|
||||||
CHECK(row2[0] == "GOOD_EVENING");
|
CHECK(row2[0] == "GOOD_EVENING");
|
||||||
CHECK(row2[1] == "Good Evening");
|
CHECK(row2[1] == "Good Evening");
|
||||||
CHECK(row2[2] == ""); // Use case: not yet translated!
|
CHECK(row2[2] == ""); // Use case: not yet translated!
|
||||||
|
|
||||||
// https://github.com/godotengine/godot/issues/44269
|
// https://github.com/godotengine/godot/issues/44269
|
||||||
CHECK_MESSAGE(row2[2] != "\"", "Should not parse empty string as a single double quote.");
|
CHECK_MESSAGE(row2[2] != "\"", "Should not parse empty string as a single double quote.");
|
||||||
|
|
||||||
|
Vector<String> row3 = f->get_csv_line();
|
||||||
|
REQUIRE(row3.size() == 6);
|
||||||
|
CHECK(row3[0] == "Without quotes");
|
||||||
|
CHECK(row3[1] == "With, comma");
|
||||||
|
CHECK(row3[2] == "With \"inner\" quotes");
|
||||||
|
CHECK(row3[3] == "With \"inner\", quotes\",\" and comma");
|
||||||
|
CHECK(row3[4] == "With \"inner\nsplit\" quotes and\nline breaks");
|
||||||
|
CHECK(row3[5] == "With \\nnewline chars"); // Escaped, not an actual newline.
|
||||||
|
|
||||||
|
Vector<String> row4 = f->get_csv_line("~"); // Custom delimiter, makes inline commas easier.
|
||||||
|
REQUIRE(row4.size() == 3);
|
||||||
|
CHECK(row4[0] == "Some other");
|
||||||
|
CHECK(row4[1] == "delimiter");
|
||||||
|
CHECK(row4[2] == "should still work, shouldn't it?");
|
||||||
|
|
||||||
|
Vector<String> row5 = f->get_csv_line("\t"); // Tab separated variables.
|
||||||
|
REQUIRE(row5.size() == 3);
|
||||||
|
CHECK(row5[0] == "What about");
|
||||||
|
CHECK(row5[1] == "tab separated");
|
||||||
|
CHECK(row5[2] == "lines, good?");
|
||||||
|
|
||||||
f->close();
|
f->close();
|
||||||
memdelete(f);
|
|
||||||
}
|
}
|
||||||
} // namespace TestFileAccess
|
} // namespace TestFileAccess
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue