diff --git a/core/string/char_range.inc b/core/string/char_range.cpp similarity index 99% rename from core/string/char_range.inc rename to core/string/char_range.cpp index 719e0c8965..23822ad224 100644 --- a/core/string/char_range.inc +++ b/core/string/char_range.cpp @@ -1,5 +1,5 @@ /**************************************************************************/ -/* char_range.inc */ +/* char_range.cpp */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -30,19 +30,13 @@ // This file was generated using the `misc/scripts/char_range_fetch.py` script. -#pragma once - -#include "core/typedefs.h" +#include "core/string/char_utils.h" // Unicode Derived Core Properties // Source: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt -struct CharRange { - char32_t start; - char32_t end; -}; - -constexpr inline CharRange xid_start[] = { +const int xid_start_size = 692; +const CharRange xid_start[xid_start_size] = { { 0x41, 0x5a }, { 0x5f, 0x5f }, { 0x61, 0x7a }, @@ -737,7 +731,8 @@ constexpr inline CharRange xid_start[] = { { 0x31350, 0x33479 }, }; -constexpr inline CharRange xid_continue[] = { +const int xid_continue_size = 806; +const CharRange xid_continue[xid_continue_size] = { { 0x30, 0x39 }, { 0x41, 0x5a }, { 0x5f, 0x5f }, @@ -1546,7 +1541,8 @@ constexpr inline CharRange xid_continue[] = { { 0xe0100, 0xe01ef }, }; -constexpr inline CharRange uppercase_letter[] = { +const int uppercase_letter_size = 660; +const CharRange uppercase_letter[uppercase_letter_size] = { { 0x41, 0x5a }, { 0xc0, 0xd6 }, { 0xd8, 0xde }, @@ -2209,7 +2205,8 @@ constexpr inline CharRange uppercase_letter[] = { { 0x1f170, 0x1f189 }, }; -constexpr inline CharRange lowercase_letter[] = { +const int lowercase_letter_size = 677; +const CharRange lowercase_letter[lowercase_letter_size] = { { 0x61, 0x7a }, { 0xaa, 0xaa }, { 0xb5, 0xb5 }, @@ -2889,7 +2886,8 @@ constexpr inline CharRange lowercase_letter[] = { { 0x1e922, 0x1e943 }, }; -constexpr inline CharRange unicode_letter[] = { +const int unicode_letter_size = 761; +const CharRange unicode_letter[unicode_letter_size] = { { 0x41, 0x5a }, { 0x61, 0x7a }, { 0xaa, 0xaa }, diff --git a/core/string/char_utils.h b/core/string/char_utils.h index df52ac1f75..270371d1b2 100644 --- a/core/string/char_utils.h +++ b/core/string/char_utils.h @@ -32,14 +32,28 @@ #include "core/typedefs.h" -#include "char_range.inc" - static constexpr char hex_char_table_upper[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; static constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; -#define BSEARCH_CHAR_RANGE(m_array) \ +struct CharRange { + char32_t start; + char32_t end; +}; + +extern const CharRange xid_start[]; +extern const int xid_start_size; +extern const CharRange xid_continue[]; +extern const int xid_continue_size; +extern const CharRange uppercase_letter[]; +extern const int uppercase_letter_size; +extern const CharRange lowercase_letter[]; +extern const int lowercase_letter_size; +extern const CharRange unicode_letter[]; +extern const int unicode_letter_size; + +#define BSEARCH_CHAR_RANGE(m_array, m_size) \ int low = 0; \ - int high = std_size(m_array) - 1; \ + int high = m_size - 1; \ int middle = (low + high) / 2; \ \ while (low <= high) { \ @@ -56,24 +70,24 @@ static constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5', \ return false -constexpr bool is_unicode_identifier_start(char32_t p_char) { - BSEARCH_CHAR_RANGE(xid_start); +inline bool is_unicode_identifier_start(char32_t p_char) { + BSEARCH_CHAR_RANGE(xid_start, xid_start_size); } -constexpr bool is_unicode_identifier_continue(char32_t p_char) { - BSEARCH_CHAR_RANGE(xid_continue); +inline bool is_unicode_identifier_continue(char32_t p_char) { + BSEARCH_CHAR_RANGE(xid_continue, xid_continue_size); } -constexpr bool is_unicode_upper_case(char32_t p_char) { - BSEARCH_CHAR_RANGE(uppercase_letter); +inline bool is_unicode_upper_case(char32_t p_char) { + BSEARCH_CHAR_RANGE(uppercase_letter, uppercase_letter_size); } -constexpr bool is_unicode_lower_case(char32_t p_char) { - BSEARCH_CHAR_RANGE(lowercase_letter); +inline bool is_unicode_lower_case(char32_t p_char) { + BSEARCH_CHAR_RANGE(lowercase_letter, lowercase_letter_size); } -constexpr bool is_unicode_letter(char32_t p_char) { - BSEARCH_CHAR_RANGE(unicode_letter); +inline bool is_unicode_letter(char32_t p_char) { + BSEARCH_CHAR_RANGE(unicode_letter, unicode_letter_size); } #undef BSEARCH_CHAR_RANGE diff --git a/misc/scripts/char_range_fetch.py b/misc/scripts/char_range_fetch.py index 3423c02c2e..336d31b6db 100755 --- a/misc/scripts/char_range_fetch.py +++ b/misc/scripts/char_range_fetch.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Script used to dump char ranges for specific properties from -# the Unicode Character Database to the `char_range.inc` file. +# the Unicode Character Database to the `char_range.cpp` file. # NOTE: This script is deliberately not integrated into the build system; # you should run it manually whenever you want to update the data. from __future__ import annotations @@ -89,7 +89,8 @@ def parse_unicode_data() -> None: def make_array(array_name: str, range_list: list[tuple[int, int]]) -> str: - result: str = f"\n\nconstexpr inline CharRange {array_name}[] = {{\n" + result: str = f"\n\nconst int {array_name}_size = {len(range_list)};\n" + result += f"const CharRange {array_name}[{array_name}_size] = {{\n" for start, end in range_list: result += f"\t{{ 0x{start:x}, 0x{end:x} }},\n" @@ -102,22 +103,16 @@ def make_array(array_name: str, range_list: list[tuple[int, int]]) -> str: def generate_char_range_inc() -> None: parse_unicode_data() - source: str = generate_copyright_header("char_range.inc") + source: str = generate_copyright_header("char_range.cpp") source += f""" // This file was generated using the `misc/scripts/char_range_fetch.py` script. -#pragma once - -#include "core/typedefs.h" +#include "core/string/char_utils.h" // Unicode Derived Core Properties -// Source: {URL} - -struct CharRange {{ -\tchar32_t start; -\tchar32_t end; -}};""" +// Source: {URL}\ +""" source += make_array("xid_start", xid_start) source += make_array("xid_continue", xid_continue) @@ -127,11 +122,11 @@ struct CharRange {{ source += "\n" - char_range_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/char_range.inc") + char_range_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/char_range.cpp") with open(char_range_path, "w", newline="\n") as f: f.write(source) - print("`char_range.inc` generated successfully.") + print("`char_range.cpp` generated successfully.") if __name__ == "__main__": diff --git a/misc/scripts/unicode_ranges_fetch.py b/misc/scripts/unicode_ranges_fetch.py index 23517f53ae..e84ca7545c 100755 --- a/misc/scripts/unicode_ranges_fetch.py +++ b/misc/scripts/unicode_ranges_fetch.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Script used to dump char ranges from -# the Unicode Character Database to the `char_range.inc` file. +# the Unicode Character Database to the `unicode_ranges.inc` file. # NOTE: This script is deliberately not integrated into the build system; # you should run it manually whenever you want to update the data. from __future__ import annotations