Merge pull request #116542 from Ivorforce/char-range-cpp

Move char ranges from `char_range.inc` to `char_range.cpp` (non-constexpr)
This commit is contained in:
Thaddeus Crews 2026-02-21 12:17:02 -06:00
commit a3e84cc2af
No known key found for this signature in database
GPG key ID: 8C6E5FEB5FC03CCC
4 changed files with 50 additions and 43 deletions

View file

@ -1,5 +1,5 @@
/**************************************************************************/
/* char_range.inc */
/* char_range.cpp */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
@ -30,19 +30,13 @@
// This file was generated using the `misc/scripts/char_range_fetch.py` script.
#pragma once
#include "core/typedefs.h"
#include "core/string/char_utils.h"
// Unicode Derived Core Properties
// Source: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt
struct CharRange {
char32_t start;
char32_t end;
};
constexpr inline CharRange xid_start[] = {
const int xid_start_size = 692;
const CharRange xid_start[xid_start_size] = {
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
{ 0x61, 0x7a },
@ -737,7 +731,8 @@ constexpr inline CharRange xid_start[] = {
{ 0x31350, 0x33479 },
};
constexpr inline CharRange xid_continue[] = {
const int xid_continue_size = 806;
const CharRange xid_continue[xid_continue_size] = {
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
@ -1546,7 +1541,8 @@ constexpr inline CharRange xid_continue[] = {
{ 0xe0100, 0xe01ef },
};
constexpr inline CharRange uppercase_letter[] = {
const int uppercase_letter_size = 660;
const CharRange uppercase_letter[uppercase_letter_size] = {
{ 0x41, 0x5a },
{ 0xc0, 0xd6 },
{ 0xd8, 0xde },
@ -2209,7 +2205,8 @@ constexpr inline CharRange uppercase_letter[] = {
{ 0x1f170, 0x1f189 },
};
constexpr inline CharRange lowercase_letter[] = {
const int lowercase_letter_size = 677;
const CharRange lowercase_letter[lowercase_letter_size] = {
{ 0x61, 0x7a },
{ 0xaa, 0xaa },
{ 0xb5, 0xb5 },
@ -2889,7 +2886,8 @@ constexpr inline CharRange lowercase_letter[] = {
{ 0x1e922, 0x1e943 },
};
constexpr inline CharRange unicode_letter[] = {
const int unicode_letter_size = 761;
const CharRange unicode_letter[unicode_letter_size] = {
{ 0x41, 0x5a },
{ 0x61, 0x7a },
{ 0xaa, 0xaa },

View file

@ -32,14 +32,28 @@
#include "core/typedefs.h"
#include "char_range.inc"
static constexpr char hex_char_table_upper[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
static constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
#define BSEARCH_CHAR_RANGE(m_array) \
struct CharRange {
char32_t start;
char32_t end;
};
extern const CharRange xid_start[];
extern const int xid_start_size;
extern const CharRange xid_continue[];
extern const int xid_continue_size;
extern const CharRange uppercase_letter[];
extern const int uppercase_letter_size;
extern const CharRange lowercase_letter[];
extern const int lowercase_letter_size;
extern const CharRange unicode_letter[];
extern const int unicode_letter_size;
#define BSEARCH_CHAR_RANGE(m_array, m_size) \
int low = 0; \
int high = std_size(m_array) - 1; \
int high = m_size - 1; \
int middle = (low + high) / 2; \
\
while (low <= high) { \
@ -56,24 +70,24 @@ static constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5',
\
return false
constexpr bool is_unicode_identifier_start(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_start);
inline bool is_unicode_identifier_start(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_start, xid_start_size);
}
constexpr bool is_unicode_identifier_continue(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_continue);
inline bool is_unicode_identifier_continue(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_continue, xid_continue_size);
}
constexpr bool is_unicode_upper_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(uppercase_letter);
inline bool is_unicode_upper_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(uppercase_letter, uppercase_letter_size);
}
constexpr bool is_unicode_lower_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(lowercase_letter);
inline bool is_unicode_lower_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(lowercase_letter, lowercase_letter_size);
}
constexpr bool is_unicode_letter(char32_t p_char) {
BSEARCH_CHAR_RANGE(unicode_letter);
inline bool is_unicode_letter(char32_t p_char) {
BSEARCH_CHAR_RANGE(unicode_letter, unicode_letter_size);
}
#undef BSEARCH_CHAR_RANGE

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# Script used to dump char ranges for specific properties from
# the Unicode Character Database to the `char_range.inc` file.
# the Unicode Character Database to the `char_range.cpp` file.
# NOTE: This script is deliberately not integrated into the build system;
# you should run it manually whenever you want to update the data.
from __future__ import annotations
@ -89,7 +89,8 @@ def parse_unicode_data() -> None:
def make_array(array_name: str, range_list: list[tuple[int, int]]) -> str:
result: str = f"\n\nconstexpr inline CharRange {array_name}[] = {{\n"
result: str = f"\n\nconst int {array_name}_size = {len(range_list)};\n"
result += f"const CharRange {array_name}[{array_name}_size] = {{\n"
for start, end in range_list:
result += f"\t{{ 0x{start:x}, 0x{end:x} }},\n"
@ -102,22 +103,16 @@ def make_array(array_name: str, range_list: list[tuple[int, int]]) -> str:
def generate_char_range_inc() -> None:
parse_unicode_data()
source: str = generate_copyright_header("char_range.inc")
source: str = generate_copyright_header("char_range.cpp")
source += f"""
// This file was generated using the `misc/scripts/char_range_fetch.py` script.
#pragma once
#include "core/typedefs.h"
#include "core/string/char_utils.h"
// Unicode Derived Core Properties
// Source: {URL}
struct CharRange {{
\tchar32_t start;
\tchar32_t end;
}};"""
// Source: {URL}\
"""
source += make_array("xid_start", xid_start)
source += make_array("xid_continue", xid_continue)
@ -127,11 +122,11 @@ struct CharRange {{
source += "\n"
char_range_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/char_range.inc")
char_range_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/char_range.cpp")
with open(char_range_path, "w", newline="\n") as f:
f.write(source)
print("`char_range.inc` generated successfully.")
print("`char_range.cpp` generated successfully.")
if __name__ == "__main__":

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# Script used to dump char ranges from
# the Unicode Character Database to the `char_range.inc` file.
# the Unicode Character Database to the `unicode_ranges.inc` file.
# NOTE: This script is deliberately not integrated into the build system;
# you should run it manually whenever you want to update the data.
from __future__ import annotations