feat: add dependency
This commit is contained in:
24
javascript-engine/external/boa/boa_interner/Cargo.toml
vendored
Normal file
24
javascript-engine/external/boa/boa_interner/Cargo.toml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "boa_interner"
|
||||
description = "String interner for the Boa JavaScript engine."
|
||||
keywords = ["javascript", "js", "string", "interner"]
|
||||
categories = ["data-structures"]
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
||||
[features]
|
||||
fuzz = ["arbitrary"]
|
||||
|
||||
[dependencies]
|
||||
boa_macros.workspace = true
|
||||
serde = { version = "1.0.152", features = ["derive"], optional = true }
|
||||
phf = { version = "0.11.1", features = ["macros"] }
|
||||
rustc-hash = "1.1.0"
|
||||
static_assertions = "1.1.0"
|
||||
once_cell = "1.17.0"
|
||||
indexmap = "1.9.2"
|
||||
arbitrary = { version = "1", optional = true, features = ["derive"] }
|
||||
78
javascript-engine/external/boa/boa_interner/src/fixed_string.rs
vendored
Normal file
78
javascript-engine/external/boa/boa_interner/src/fixed_string.rs
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
use crate::interned_str::InternedStr;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct FixedString<Char> {
|
||||
inner: Vec<Char>,
|
||||
}
|
||||
|
||||
impl<Char> Default for FixedString<Char> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: Vec::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> FixedString<Char> {
|
||||
/// Creates a new, pinned [`FixedString`].
|
||||
pub(super) fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
inner: Vec::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the maximum capacity of the [`FixedString`].
|
||||
pub(super) fn capacity(&self) -> usize {
|
||||
self.inner.capacity()
|
||||
}
|
||||
|
||||
/// Returns `true` if the [`FixedString`] has length zero,
|
||||
/// and `false` otherwise.
|
||||
pub(super) fn is_empty(&self) -> bool {
|
||||
self.inner.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> FixedString<Char>
|
||||
where
|
||||
Char: Clone,
|
||||
{
|
||||
/// Tries to push `string` to the [`FixedString`], and returns
|
||||
/// an [`InternedStr`] pointer to the stored `string`, or
|
||||
/// `None` if the capacity is not enough to store `string`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller is responsible for ensuring `self` outlives the returned
|
||||
/// [`InternedStr`].
|
||||
pub(super) unsafe fn push(&mut self, string: &[Char]) -> Option<InternedStr<Char>> {
|
||||
let capacity = self.inner.capacity();
|
||||
(capacity >= self.inner.len() + string.len()).then(|| {
|
||||
// SAFETY:
|
||||
// The caller is responsible for extending the lifetime
|
||||
// of `self` to outlive the return value.
|
||||
unsafe { self.push_unchecked(string) }
|
||||
})
|
||||
}
|
||||
|
||||
/// Pushes `string` to the [`FixedString`], and returns
|
||||
/// an [`InternedStr`] pointer to the stored `string`, without
|
||||
/// checking if the total `capacity` is enough to store `string`,
|
||||
/// and without checking if the string is correctly aligned.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller is responsible for ensuring that `self` outlives the returned
|
||||
/// [`InternedStr`] and that it has enough capacity to store `string` without
|
||||
/// reallocating.
|
||||
pub(super) unsafe fn push_unchecked(&mut self, string: &[Char]) -> InternedStr<Char> {
|
||||
let old_len = self.inner.len();
|
||||
self.inner.extend_from_slice(string);
|
||||
|
||||
// SAFETY: The caller is responsible for extending the lifetime
|
||||
// of `self` to outlive the return value, and for ensuring
|
||||
// the alignment of `string` is correct.
|
||||
let ptr = &self.inner[old_len..self.inner.len()];
|
||||
unsafe { InternedStr::new(ptr.into()) }
|
||||
}
|
||||
}
|
||||
80
javascript-engine/external/boa/boa_interner/src/interned_str.rs
vendored
Normal file
80
javascript-engine/external/boa/boa_interner/src/interned_str.rs
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
use std::{hash::Hash, ptr::NonNull};
|
||||
|
||||
/// Wrapper for an interned str pointer, required to
|
||||
/// quickly check using a hash if a string is inside an [`Interner`][`super::Interner`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This struct could cause Undefined Behaviour on:
|
||||
/// - Use without ensuring the referenced memory is still allocated.
|
||||
/// - Construction of an [`InternedStr`] from an invalid [`NonNull<Char>`] pointer.
|
||||
/// - Construction of an [`InternedStr`] from a [`NonNull<Char>`] pointer
|
||||
/// without checking if the pointed memory of the [`NonNull<Char>`] outlives
|
||||
/// the [`InternedStr`].
|
||||
///
|
||||
/// In general, this should not be used outside of an [`Interner`][`super::Interner`].
|
||||
#[derive(Debug)]
|
||||
pub(super) struct InternedStr<Char> {
|
||||
ptr: NonNull<[Char]>,
|
||||
}
|
||||
|
||||
impl<Char> InternedStr<Char> {
|
||||
/// Create a new interned string from the given `*const u8` pointer,
|
||||
/// length and encoding kind
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Not maintaining the invariants specified on the struct definition
|
||||
/// could cause Undefined Behaviour.
|
||||
pub(super) const unsafe fn new(ptr: NonNull<[Char]>) -> Self {
|
||||
Self { ptr }
|
||||
}
|
||||
|
||||
/// Returns a shared reference to the underlying string.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Not maintaining the invariants specified on the struct definition
|
||||
/// could cause Undefined Behaviour.
|
||||
pub(super) unsafe fn as_ref(&self) -> &[Char] {
|
||||
// SAFETY:
|
||||
// The caller must ensure `ptr` is still valid throughout the
|
||||
// lifetime of `self`.
|
||||
unsafe { self.ptr.as_ref() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> Clone for InternedStr<Char> {
|
||||
fn clone(&self) -> Self {
|
||||
Self { ptr: self.ptr }
|
||||
}
|
||||
}
|
||||
|
||||
// `InternedStr` only holds a `NonNull` pointer, so it is `Copy` for any `Char`.
// Written by hand because deriving would add a `Char: Copy` bound.
impl<Char> Copy for InternedStr<Char> {}
|
||||
|
||||
// Marker impl: `PartialEq::eq` below compares the pointed-to slices, so the
// relation is a full equivalence whenever `Char: Eq`.
impl<Char> Eq for InternedStr<Char> where Char: Eq {}
|
||||
|
||||
impl<Char> PartialEq for InternedStr<Char>
|
||||
where
|
||||
Char: PartialEq,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
// SAFETY: The caller must verify the invariants
|
||||
// specified in the struct definition.
|
||||
unsafe { self.as_ref() == other.as_ref() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> Hash for InternedStr<Char>
|
||||
where
|
||||
Char: Hash,
|
||||
{
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
// SAFETY:
|
||||
// The caller must ensure `ptr` is still valid throughout the
|
||||
// lifetime of `self`.
|
||||
unsafe {
|
||||
self.as_ref().hash(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
462
javascript-engine/external/boa/boa_interner/src/lib.rs
vendored
Normal file
462
javascript-engine/external/boa/boa_interner/src/lib.rs
vendored
Normal file
@@ -0,0 +1,462 @@
|
||||
//! Boa's **`boa_interner`** is a string interner for compiler performance.
|
||||
//!
|
||||
//! # Crate Overview
|
||||
//! The idea behind using a string interner is that in most of the code, strings such as
|
||||
//! identifiers and literals are often repeated. This causes extra burden when comparing them and
|
||||
//! storing them. A string interner stores a unique `usize` symbol for each string, making sure
|
||||
//! that there are no duplicates. This makes it much easier to compare, since it's just comparing
|
||||
//! to `usize`, and also it's easier to store, since instead of a heap-allocated string, you only
|
||||
//! need to store a `usize`. This reduces memory consumption and improves performance in the
|
||||
//! compiler.
|
||||
//!
|
||||
//! # About Boa
|
||||
//! Boa is an open-source, experimental ECMAScript Engine written in Rust for lexing, parsing and executing ECMAScript/JavaScript. Currently, Boa
|
||||
//! supports some of the [language][boa-conformance]. More information can be viewed at [Boa's website][boa-web].
|
||||
//!
|
||||
//! Try out the most recent release with Boa's live demo [playground][boa-playground].
|
||||
//!
|
||||
//! # Boa Crates
|
||||
//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree.
|
||||
//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution.
|
||||
//! - **`boa_gc`** - Boa's garbage collector.
|
||||
//! - **`boa_interner`** - Boa's string interner.
|
||||
//! - **`boa_parser`** - Boa's lexer and parser.
|
||||
//! - **`boa_profiler`** - Boa's code profiler.
|
||||
//! - **`boa_unicode`** - Boa's Unicode identifier.
|
||||
//! - **`boa_icu_provider`** - Boa's ICU4X data provider.
|
||||
//!
|
||||
//! [boa-conformance]: https://boa-dev.github.io/boa/test262/
|
||||
//! [boa-web]: https://boa-dev.github.io/
|
||||
//! [boa-playground]: https://boa-dev.github.io/boa/playground/
|
||||
|
||||
#![doc(
|
||||
html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg",
|
||||
html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg"
|
||||
)]
|
||||
#![cfg_attr(not(test), forbid(clippy::unwrap_used))]
|
||||
#![warn(missing_docs, clippy::dbg_macro)]
|
||||
#![deny(
|
||||
// rustc lint groups https://doc.rust-lang.org/rustc/lints/groups.html
|
||||
warnings,
|
||||
future_incompatible,
|
||||
let_underscore,
|
||||
nonstandard_style,
|
||||
rust_2018_compatibility,
|
||||
rust_2018_idioms,
|
||||
rust_2021_compatibility,
|
||||
unused,
|
||||
|
||||
// rustc allowed-by-default lints https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html
|
||||
macro_use_extern_crate,
|
||||
meta_variable_misuse,
|
||||
missing_abi,
|
||||
missing_copy_implementations,
|
||||
missing_debug_implementations,
|
||||
non_ascii_idents,
|
||||
noop_method_call,
|
||||
trivial_casts,
|
||||
trivial_numeric_casts,
|
||||
unreachable_pub,
|
||||
unsafe_op_in_unsafe_fn,
|
||||
unused_crate_dependencies,
|
||||
unused_import_braces,
|
||||
unused_lifetimes,
|
||||
unused_qualifications,
|
||||
unused_tuple_struct_fields,
|
||||
variant_size_differences,
|
||||
|
||||
// rustdoc lints https://doc.rust-lang.org/rustdoc/lints.html
|
||||
rustdoc::broken_intra_doc_links,
|
||||
rustdoc::private_intra_doc_links,
|
||||
rustdoc::missing_crate_level_docs,
|
||||
rustdoc::private_doc_tests,
|
||||
rustdoc::invalid_codeblock_attributes,
|
||||
rustdoc::invalid_rust_codeblocks,
|
||||
rustdoc::bare_urls,
|
||||
|
||||
// clippy categories https://doc.rust-lang.org/clippy/
|
||||
clippy::all,
|
||||
clippy::correctness,
|
||||
clippy::suspicious,
|
||||
clippy::style,
|
||||
clippy::complexity,
|
||||
clippy::perf,
|
||||
clippy::pedantic,
|
||||
clippy::nursery,
|
||||
)]
|
||||
#![allow(
|
||||
clippy::redundant_pub_crate,
|
||||
// TODO deny once false positive is fixed (https://github.com/rust-lang/rust-clippy/issues/9626).
|
||||
clippy::trait_duplication_in_bounds
|
||||
)]
|
||||
|
||||
extern crate static_assertions as sa;
|
||||
|
||||
mod fixed_string;
|
||||
mod interned_str;
|
||||
mod raw;
|
||||
mod sym;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use raw::RawInterner;
|
||||
use std::borrow::Cow;
|
||||
|
||||
pub use sym::*;
|
||||
|
||||
/// An enumeration of all slice types [`Interner`] can internally store.
///
/// This enum allows us to intern either `UTF-8` or `UTF-16` str references, which are the two
/// encodings [`Interner`] can store.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JStrRef<'a> {
    /// A `UTF-8` string reference.
    Utf8(&'a str),

    /// A `UTF-16` string reference.
    Utf16(&'a [u16]),
}
|
||||
|
||||
impl<'a> From<&'a str> for JStrRef<'a> {
|
||||
fn from(s: &'a str) -> Self {
|
||||
JStrRef::Utf8(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a [u16]> for JStrRef<'a> {
|
||||
fn from(s: &'a [u16]) -> Self {
|
||||
JStrRef::Utf16(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, const N: usize> From<&'a [u16; N]> for JStrRef<'a> {
|
||||
fn from(s: &'a [u16; N]) -> Self {
|
||||
JStrRef::Utf16(s)
|
||||
}
|
||||
}
|
||||
|
||||
/// A double reference to an interned string inside [`Interner`].
///
/// [`JSInternedStrRef::utf8`] returns an [`Option`], since not every `UTF-16` string is fully
/// representable as a `UTF-8` string (because of unpaired surrogates). However, every `UTF-8`
/// string is representable as a `UTF-16` string, so [`JSInternedStrRef::utf16`] returns a
/// [<code>&\[u16\]</code>][std::slice].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct JSInternedStrRef<'a, 'b> {
    /// `UTF-8` view of the string, or `None` when it has no `UTF-8` representation.
    utf8: Option<&'a str>,
    /// `UTF-16` view of the string.
    utf16: &'b [u16],
}
|
||||
|
||||
impl<'a, 'b> JSInternedStrRef<'a, 'b> {
|
||||
/// Returns the inner reference to the interned string in `UTF-8` encoding.
|
||||
/// if the string is not representable in `UTF-8`, returns [`None`]
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub const fn utf8(&self) -> Option<&'a str> {
|
||||
self.utf8
|
||||
}
|
||||
|
||||
/// Returns the inner reference to the interned string in `UTF-16` encoding.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub const fn utf16(&self) -> &'b [u16] {
|
||||
self.utf16
|
||||
}
|
||||
|
||||
/// Joins the result of both possible strings into a common type.
|
||||
///
|
||||
/// If `self` is representable by a `UTF-8` string and the `prioritize_utf8` argument is set,
|
||||
/// it will prioritize calling `f`, and will only call `g` if `self` is only representable by a
|
||||
/// `UTF-16` string. Otherwise, it will directly call `g`.
|
||||
pub fn join<F, G, T>(self, f: F, g: G, prioritize_utf8: bool) -> T
|
||||
where
|
||||
F: FnOnce(&'a str) -> T,
|
||||
G: FnOnce(&'b [u16]) -> T,
|
||||
{
|
||||
if prioritize_utf8 {
|
||||
if let Some(str) = self.utf8 {
|
||||
return f(str);
|
||||
}
|
||||
}
|
||||
g(self.utf16)
|
||||
}
|
||||
|
||||
/// Same as [`join`][`JSInternedStrRef::join`], but where you can pass an additional context.
|
||||
///
|
||||
/// Useful when you have a `&mut Context` context that cannot be borrowed by both closures at
|
||||
/// the same time.
|
||||
pub fn join_with_context<C, F, G, T>(self, f: F, g: G, ctx: C, prioritize_utf8: bool) -> T
|
||||
where
|
||||
F: FnOnce(&'a str, C) -> T,
|
||||
G: FnOnce(&'b [u16], C) -> T,
|
||||
{
|
||||
if prioritize_utf8 {
|
||||
if let Some(str) = self.utf8 {
|
||||
return f(str, ctx);
|
||||
}
|
||||
}
|
||||
g(self.utf16, ctx)
|
||||
}
|
||||
|
||||
/// Converts both string types into a common type `C`.
|
||||
///
|
||||
/// If `self` is representable by a `UTF-8` string and the `prioritize_utf8` argument is set, it
|
||||
/// will prioritize converting its `UTF-8` representation first, and will only convert its
|
||||
/// `UTF-16` representation if it is only representable by a `UTF-16` string. Otherwise, it will
|
||||
/// directly convert its `UTF-16` representation.
|
||||
pub fn into_common<C>(self, prioritize_utf8: bool) -> C
|
||||
where
|
||||
C: From<&'a str> + From<&'b [u16]>,
|
||||
{
|
||||
self.join(Into::into, Into::into, prioritize_utf8)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for JSInternedStrRef<'_, '_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.join_with_context(
|
||||
std::fmt::Display::fmt,
|
||||
|js, f| {
|
||||
char::decode_utf16(js.iter().copied())
|
||||
.map(|r| match r {
|
||||
Ok(c) => String::from(c),
|
||||
Err(e) => format!("\\u{:04X}", e.unpaired_surrogate()),
|
||||
})
|
||||
.collect::<String>()
|
||||
.fmt(f)
|
||||
},
|
||||
f,
|
||||
true,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// The string interner for Boa.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Interner {
|
||||
utf8_interner: RawInterner<u8>,
|
||||
utf16_interner: RawInterner<u16>,
|
||||
}
|
||||
|
||||
impl Interner {
|
||||
/// Creates a new [`Interner`].
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Creates a new [`Interner`] with the specified capacity.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
utf8_interner: RawInterner::with_capacity(capacity),
|
||||
utf16_interner: RawInterner::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of strings interned by the interner.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn len(&self) -> usize {
|
||||
// `utf16_interner.len()` == `utf8_interner.len()`,
|
||||
// so we can use any of them.
|
||||
COMMON_STRINGS_UTF8.len() + self.utf16_interner.len()
|
||||
}
|
||||
|
||||
/// Returns `true` if the [`Interner`] contains no interned strings.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
COMMON_STRINGS_UTF8.is_empty() && self.utf16_interner.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the symbol for the given string if any.
|
||||
///
|
||||
/// Can be used to query if a string has already been interned without interning.
|
||||
pub fn get<'a, T>(&self, string: T) -> Option<Sym>
|
||||
where
|
||||
T: Into<JStrRef<'a>>,
|
||||
{
|
||||
let string = string.into();
|
||||
Self::get_common(string).or_else(|| {
|
||||
let index = match string {
|
||||
JStrRef::Utf8(s) => self.utf8_interner.get(s.as_bytes()),
|
||||
JStrRef::Utf16(s) => self.utf16_interner.get(s),
|
||||
};
|
||||
// SAFETY:
|
||||
// `get_or_intern/get_or_intern_static` already have checks to avoid returning indices
|
||||
// that could cause overflows, meaning the indices returned by
|
||||
// `idx + 1 + COMMON_STRINGS_UTF8.len()` cannot cause overflows.
|
||||
unsafe { index.map(|i| Sym::new_unchecked(i + 1 + COMMON_STRINGS_UTF8.len())) }
|
||||
})
|
||||
}
|
||||
|
||||
/// Interns the given string.
|
||||
///
|
||||
/// Returns a symbol for resolution into the original string.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner already interns the maximum number of strings possible by the chosen symbol type.
|
||||
pub fn get_or_intern<'a, T>(&mut self, string: T) -> Sym
|
||||
where
|
||||
T: Into<JStrRef<'a>>,
|
||||
{
|
||||
let string = string.into();
|
||||
self.get(string).unwrap_or_else(|| {
|
||||
let (utf8, utf16) = match string {
|
||||
JStrRef::Utf8(s) => (
|
||||
Some(Cow::Borrowed(s)),
|
||||
Cow::Owned(s.encode_utf16().collect()),
|
||||
),
|
||||
JStrRef::Utf16(s) => (String::from_utf16(s).ok().map(Cow::Owned), Cow::Borrowed(s)),
|
||||
};
|
||||
|
||||
// We need a way to check for the strings that can be interned by `utf16_interner` but
|
||||
// not by `utf8_interner` (since there are some UTF-16 strings with surrogates that are
|
||||
// not representable in UTF-8), so we use the sentinel value `""` as a marker indicating
|
||||
// that the `Sym` corresponding to that string is only available in `utf16_interner`.
|
||||
//
|
||||
// We don't need to worry about matches with `""` inside `get`, because
|
||||
// `COMMON_STRINGS_UTF8` filters all the empty strings before interning.
|
||||
let index = if let Some(utf8) = utf8 {
|
||||
self.utf8_interner.intern(utf8.as_bytes())
|
||||
} else {
|
||||
self.utf8_interner.intern_static(b"")
|
||||
};
|
||||
|
||||
let utf16_index = self.utf16_interner.intern(&utf16);
|
||||
|
||||
// Just to check everything is okay
|
||||
assert_eq!(index, utf16_index);
|
||||
|
||||
index
|
||||
.checked_add(1 + COMMON_STRINGS_UTF8.len())
|
||||
.and_then(Sym::new)
|
||||
.expect("Cannot intern new string: integer overflow")
|
||||
})
|
||||
}
|
||||
|
||||
/// Interns the given `'static` string.
|
||||
///
|
||||
/// Returns a symbol for resolution into the original string.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// This is more efficient than [`Interner::get_or_intern`], since it avoids allocating space
|
||||
/// for one `string` inside the [`Interner`], with the disadvantage that you need to provide
|
||||
/// both the `UTF-8` and the `UTF-16` representation of the string.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner already interns the maximum number of strings possible by the chosen symbol type.
|
||||
pub fn get_or_intern_static(&mut self, utf8: &'static str, utf16: &'static [u16]) -> Sym {
|
||||
// Uses the utf8 because it's quicker to check inside `COMMON_STRINGS_UTF8`
|
||||
// (which is a perfect hash set) than to check inside `COMMON_STRINGS_UTF16`
|
||||
// (which is a lazy static hash set).
|
||||
self.get(utf8).unwrap_or_else(|| {
|
||||
let index = self.utf8_interner.intern(utf8.as_bytes());
|
||||
let utf16_index = self.utf16_interner.intern(utf16);
|
||||
|
||||
// Just to check everything is okay
|
||||
debug_assert_eq!(index, utf16_index);
|
||||
|
||||
index
|
||||
.checked_add(1 + COMMON_STRINGS_UTF8.len())
|
||||
.and_then(Sym::new)
|
||||
.expect("Cannot intern new string: integer overflow")
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the string for the given symbol if any.
|
||||
#[must_use]
|
||||
pub fn resolve(&self, symbol: Sym) -> Option<JSInternedStrRef<'_, '_>> {
|
||||
let index = symbol.get() - 1;
|
||||
|
||||
if let Some(utf8) = COMMON_STRINGS_UTF8.index(index).copied() {
|
||||
let utf16 = COMMON_STRINGS_UTF16
|
||||
.get_index(index)
|
||||
.copied()
|
||||
.expect("The sizes of both statics must be equal");
|
||||
return Some(JSInternedStrRef {
|
||||
utf8: Some(utf8),
|
||||
utf16,
|
||||
});
|
||||
}
|
||||
|
||||
let index = index - COMMON_STRINGS_UTF8.len();
|
||||
|
||||
if let Some(utf16) = self.utf16_interner.index(index) {
|
||||
let index = index - (self.utf16_interner.len() - self.utf8_interner.len());
|
||||
// SAFETY:
|
||||
// We only manipulate valid UTF-8 `str`s and convert them to `[u8]` for convenience,
|
||||
// so converting back to a `str` is safe.
|
||||
let utf8 = unsafe {
|
||||
std::str::from_utf8_unchecked(
|
||||
self.utf8_interner
|
||||
.index(index)
|
||||
.expect("both interners must have the same size"),
|
||||
)
|
||||
};
|
||||
return Some(JSInternedStrRef {
|
||||
utf8: if utf8.is_empty() { None } else { Some(utf8) },
|
||||
utf16,
|
||||
});
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the string for the given symbol.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner cannot resolve the given symbol.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn resolve_expect(&self, symbol: Sym) -> JSInternedStrRef<'_, '_> {
|
||||
self.resolve(symbol).expect("string disappeared")
|
||||
}
|
||||
|
||||
/// Gets the symbol of the common string if one of them
|
||||
fn get_common(string: JStrRef<'_>) -> Option<Sym> {
|
||||
match string {
|
||||
JStrRef::Utf8(s) => COMMON_STRINGS_UTF8.get_index(s).map(|idx| {
|
||||
// SAFETY: `idx >= 0`, since it's an `usize`, and `idx + 1 > 0`.
|
||||
// In this case, we don't need to worry about overflows because we have a static
|
||||
// assertion in place checking that `COMMON_STRINGS.len() < usize::MAX`.
|
||||
unsafe { Sym::new_unchecked(idx + 1) }
|
||||
}),
|
||||
JStrRef::Utf16(s) => COMMON_STRINGS_UTF16.get_index_of(&s).map(|idx| {
|
||||
// SAFETY: `idx >= 0`, since it's an `usize`, and `idx + 1 > 0`.
|
||||
// In this case, we don't need to worry about overflows because we have a static
|
||||
// assertion in place checking that `COMMON_STRINGS.len() < usize::MAX`.
|
||||
unsafe { Sym::new_unchecked(idx + 1) }
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements the display formatting with indentation.
|
||||
pub trait ToIndentedString {
|
||||
/// Converts the element to a string using an interner, with the given indentation.
|
||||
fn to_indented_string(&self, interner: &Interner, indentation: usize) -> String;
|
||||
}
|
||||
|
||||
/// Converts a given element to a string using an interner.
|
||||
pub trait ToInternedString {
|
||||
/// Converts a given element to a string using an interner.
|
||||
fn to_interned_string(&self, interner: &Interner) -> String;
|
||||
}
|
||||
|
||||
impl<T> ToInternedString for T
|
||||
where
|
||||
T: ToIndentedString,
|
||||
{
|
||||
fn to_interned_string(&self, interner: &Interner) -> String {
|
||||
self.to_indented_string(interner, 0)
|
||||
}
|
||||
}
|
||||
190
javascript-engine/external/boa/boa_interner/src/raw.rs
vendored
Normal file
190
javascript-engine/external/boa/boa_interner/src/raw.rs
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
use crate::{fixed_string::FixedString, interned_str::InternedStr};
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::hash::Hash;
|
||||
|
||||
/// Raw string interner, generic by a char type.
|
||||
#[derive(Debug)]
|
||||
pub(super) struct RawInterner<Char> {
|
||||
// COMMENT FOR DEVS:
|
||||
// This interner works on the assumption that
|
||||
// `head` won't ever be reallocated, since this could invalidate
|
||||
// some of our stored pointers inside `spans`.
|
||||
// This means that any operation on `head` and `full` should be carefully
|
||||
// reviewed to not cause Undefined Behaviour.
|
||||
// `intern` has a more thorough explanation on this.
|
||||
//
|
||||
// Also, if you want to implement `shrink_to_fit` (and friends),
|
||||
// please check out https://github.com/Robbepop/string-interner/pull/47 first.
|
||||
// This doesn't implement that method, since implementing it increases
|
||||
// our memory footprint.
|
||||
symbol_cache: FxHashMap<InternedStr<Char>, usize>,
|
||||
spans: Vec<InternedStr<Char>>,
|
||||
head: FixedString<Char>,
|
||||
full: Vec<FixedString<Char>>,
|
||||
}
|
||||
|
||||
impl<Char> Default for RawInterner<Char> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
symbol_cache: FxHashMap::default(),
|
||||
spans: Vec::default(),
|
||||
head: FixedString::default(),
|
||||
full: Vec::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> RawInterner<Char> {
|
||||
/// Creates a new `RawInterner` with the specified capacity.
|
||||
pub(super) fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
symbol_cache: FxHashMap::default(),
|
||||
spans: Vec::with_capacity(capacity),
|
||||
head: FixedString::new(capacity),
|
||||
full: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of strings interned by the interner.
|
||||
pub(super) fn len(&self) -> usize {
|
||||
self.spans.len()
|
||||
}
|
||||
|
||||
/// Returns `true` if the interner contains no interned strings.
|
||||
pub(super) fn is_empty(&self) -> bool {
|
||||
self.spans.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> RawInterner<Char>
|
||||
where
|
||||
Char: Hash + Eq,
|
||||
{
|
||||
/// Returns the index position for the given string if any.
|
||||
///
|
||||
/// Can be used to query if a string has already been interned without interning.
|
||||
pub(super) fn get(&self, string: &[Char]) -> Option<usize> {
|
||||
// SAFETY:
|
||||
// `string` is a valid slice that doesn't outlive the
|
||||
// created `InternedStr`, so this is safe.
|
||||
unsafe {
|
||||
self.symbol_cache
|
||||
.get(&InternedStr::new(string.into()))
|
||||
.copied()
|
||||
}
|
||||
}
|
||||
|
||||
/// Interns the given `'static` string.
|
||||
///
|
||||
/// Returns the index of `string` within the interner.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// This is more efficient than [`RawInterner::intern`], since it
|
||||
/// avoids storing `string` inside the interner.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner already interns the maximum number of strings possible
|
||||
/// by the chosen symbol type.
|
||||
pub(super) fn intern_static(&mut self, string: &'static [Char]) -> usize {
|
||||
// SAFETY:
|
||||
// A static string reference is always valid, meaning it cannot outlive
|
||||
// the lifetime of the created `InternedStr`. This makes this
|
||||
// operation safe.
|
||||
let string = unsafe { InternedStr::new(string.into()) };
|
||||
|
||||
// SAFETY:
|
||||
// A `InternedStr` created from a static reference
|
||||
// cannot be invalidated by allocations and deallocations,
|
||||
// so this is safe.
|
||||
unsafe { self.next_index(string) }
|
||||
}
|
||||
|
||||
/// Returns the string for the given index if any.
|
||||
pub(super) fn index(&self, index: usize) -> Option<&[Char]> {
|
||||
self.spans.get(index).map(|ptr|
|
||||
// SAFETY: We always ensure the stored `InternedStr`s always
|
||||
// reference memory inside `head` and `full`
|
||||
unsafe {ptr.as_ref()})
|
||||
}
|
||||
|
||||
/// Inserts a new string pointer into `spans` and returns its index.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller must ensure `string` points to a valid
|
||||
/// memory inside `head` (or only valid in the case of statics)
|
||||
/// and that it won't be invalidated by allocations and deallocations.
|
||||
unsafe fn next_index(&mut self, string: InternedStr<Char>) -> usize {
|
||||
let next = self.len();
|
||||
self.spans.push(string);
|
||||
self.symbol_cache.insert(string, next);
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
impl<Char> RawInterner<Char>
|
||||
where
|
||||
Char: Hash + Eq + Clone,
|
||||
{
|
||||
/// Interns the given string.
|
||||
///
|
||||
/// Returns the index of `string` within the interner.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the interner already interns the maximum number of strings possible by the chosen symbol type.
|
||||
pub(super) fn intern(&mut self, string: &[Char]) -> usize {
|
||||
// SAFETY:
|
||||
//
|
||||
// Firstly, this interner works on the assumption that the allocated
|
||||
// memory by `head` won't ever be moved from its position on the heap,
|
||||
// which is an important point to understand why manipulating it like
|
||||
// this is safe.
|
||||
//
|
||||
// `String` (which is simply a `Vec<u8>` with additional invariants)
|
||||
// is essentially a pointer to heap memory that can be moved without
|
||||
// any problems, since copying a pointer cannot invalidate the memory
|
||||
// that it points to.
|
||||
//
|
||||
// However, `String` CAN be invalidated when pushing, extending or
|
||||
// shrinking it, since all those operations reallocate on the heap.
|
||||
//
|
||||
// To prevent that, we HAVE to ensure the capacity will succeed without
|
||||
// having to reallocate, and the only way to do that without invalidating
|
||||
// any other alive `InternedStr` is to create a brand new `head` with
|
||||
// enough capacity and push the old `head` to `full` to keep it alive
|
||||
// throughout the lifetime of the whole interner.
|
||||
//
|
||||
// `FixedString` encapsulates this by only allowing checked `push`es
|
||||
// to the internal string, but we still have to ensure the memory
|
||||
// of `head` is not deallocated until the whole interner deallocates,
|
||||
// which we can do by moving it inside the interner itself, specifically
|
||||
// on the `full` vector, where every other old `head` also lives.
|
||||
let interned_str = unsafe {
|
||||
self.head.push(string).unwrap_or_else(|| {
|
||||
let new_cap =
|
||||
(usize::max(self.head.capacity(), string.len()) + 1).next_power_of_two();
|
||||
let new_head = FixedString::new(new_cap);
|
||||
let old_head = std::mem::replace(&mut self.head, new_head);
|
||||
|
||||
// If the user creates an `Interner`
|
||||
// with `Interner::with_capacity(BIG_NUMBER)` and
|
||||
// the first interned string's length is bigger than `BIG_NUMBER`,
|
||||
// `self.full.push(old_head)` would push a big, empty string of
|
||||
// allocated size `BIG_NUMBER` into `full`.
|
||||
// This prevents that case.
|
||||
if !old_head.is_empty() {
|
||||
self.full.push(old_head);
|
||||
}
|
||||
self.head.push_unchecked(string)
|
||||
})
|
||||
};
|
||||
|
||||
// SAFETY: We are obtaining a pointer to the internal memory of
|
||||
// `head`, which is alive through the whole life of the interner, so
|
||||
// this is safe.
|
||||
unsafe { self.next_index(interned_str) }
|
||||
}
|
||||
}
|
||||
204
javascript-engine/external/boa/boa_interner/src/sym.rs
vendored
Normal file
204
javascript-engine/external/boa/boa_interner/src/sym.rs
vendored
Normal file
@@ -0,0 +1,204 @@
|
||||
use boa_macros::utf16;
|
||||
use indexmap::IndexSet;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Symbol that identifies a string interned by Boa.
///
/// A `Sym` is a thin wrapper around a [`NonZeroUsize`]: it is exactly one pointer wide and,
/// because zero is never a valid value, an `Option<Sym>` also fits in a single pointer width.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(transparent)
)]
#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))]
// `Sym` has `unsafe` code in its inherent impl (`new_unchecked`), which is what this lint
// complains about when `Deserialize` is derived.
// NOTE(review): presumably fine because deserializing a `NonZeroUsize` cannot produce
// zero — confirm against the serde documentation.
#[allow(clippy::unsafe_derive_deserialize)]
pub struct Sym {
    value: NonZeroUsize,
}
|
||||
|
||||
impl Sym {
|
||||
/// Symbol for the empty string (`""`).
|
||||
pub const EMPTY_STRING: Self = unsafe { Self::new_unchecked(1) };
|
||||
|
||||
/// Symbol for the `"arguments"` string.
|
||||
pub const ARGUMENTS: Self = unsafe { Self::new_unchecked(2) };
|
||||
|
||||
/// Symbol for the `"await"` string.
|
||||
pub const AWAIT: Self = unsafe { Self::new_unchecked(3) };
|
||||
|
||||
/// Symbol for the `"yield"` string.
|
||||
pub const YIELD: Self = unsafe { Self::new_unchecked(4) };
|
||||
|
||||
/// Symbol for the `"eval"` string.
|
||||
pub const EVAL: Self = unsafe { Self::new_unchecked(5) };
|
||||
|
||||
/// Symbol for the `"default"` string.
|
||||
pub const DEFAULT: Self = unsafe { Self::new_unchecked(6) };
|
||||
|
||||
/// Symbol for the `"null"` string.
|
||||
pub const NULL: Self = unsafe { Self::new_unchecked(7) };
|
||||
|
||||
/// Symbol for the `"RegExp"` string.
|
||||
pub const REGEXP: Self = unsafe { Self::new_unchecked(8) };
|
||||
|
||||
/// Symbol for the `"get"` string.
|
||||
pub const GET: Self = unsafe { Self::new_unchecked(9) };
|
||||
|
||||
/// Symbol for the `"set"` string.
|
||||
pub const SET: Self = unsafe { Self::new_unchecked(10) };
|
||||
|
||||
/// Symbol for the `"<main>"` string.
|
||||
pub const MAIN: Self = unsafe { Self::new_unchecked(11) };
|
||||
|
||||
/// Symbol for the `"raw"` string.
|
||||
pub const RAW: Self = unsafe { Self::new_unchecked(12) };
|
||||
|
||||
/// Symbol for the `"static"` string.
|
||||
pub const STATIC: Self = unsafe { Self::new_unchecked(13) };
|
||||
|
||||
/// Symbol for the `"prototype"` string.
|
||||
pub const PROTOTYPE: Self = unsafe { Self::new_unchecked(14) };
|
||||
|
||||
/// Symbol for the `"constructor"` string.
|
||||
pub const CONSTRUCTOR: Self = unsafe { Self::new_unchecked(15) };
|
||||
|
||||
/// Symbol for the `"implements"` string.
|
||||
pub const IMPLEMENTS: Self = unsafe { Self::new_unchecked(16) };
|
||||
|
||||
/// Symbol for the `"interface"` string.
|
||||
pub const INTERFACE: Self = unsafe { Self::new_unchecked(17) };
|
||||
|
||||
/// Symbol for the `"let"` string.
|
||||
pub const LET: Self = unsafe { Self::new_unchecked(18) };
|
||||
|
||||
/// Symbol for the `"package"` string.
|
||||
pub const PACKAGE: Self = unsafe { Self::new_unchecked(19) };
|
||||
|
||||
/// Symbol for the `"private"` string.
|
||||
pub const PRIVATE: Self = unsafe { Self::new_unchecked(20) };
|
||||
|
||||
/// Symbol for the `"protected"` string.
|
||||
pub const PROTECTED: Self = unsafe { Self::new_unchecked(21) };
|
||||
|
||||
/// Symbol for the `"public"` string.
|
||||
pub const PUBLIC: Self = unsafe { Self::new_unchecked(22) };
|
||||
|
||||
/// Symbol for the `"anonymous"` string.
|
||||
pub const ANONYMOUS: Self = unsafe { Self::new_unchecked(23) };
|
||||
|
||||
/// Symbol for the `"true"` string.
|
||||
pub const TRUE: Self = unsafe { Self::new_unchecked(24) };
|
||||
|
||||
/// Symbol for the `"false"` string.
|
||||
pub const FALSE: Self = unsafe { Self::new_unchecked(25) };
|
||||
|
||||
/// Symbol for the `"async"` string.
|
||||
pub const ASYNC: Self = unsafe { Self::new_unchecked(26) };
|
||||
|
||||
/// Symbol for the `"of"` string.
|
||||
pub const OF: Self = unsafe { Self::new_unchecked(27) };
|
||||
|
||||
/// Symbol for the `"target"` string.
|
||||
pub const TARGET: Self = unsafe { Self::new_unchecked(28) };
|
||||
|
||||
/// Symbol for the `"__proto__"` string.
|
||||
pub const __PROTO__: Self = unsafe { Self::new_unchecked(29) };
|
||||
|
||||
/// Symbol for the `"name"` string.
|
||||
pub const NAME: Self = unsafe { Self::new_unchecked(30) };
|
||||
|
||||
/// Creates a new [`Sym`] from the provided `value`, or returns `None` if `index` is zero.
|
||||
pub(super) fn new(value: usize) -> Option<Self> {
|
||||
NonZeroUsize::new(value).map(|value| Self { value })
|
||||
}
|
||||
|
||||
/// Creates a new [`Sym`] from the provided `value`, without checking if `value` is not zero
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `value` must not be zero.
|
||||
pub(super) const unsafe fn new_unchecked(value: usize) -> Self {
|
||||
Self {
|
||||
value:
|
||||
// SAFETY: The caller must ensure the invariants of the function.
|
||||
unsafe {
|
||||
NonZeroUsize::new_unchecked(value)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the internal value of the [`Sym`]
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub const fn get(self) -> usize {
|
||||
self.value.get()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! create_static_strings {
    ( $( $string:literal ),+ $(,)? ) => {
        /// Commonly used static strings, stored as `UTF-8` in an ordered set.
        ///
        /// # Note
        ///
        /// This set, `COMMON_STRINGS_UTF16` and the constants defined in [`Sym`]
        /// must always be kept in sync.
        pub(super) static COMMON_STRINGS_UTF8: phf::OrderedSet<&'static str> = {
            const COMMON_STRINGS: phf::OrderedSet<&'static str> =
                phf::phf_ordered_set! { $( $string ),+ };
            // Our `Interner` would overflow if `COMMON_STRINGS` held `usize::MAX` entries,
            // so reject that at compile time.
            sa::const_assert!(COMMON_STRINGS.len() < usize::MAX);
            COMMON_STRINGS
        };

        /// Commonly used static strings, stored as `UTF-16` in an ordered set.
        ///
        /// # Note
        ///
        /// This set, `COMMON_STRINGS_UTF8` and the constants defined in [`Sym`]
        /// must always be kept in sync.
        // FIXME: use phf when const expressions are allowed. https://github.com/rust-phf/rust-phf/issues/188
        pub(super) static COMMON_STRINGS_UTF16: Lazy<IndexSet<&'static [u16]>> = Lazy::new(|| {
            let mut utf16_strings = IndexSet::with_capacity(COMMON_STRINGS_UTF8.len());
            // Insert in declaration order, the same order used for the `UTF-8` set.
            $( utf16_strings.insert(utf16!($string)); )+
            utf16_strings
        });
    };
}
|
||||
|
||||
// The order of this list is significant: the string at 1-based position `n` is the one
// named by the `Sym` constant whose value is `n`, so any addition or reordering here must
// be mirrored in the constants of `Sym`.
create_static_strings! {
    "",
    "arguments",
    "await",
    "yield",
    "eval",
    "default",
    "null",
    "RegExp",
    "get",
    "set",
    "<main>",
    "raw",
    "static",
    "prototype",
    "constructor",
    "implements",
    "interface",
    "let",
    "package",
    "private",
    "protected",
    "public",
    "anonymous",
    "true",
    "false",
    "async",
    "of",
    "target",
    "__proto__",
    "name",
}
|
||||
114
javascript-engine/external/boa/boa_interner/src/tests.rs
vendored
Normal file
114
javascript-engine/external/boa/boa_interner/src/tests.rs
vendored
Normal file
@@ -0,0 +1,114 @@
|
||||
use crate::{Interner, Sym, COMMON_STRINGS_UTF16, COMMON_STRINGS_UTF8};
|
||||
use boa_macros::utf16;
|
||||
|
||||
#[track_caller]
|
||||
fn sym_from_usize(index: usize) -> Sym {
|
||||
Sym::new(index).expect("Invalid NonZeroUsize")
|
||||
}
|
||||
|
||||
/// Interning each common static string must yield the symbol matching its 1-based position.
#[test]
fn check_static_strings() {
    let mut interner = Interner::default();

    // Symbols start at 1, so pair every string with a counter that also starts at 1.
    for (expected, &text) in (1..).zip(COMMON_STRINGS_UTF8.into_iter()) {
        assert_eq!(interner.get_or_intern(text), sym_from_usize(expected));
    }
}
|
||||
|
||||
/// A string that is not in the common static set must get a symbol past the static range.
#[test]
fn check_new_string() {
    let mut interner = Interner::default();

    let sym = interner.get_or_intern("my test string");
    assert!(sym.get() > COMMON_STRINGS_UTF8.len());
}
|
||||
|
||||
/// Interned strings must resolve back to their contents, and interning the same string
/// twice must return the same symbol, for both `UTF-8` and `UTF-16` inputs.
#[test]
fn check_resolve() {
    let mut interner = Interner::default();

    // The two halves of each pair are different strings; they are interned and checked
    // independently of each other.
    let cases = [
        ("test string", utf16!("TEST STRING")),
        ("arguments", utf16!("ARGUMENTS")),
        ("hello", utf16!("HELLO")),
    ];

    for (s8, s16) in cases {
        let first = interner.get_or_intern(s8);
        let resolved = interner.resolve(first).unwrap();
        assert_eq!(Some(s8), resolved.utf8());
        let second = interner.get_or_intern(s8);
        assert_eq!(first, second);

        let first = interner.get_or_intern(s16);
        let resolved = interner.resolve(first).unwrap();
        assert_eq!(s16, resolved.utf16());
        let second = interner.get_or_intern(s16);
        assert_eq!(first, second);
    }
}
|
||||
|
||||
/// Strings interned through `get_or_intern_static` must resolve to both of their encodings,
/// and interning the `UTF-8` form afterwards must return the same symbol.
#[test]
fn check_static_resolve() {
    let mut interner = Interner::default();

    // Every common static string, paired with its `UTF-16` counterpart...
    let common = COMMON_STRINGS_UTF8
        .into_iter()
        .copied()
        .zip(COMMON_STRINGS_UTF16.iter().copied());
    // ...followed by a few strings that are not part of the common set.
    let extra = [
        ("my test str", utf16!("my test str")),
        ("hello world", utf16!("hello world")),
        (";", utf16!(";")),
    ];

    for (utf8, utf16) in common.chain(extra.into_iter()) {
        let sym = interner.get_or_intern_static(utf8, utf16);
        let resolved = interner.resolve(sym).unwrap();
        assert_eq!(Some(utf8), resolved.utf8());
        assert_eq!(utf16, resolved.utf16());

        let new_sym = interner.get_or_intern(utf8);

        assert_eq!(sym, new_sym);
    }
}
|
||||
|
||||
/// `UTF-16` strings containing unpaired surrogates must be internable: they resolve to the
/// exact code units, have no `UTF-8` form, and do not disturb neighbouring entries.
#[test]
fn check_unpaired_surrogates() {
    let mut interner = Interner::default();

    let lone_prefix = &[
        0xDC15_u16, 0xDC19, 'h' as u16, 'e' as u16, 'l' as u16, 'l' as u16, 'o' as u16,
    ];
    let lone_mixed = &[
        0xDC01_u16, 'w' as u16, 'o' as u16, 'r' as u16, 0xDCF4, 'l' as u16, 'd' as u16,
    ];

    // Interleave valid strings with the unpaired-surrogate ones.
    let abc = interner.get_or_intern("abc");
    let def = interner.get_or_intern("def");
    let prefix_sym = interner.get_or_intern(lone_prefix);
    let ghi = interner.get_or_intern(utf16!("ghi"));
    let mixed_sym = interner.get_or_intern(lone_mixed);
    let jkl = interner.get_or_intern("jkl");

    let resolved = interner.resolve_expect(abc);
    assert_eq!(resolved.utf8(), Some("abc"));
    assert_eq!(resolved.utf16(), utf16!("abc"));

    let resolved = interner.resolve_expect(def);
    assert_eq!(resolved.utf8(), Some("def"));
    assert_eq!(resolved.utf16(), utf16!("def"));

    let resolved = interner.resolve_expect(prefix_sym);
    assert!(resolved.utf8().is_none());
    assert_eq!(resolved.utf16(), lone_prefix);

    let resolved = interner.resolve_expect(ghi);
    assert_eq!(resolved.utf8(), Some("ghi"));
    assert_eq!(resolved.utf16(), utf16!("ghi"));

    let resolved = interner.resolve_expect(mixed_sym);
    assert!(resolved.utf8().is_none());
    assert_eq!(resolved.utf16(), lone_mixed);

    let resolved = interner.resolve_expect(jkl);
    assert_eq!(resolved.utf8(), Some("jkl"));
    assert_eq!(resolved.utf16(), utf16!("jkl"));
}
|
||||
Reference in New Issue
Block a user