/*
 * Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DeprecatedString.h"

#include "CString.h"
#include "FloatConversion.h"
#include "Logging.h"
#include "PlatformString.h"
#include "RegularExpression.h"
#include "TextEncoding.h"
// NOTE(review): the header names of the directives below are missing in this
// copy of the file (the <...> arguments were lost). Restore them from version
// control; they are left untouched here rather than guessed.
#include
#include
#include
#include
#include
#include

#if PLATFORM(WIN_OS)
#include
#endif

#if PLATFORM(QT)
#include
#endif

using namespace std;
using namespace KJS;

namespace WebCore {

COMPILE_ASSERT(sizeof(DeprecatedChar) == 2, deprecated_char_is_2_bytes)

#define CHECK_FOR_HANDLE_LEAKS 0

#if PLATFORM(SYMBIAN)
#undef CHECK_FOR_HANDLE_LEAKS
// symbian:fixme need page aligned allocations as Symbian platform does not have support for valloc
#define CHECK_FOR_HANDLE_LEAKS 1
#endif

// Buffer sizing and allocation helpers. The "good size" macros are identity
// functions here; all storage comes from fastMalloc/fastRealloc/fastFree.
#define ALLOC_QCHAR_GOOD_SIZE(X) (X)
#define ALLOC_CHAR_GOOD_SIZE(X) (X)
#define ALLOC_CHAR(N) (char*)fastMalloc(N)
#define REALLOC_CHAR(P, N) (char *)fastRealloc(P, N)
#define DELETE_CHAR(P) fastFree(P)

#define WEBCORE_ALLOCATE_CHARACTERS(N) (DeprecatedChar*)fastMalloc(sizeof(DeprecatedChar)*(N))
#define WEBCORE_REALLOCATE_CHARACTERS(P, N) (DeprecatedChar *)fastRealloc(P, sizeof(DeprecatedChar)*(N))
#define DELETE_QCHAR(P) fastFree(P)

// NOTE(review): CHECK_FOR_HANDLE_LEAKS is always defined above (to 0 or 1), so
// the #ifndef section below is never compiled and allocateHandle() always takes
// the fastMalloc branch. If the macro's *value* was meant to select the path,
// these tests should be "#if"; freeHandle() (defined outside this part of the
// file) would have to change in step, so it is left as is.
#ifndef CHECK_FOR_HANDLE_LEAKS
struct HandleNode;
struct HandlePageNode;

static HandleNode *allocateNode(HandlePageNode *pageNode);
static HandlePageNode *allocatePageNode();

static HandlePageNode *usedNodeAllocationPages = 0;
static HandlePageNode *freeNodeAllocationPages = 0;

// Lazily creates the first page of handle nodes.
static inline void initializeHandleNodes()
{
    if (freeNodeAllocationPages == 0)
        freeNodeAllocationPages = allocatePageNode();
}
#endif

// Returns storage for one DeprecatedStringData pointer (a "handle").
// The returned slot is uninitialized; callers store the data pointer into it.
static inline DeprecatedStringData **allocateHandle()
{
#ifdef CHECK_FOR_HANDLE_LEAKS
    return static_cast<DeprecatedStringData **>(fastMalloc(sizeof(DeprecatedStringData *)));
#else
    initializeHandleNodes();
    return reinterpret_cast<DeprecatedStringData **>(allocateNode(freeNodeAllocationPages));
#endif
}

static void freeHandle(DeprecatedStringData **);

// True for code units 0x01..0xFF, i.e. non-NUL characters that fit in one byte.
#define IS_ASCII_QCHAR(c) ((c).unicode() > 0 && (c).unicode() <= 0xff)

static const int caseDelta = ('a' - 'A');

const char * const DeprecatedString::null = 0;

DeprecatedStringData *DeprecatedString::shared_null = 0;
DeprecatedStringData **DeprecatedString::shared_null_handle = 0;

// -------------------------------------------------------------------------
// Utility functions
// -------------------------------------------------------------------------

// Three-way comparison of two strings by UTF-16 code unit.
// Returns 0 when both share the same buffer, 1 when only `as` has no buffer,
// -1 when only `bs` has none; otherwise the difference of the first differing
// code units, or of the lengths when one string is a prefix of the other.
static inline int ucstrcmp(const DeprecatedString &as, const DeprecatedString &bs)
{
    const DeprecatedChar *a = as.unicode();
    const DeprecatedChar *b = bs.unicode();
    if (a == b)
        return 0;
    if (a == 0)
        return 1;
    if (b == 0)
        return -1;
    int l = min(as.length(), bs.length());
    while (l-- && *a == *b)
        a++, b++;
    // l is -1 only when the loop consumed the whole common prefix.
    if (l == -1)
        return (as.length() - bs.length());
    return a->unicode() - b->unicode();
}

// tolower() is only defined for arguments representable as unsigned char (or
// EOF). Code units above 0xFF are returned unchanged instead of being passed on.
static inline int toLowerIfLatin1(int c)
{
    return c <= 0xFF ? tolower(c) : c;
}

// Returns true when the first `l` code units of `a` equal the first `l` bytes of `b`.
static bool equal(const DeprecatedChar *a, const char *b, int l)
{
    ASSERT(l >= 0);
    while (l--) {
        if (*a != *b)
            return false;
        a++;
        b++;
    }
    return true;
}

// Not a "true" case insensitive compare; only insensitive for plain ASCII.
static bool equalCaseInsensitive(const char *a, const char *b, int l)
{
    ASSERT(l >= 0);
    while (l--) {
        // Cast first: passing a negative char to tolower() is undefined.
        if (tolower(static_cast<unsigned char>(*a)) != tolower(static_cast<unsigned char>(*b)))
            return false;
        a++;
        b++;
    }
    return true;
}

// Same as above, comparing UTF-16 code units against 8-bit characters.
static bool equalCaseInsensitive(const DeprecatedChar *a, const char *b, int l)
{
    ASSERT(l >= 0);
    while (l--) {
        if (toLowerIfLatin1(a->unicode()) != tolower(static_cast<unsigned char>(*b)))
            return false;
        a++;
        b++;
    }
    return true;
}

// Same as above, comparing two UTF-16 buffers.
static bool equalCaseInsensitive(const DeprecatedChar *a, const DeprecatedChar *b, int l)
{
    ASSERT(l >= 0);
    while (l--) {
        if (toLowerIfLatin1(a->unicode()) != toLowerIfLatin1(b->unicode()))
            return false;
        a++;
        b++;
    }
    return true;
}

static inline bool equalCaseInsensitive(char c1, char c2)
{
    return tolower(static_cast<unsigned char>(c1)) == tolower(static_cast<unsigned char>(c2));
}

static inline bool equalCaseInsensitive(DeprecatedChar c1, char c2)
{
    return toLowerIfLatin1(c1.unicode()) == tolower(static_cast<unsigned char>(c2));
}

// Returns true when `c` is a valid digit in the given numeric base: '0'-'9'
// below the base, or a letter (either case) whose value 10, 11, ... is below
// the base. Bases above 36 are treated as 36.
// Explicit range checks are used instead of isdigit()/isalpha(), which are
// undefined for code units that do not fit in unsigned char.
static bool isCharacterAllowedInBase(DeprecatedChar c, int base)
{
    int uc = c.unicode();
    if (uc >= '0' && uc <= '9')
        return uc - '0' < base;
    if (base > 36)
        base = 36;
    return (uc >= 'a' && uc < 'a' + base - 10)
        || (uc >= 'A' && uc < 'A' + base - 10);
}

// -------------------------------------------------------------------------
// DeprecatedStringData
// -------------------------------------------------------------------------

// FIXME, make constructor explicitly take a 'copy' flag.
// This can be used to hand off ownership of allocated data when detaching and
// deleting QStrings.

// Creates empty data: a valid, NUL-terminated 8-bit string of length 0 stored
// in the internal buffer, with no Unicode representation yet.
DeprecatedStringData::DeprecatedStringData()
    : refCount(1)
    , _length(0)
    , _unicode(0)
    , _ascii(0)
    , _maxUnicode(WEBCORE_DS_INTERNAL_BUFFER_UCHARS)
    , _isUnicodeValid(0)
    , _isHeapAllocated(0)
    , _maxAscii(WEBCORE_DS_INTERNAL_BUFFER_CHARS)
    , _isAsciiValid(1)
{
    _ascii = _internalBuffer;
    _internalBuffer[0] = 0;
}

// Resets every field to the state produced by the default constructor.
void DeprecatedStringData::initialize()
{
    refCount = 1;
    _length = 0;
    _unicode = 0;
    _ascii = _internalBuffer;
    _maxUnicode = WEBCORE_DS_INTERNAL_BUFFER_UCHARS;
    _isUnicodeValid = 0;
    _maxAscii = WEBCORE_DS_INTERNAL_BUFFER_CHARS;
    _isAsciiValid = 1;
    _internalBuffer[0] = 0;
    _isHeapAllocated = 0;
}

// Don't copy data.
// Stores the pointer `u` directly as the Unicode buffer, with length `l` and
// capacity `m` (both in characters).
DeprecatedStringData::DeprecatedStringData(DeprecatedChar *u, unsigned l, unsigned m)
    : refCount(1)
    , _length(l)
    , _unicode(u)
    , _ascii(0)
    , _maxUnicode(m)
    , _isUnicodeValid(1)
    , _isHeapAllocated(0)
    , _maxAscii(WEBCORE_DS_INTERNAL_BUFFER_CHARS)
    , _isAsciiValid(0)
{
    ASSERT(m >= l);
}

// Don't copy data.
void DeprecatedStringData::initialize(DeprecatedChar *u, unsigned l, unsigned m) { ASSERT(m >= l); refCount = 1; _length = l; _unicode = u; _ascii = 0; _maxUnicode = m; _isUnicodeValid = 1; _maxAscii = 0; _isAsciiValid = 0; _isHeapAllocated = 0; } // Copy data DeprecatedStringData::DeprecatedStringData(const DeprecatedChar *u, unsigned l) { initialize (u, l); } // Copy data void DeprecatedStringData::initialize(const DeprecatedChar *u, unsigned l) { refCount = 1; _length = l; _ascii = 0; _isUnicodeValid = 1; _maxAscii = 0; _isAsciiValid = 0; _isHeapAllocated = 0; if (l > WEBCORE_DS_INTERNAL_BUFFER_UCHARS) { _maxUnicode = ALLOC_QCHAR_GOOD_SIZE(l); _unicode = WEBCORE_ALLOCATE_CHARACTERS(_maxUnicode); memcpy(_unicode, u, l*sizeof(DeprecatedChar)); } else { _maxUnicode = WEBCORE_DS_INTERNAL_BUFFER_UCHARS; _unicode = (DeprecatedChar *)_internalBuffer; if (l) memcpy(_internalBuffer, u, l*sizeof(DeprecatedChar)); } } // Copy data DeprecatedStringData::DeprecatedStringData(const char *a, unsigned l) { initialize(a, l); } // Copy data void DeprecatedStringData::initialize(const char *a, unsigned l) { refCount = 1; _length = l; _unicode = 0; _isUnicodeValid = 0; _maxUnicode = 0; _isAsciiValid = 1; _isHeapAllocated = 0; if (l > WEBCORE_DS_INTERNAL_BUFFER_CHARS) { _maxAscii = ALLOC_CHAR_GOOD_SIZE(l+1); _ascii = ALLOC_CHAR(_maxAscii); if (a) memcpy(_ascii, a, l); _ascii[l] = 0; } else { _maxAscii = WEBCORE_DS_INTERNAL_BUFFER_CHARS; _ascii = _internalBuffer; if (a) memcpy(_internalBuffer, a, l); _internalBuffer[l] = 0; } } DeprecatedStringData* DeprecatedStringData::createAndAdopt(DeprecatedStringData &o) { DeprecatedStringData* data = new DeprecatedStringData(); data->adopt(o); return data; } void DeprecatedStringData::adopt(DeprecatedStringData& o) { ASSERT(refCount == 1); _length = o._length; _unicode = o._unicode; _ascii = o._ascii; _maxUnicode = o._maxUnicode; _isUnicodeValid = o._isUnicodeValid; _isHeapAllocated = 0; _maxAscii = o._maxAscii; _isAsciiValid = 
o._isAsciiValid; // Handle the case where either the Unicode or 8-bit pointer was // pointing to the internal buffer. We need to point at the // internal buffer in the new object, and copy the characters. if (_unicode == reinterpret_cast(o._internalBuffer)) { if (_isUnicodeValid) { ASSERT(!_isAsciiValid || _ascii != o._internalBuffer); ASSERT(_length <= WEBCORE_DS_INTERNAL_BUFFER_UCHARS); memcpy(_internalBuffer, o._internalBuffer, _length * sizeof(DeprecatedChar)); _unicode = reinterpret_cast(_internalBuffer); } else { _unicode = 0; } } if (_ascii == o._internalBuffer) { if (_isAsciiValid) { ASSERT(_length <= WEBCORE_DS_INTERNAL_BUFFER_CHARS); memcpy(_internalBuffer, o._internalBuffer, _length); _internalBuffer[_length] = 0; _ascii = _internalBuffer; } else { _ascii = 0; } } // Clean up the other DeprecatedStringData just enough so that it can be destroyed // cleanly. It's not in a good enough state to use, but that's OK. It just // needs to be in a state where ~DeprecatedStringData won't do anything harmful, // and setting these to 0 will do that (preventing any double-free problems). 
o._unicode = 0; o._ascii = 0; } DeprecatedStringData *DeprecatedString::makeSharedNull() { if (!shared_null) { shared_null = new DeprecatedStringData; shared_null->ref(); shared_null->_maxAscii = 0; shared_null->_maxUnicode = 0; shared_null->_unicode = (DeprecatedChar *)&shared_null->_internalBuffer[0]; shared_null->_isUnicodeValid = 1; } return shared_null; } DeprecatedStringData **DeprecatedString::makeSharedNullHandle() { if (!shared_null_handle) { shared_null_handle = allocateHandle(); *shared_null_handle = makeSharedNull(); } return shared_null_handle; } DeprecatedStringData::~DeprecatedStringData() { ASSERT(refCount == 0); if (_unicode && !isUnicodeInternal()) DELETE_QCHAR(_unicode); if (_ascii && !isAsciiInternal()) DELETE_CHAR(_ascii); } void DeprecatedStringData::increaseAsciiSize(unsigned size) { ASSERT(this != DeprecatedString::shared_null); unsigned newSize = (unsigned)ALLOC_CHAR_GOOD_SIZE((size * 3 + 1) / 2); if (!_isAsciiValid) makeAscii(); ASSERT(_isAsciiValid); if (isAsciiInternal()) { char *newAscii = ALLOC_CHAR(newSize); if (_length) memcpy(newAscii, _ascii, _length); _ascii = newAscii; } else { _ascii = REALLOC_CHAR(_ascii, newSize); } _maxAscii = newSize; _isAsciiValid = 1; _isUnicodeValid = 0; } void DeprecatedStringData::increaseUnicodeSize(unsigned size) { ASSERT(size > _length); ASSERT(this != DeprecatedString::shared_null); unsigned newSize = (unsigned)ALLOC_QCHAR_GOOD_SIZE((size * 3 + 1) / 2); if (!_isUnicodeValid) makeUnicode(); ASSERT(_isUnicodeValid); if (isUnicodeInternal()) { DeprecatedChar *newUni = WEBCORE_ALLOCATE_CHARACTERS(newSize); if (_length) memcpy(newUni, _unicode, _length*sizeof(DeprecatedChar)); _unicode = newUni; } else { _unicode = WEBCORE_REALLOCATE_CHARACTERS(_unicode, newSize); } _maxUnicode = newSize; _isUnicodeValid = 1; _isAsciiValid = 0; } char *DeprecatedStringData::makeAscii() { ASSERT(this != DeprecatedString::shared_null); if (_isUnicodeValid){ DeprecatedChar copyBuf[WEBCORE_DS_INTERNAL_BUFFER_CHARS]; 
DeprecatedChar *str; if (_ascii && !isAsciiInternal()) DELETE_QCHAR(_ascii); if (_length < WEBCORE_DS_INTERNAL_BUFFER_CHARS){ if (isUnicodeInternal()) { unsigned i = _length; DeprecatedChar *tp = ©Buf[0], *fp = _unicode; while (i--) *tp++ = *fp++; str = ©Buf[0]; _isUnicodeValid = 0; } else str = _unicode; _ascii = _internalBuffer; _maxAscii = WEBCORE_DS_INTERNAL_BUFFER_CHARS; } else { unsigned newSize = ALLOC_CHAR_GOOD_SIZE(_length+1); _ascii = ALLOC_CHAR(newSize); _maxAscii = newSize; str = _unicode; } unsigned i = _length; char* cp = _ascii; while (i--) // FIXME: this converts non-Latin1 characters to '\0', which may be not what we want in some cases. // In particular, toDouble() may fail to report errors, believing that the string ends earlier // than it actually does. *cp++ = (*str++).latin1(); *cp = 0; _isAsciiValid = 1; } else if (!_isAsciiValid) FATAL("ASCII character cache not valid"); return _ascii; } DeprecatedChar *DeprecatedStringData::makeUnicode() { ASSERT(this != DeprecatedString::shared_null); if (_isAsciiValid){ char copyBuf[WEBCORE_DS_INTERNAL_BUFFER_CHARS]; char *str; if (_unicode && !isUnicodeInternal()) DELETE_QCHAR(_unicode); if (_length <= WEBCORE_DS_INTERNAL_BUFFER_UCHARS){ if (isAsciiInternal()) { unsigned i = _length; char *tp = ©Buf[0], *fp = _ascii; while (i--) *tp++ = *fp++; str = ©Buf[0]; _isAsciiValid = 0; } else str = _ascii; _unicode = (DeprecatedChar *)_internalBuffer; _maxUnicode = WEBCORE_DS_INTERNAL_BUFFER_UCHARS; } else { unsigned newSize = ALLOC_QCHAR_GOOD_SIZE(_length); _unicode = WEBCORE_ALLOCATE_CHARACTERS(newSize); _maxUnicode = newSize; str = _ascii; } unsigned i = _length; DeprecatedChar *cp = _unicode; while ( i-- ) *cp++ = *str++; _isUnicodeValid = 1; } else if (!_isUnicodeValid) FATAL("invalid character cache"); return _unicode; } // ------------------------------------------------------------------------- // DeprecatedString // ------------------------------------------------------------------------- DeprecatedString 
DeprecatedString::number(int n) { DeprecatedString qs; qs.setNum(n); return qs; } DeprecatedString DeprecatedString::number(unsigned n) { DeprecatedString qs; qs.setNum(n); return qs; } DeprecatedString DeprecatedString::number(long n) { DeprecatedString qs; qs.setNum(n); return qs; } DeprecatedString DeprecatedString::number(unsigned long n) { DeprecatedString qs; qs.setNum(n); return qs; } DeprecatedString DeprecatedString::number(double n) { DeprecatedString qs; qs.setNum(n); return qs; } inline void DeprecatedString::detachIfInternal() { DeprecatedStringData *oldData = *dataHandle; if (oldData->refCount > 1 && oldData == &internalData) { DeprecatedStringData *newData = DeprecatedStringData::createAndAdopt(*oldData); newData->_isHeapAllocated = 1; newData->refCount = oldData->refCount; oldData->refCount = 1; oldData->deref(); *dataHandle = newData; } } const DeprecatedChar *DeprecatedString::stableUnicode() { // if we're using the internal data of another string, detach now if (!dataHandle[0]->_isHeapAllocated && *dataHandle != &internalData) { detach(); } return unicode(); } DeprecatedString::~DeprecatedString() { ASSERT(dataHandle); ASSERT(dataHandle[0]->refCount != 0); // Only free the handle if no other string has a reference to the // data. The handle will be freed by the string that has the // last reference to data. bool needToFreeHandle = dataHandle[0]->refCount == 1 && *dataHandle != shared_null; // Copy our internal data if necessary, other strings still need it. detachIfInternal(); // Remove our reference. This should always be the last reference // if *dataHandle points to our internal DeprecatedStringData. If we just detached, // this will remove the extra ref from the new handle. 
dataHandle[0]->deref(); ASSERT(*dataHandle != &internalData || dataHandle[0]->refCount == 0); if (needToFreeHandle) freeHandle(dataHandle); #ifndef NDEBUG dataHandle = 0; #endif } DeprecatedString::DeprecatedString() { internalData.deref(); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } // Careful, just used by DeprecatedConstString DeprecatedString::DeprecatedString(DeprecatedStringData *constData, bool /*dummy*/) { internalData.deref(); dataHandle = allocateHandle(); *dataHandle = constData; // The DeprecatedConstString constructor allocated the DeprecatedStringData. constData->_isHeapAllocated = 1; } DeprecatedString::DeprecatedString(DeprecatedChar qc) { dataHandle = allocateHandle(); // Copy the DeprecatedChar. if (IS_ASCII_QCHAR(qc)) { char c = qc.unicode(); *dataHandle = &internalData; internalData.initialize( &c, 1 ); } else { *dataHandle = &internalData; internalData.initialize( &qc, 1 ); } } DeprecatedString::DeprecatedString(const DeprecatedChar *unicode, unsigned length) { if (!unicode || !length) { internalData.deref(); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } else { dataHandle = allocateHandle(); // Copy the DeprecatedChar * *dataHandle = &internalData; internalData.initialize(unicode, length); } } DeprecatedString::DeprecatedString(const char *chs) { if (chs) { internalData.initialize(chs,strlen(chs)); dataHandle = allocateHandle(); *dataHandle = &internalData; } else { internalData.deref(); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } } DeprecatedString::DeprecatedString(const char *chs, int len) { dataHandle = allocateHandle(); *dataHandle = &internalData; internalData.initialize(chs,len); } DeprecatedString::DeprecatedString(const DeprecatedString &qs) : dataHandle(qs.dataHandle) { internalData.deref(); dataHandle[0]->ref(); } DeprecatedString &DeprecatedString::operator=(const DeprecatedString &qs) { if (this == &qs) return *this; // Free our handle if it isn't the shared null handle, and if 
no-one else is using it. bool needToFreeHandle = dataHandle != shared_null_handle && dataHandle[0]->refCount == 1; qs.dataHandle[0]->ref(); deref(); if (needToFreeHandle) freeHandle(dataHandle); dataHandle = qs.dataHandle; return *this; } DeprecatedString &DeprecatedString::operator=(const DeprecatedCString &qcs) { return setLatin1(qcs); } DeprecatedString &DeprecatedString::operator=(const char *chs) { return setLatin1(chs); } DeprecatedString &DeprecatedString::operator=(DeprecatedChar qc) { return *this = DeprecatedString(qc); } DeprecatedString &DeprecatedString::operator=(char ch) { return *this = DeprecatedString(DeprecatedChar(ch)); } DeprecatedChar DeprecatedString::at(unsigned i) const { DeprecatedStringData *thisData = *dataHandle; if (i >= thisData->_length) return 0; if (thisData->_isAsciiValid) { return thisData->_ascii[i]; } ASSERT(thisData->_isUnicodeValid); return thisData->_unicode[i]; } int DeprecatedString::compare(const DeprecatedString& s) const { if (dataHandle[0]->_isAsciiValid && s.dataHandle[0]->_isAsciiValid) return strcmp(ascii(), s.ascii()); return ucstrcmp(*this,s); } int DeprecatedString::compare(const char *chs) const { if (!chs) return isEmpty() ? 0 : 1; DeprecatedStringData *d = dataHandle[0]; if (d->_isAsciiValid) return strcmp(ascii(), chs); const DeprecatedChar *s = unicode(); unsigned len = d->_length; for (unsigned i = 0; i != len; ++i) { char c2 = chs[i]; if (!c2) return 1; DeprecatedChar c1 = s[i]; if (c1.unicode() < c2) return -1; if (c1.unicode() > c2) return 1; } return chs[len] ? 
-1 : 0; } bool DeprecatedString::startsWith( const DeprecatedString& s ) const { if (dataHandle[0]->_isAsciiValid){ const char *asc = ascii(); for ( int i =0; i < (int) s.dataHandle[0]->_length; i++ ) { if ( i >= (int) dataHandle[0]->_length || asc[i] != s[i] ) return false; } } else if (dataHandle[0]->_isUnicodeValid){ const DeprecatedChar *uni = unicode(); for ( int i =0; i < (int) s.dataHandle[0]->_length; i++ ) { if ( i >= (int) dataHandle[0]->_length || uni[i] != s[i] ) return false; } } else FATAL("invalid character cache"); return true; } bool DeprecatedString::startsWith(const char *prefix) const { DeprecatedStringData *data = *dataHandle; unsigned prefixLength = strlen(prefix); if (data->_isAsciiValid) { return strncmp(prefix, data->_ascii, prefixLength) == 0; } else { ASSERT(data->_isUnicodeValid); if (prefixLength > data->_length) { return false; } const DeprecatedChar *uni = data->_unicode; for (unsigned i = 0; i < prefixLength; ++i) { if (uni[i] != prefix[i]) { return false; } } return true; } } bool DeprecatedString::startsWith(const char *prefix, bool caseSensitive) const { if (caseSensitive) { return startsWith(prefix); } DeprecatedStringData *data = *dataHandle; unsigned prefixLength = strlen(prefix); if (data->_isAsciiValid) { return strncasecmp(prefix, data->_ascii, prefixLength) == 0; } else { ASSERT(data->_isUnicodeValid); if (prefixLength > data->_length) { return false; } const DeprecatedChar *uni = data->_unicode; for (unsigned i = 0; i < prefixLength; ++i) { if (!equalCaseInsensitive(uni[i], prefix[i])) { return false; } } return true; } } bool DeprecatedString::endsWith(const DeprecatedString& s) const { const DeprecatedChar *uni = unicode(); int length = dataHandle[0]->_length; int slength = s.dataHandle[0]->_length; if (length < slength) return false; for (int i = length - slength, j = 0; i < length; i++, j++) { if (uni[i] != s[j]) return false; } return true; } bool DeprecatedString::isNull() const { return dataHandle == 
shared_null_handle; } int DeprecatedString::find(DeprecatedChar qc, int index) const { if (dataHandle[0]->_isAsciiValid) { if (!IS_ASCII_QCHAR(qc)) return -1; return find(qc.unicode(), index); } return find(DeprecatedString(qc), index, true); } int DeprecatedString::find(char ch, int index) const { if (dataHandle[0]->_isAsciiValid){ const char *cp = ascii(); if ( index < 0 ) index += dataHandle[0]->_length; if (index >= (int)dataHandle[0]->_length) return -1; for (int i = index; i < (int)dataHandle[0]->_length; i++) if (cp[i] == ch) return i; } else if (dataHandle[0]->_isUnicodeValid) return find(DeprecatedChar(ch), index, true); else FATAL("invalid character cache"); return -1; } int DeprecatedString::find(const DeprecatedString &str, int index, bool caseSensitive) const { // FIXME, use the first character algorithm /* We use some weird hashing for efficiency's sake. Instead of comparing strings, we compare the sum of str with that of a part of this DeprecatedString. Only if that matches, we call memcmp or ucstrnicmp. The hash value of a string is the sum of the cells of its QChars. 
*/ if ( index < 0 ) index += dataHandle[0]->_length; int lstr = str.dataHandle[0]->_length; int lthis = dataHandle[0]->_length - index; if ( (unsigned)lthis > dataHandle[0]->_length ) return -1; int delta = lthis - lstr; if ( delta < 0 ) return -1; const DeprecatedChar *uthis = unicode() + index; const DeprecatedChar *ustr = str.unicode(); unsigned hthis = 0; unsigned hstr = 0; int i; if ( caseSensitive ) { for ( i = 0; i < lstr; i++ ) { hthis += uthis[i].unicode(); hstr += ustr[i].unicode(); } i = 0; while ( true ) { if ( hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(DeprecatedChar)) == 0 ) return index + i; if ( i == delta ) return -1; hthis += uthis[i + lstr].unicode(); hthis -= uthis[i].unicode(); i++; } } else { for ( i = 0; i < lstr; i++ ) { hthis += tolower(uthis[i].unicode()); hstr += tolower(ustr[i].unicode()); } i = 0; while ( true ) { if ( hthis == hstr && equalCaseInsensitive(uthis + i, ustr, lstr) ) return index + i; if ( i == delta ) return -1; hthis += tolower(uthis[i + lstr].unicode()); hthis -= tolower(uthis[i].unicode()); i++; } } } // This function should be as fast as possible, every little bit helps. // Our usage patterns are typically small strings. In time trials // this simplistic algorithm is much faster than Boyer-Moore or hash // based algorithms. 
int DeprecatedString::find(const char *chs, int index, bool caseSensitive) const { if (!chs || index < 0) return -1; DeprecatedStringData *data = *dataHandle; int chsLength = strlen(chs); int n = data->_length - index; if (n < 0) return -1; n -= chsLength - 1; if (n <= 0) return -1; const char *chsPlusOne = chs + 1; int chsLengthMinusOne = chsLength - 1; if (data->_isAsciiValid) { char *ptr = data->_ascii + index - 1; if (caseSensitive) { char c = *chs; do { if (*++ptr == c && memcmp(ptr + 1, chsPlusOne, chsLengthMinusOne) == 0) { return data->_length - chsLength - n + 1; } } while (--n); } else { int lc = tolower(*chs); do { if (tolower(*++ptr) == lc && equalCaseInsensitive(ptr + 1, chsPlusOne, chsLengthMinusOne)) { return data->_length - chsLength - n + 1; } } while (--n); } } else { ASSERT(data->_isUnicodeValid); const DeprecatedChar *ptr = data->_unicode + index - 1; if (caseSensitive) { DeprecatedChar c = *chs; do { if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) { return data->_length - chsLength - n + 1; } } while (--n); } else { int lc = tolower((unsigned char)*chs); do { if (tolower((++ptr)->unicode()) == lc && equalCaseInsensitive(ptr + 1, chsPlusOne, chsLengthMinusOne)) { return data->_length - chsLength - n + 1; } } while (--n); } } return -1; } int DeprecatedString::find(const RegularExpression &qre, int index) const { if ( index < 0 ) index += dataHandle[0]->_length; return qre.match( *this, index ); } int DeprecatedString::findRev(char ch, int index) const { if (dataHandle[0]->_isAsciiValid){ const char *cp = ascii(); if (index < 0) index += dataHandle[0]->_length; if (index > (int)dataHandle[0]->_length) return -1; for (int i = index; i >= 0; i--) { if (cp[i] == ch) return i; } } else if (dataHandle[0]->_isUnicodeValid) return findRev(DeprecatedString(DeprecatedChar(ch)), index); else FATAL("invalid character cache"); return -1; } int DeprecatedString::findRev(const char *chs, int index) const { return 
findRev(DeprecatedString(chs), index); } int DeprecatedString::findRev( const DeprecatedString& str, int index, bool cs ) const { // FIXME, use the first character algorithm /* See DeprecatedString::find() for explanations. */ int lthis = dataHandle[0]->_length; if ( index < 0 ) index += lthis; int lstr = str.dataHandle[0]->_length; int delta = lthis - lstr; if ( index < 0 || index > lthis || delta < 0 ) return -1; if ( index > delta ) index = delta; const DeprecatedChar *uthis = unicode(); const DeprecatedChar *ustr = str.unicode(); unsigned hthis = 0; unsigned hstr = 0; int i; if ( cs ) { for ( i = 0; i < lstr; i++ ) { hthis += uthis[index + i].unicode(); hstr += ustr[i].unicode(); } i = index; while ( true ) { if ( hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(DeprecatedChar)) == 0 ) return i; if ( i == 0 ) return -1; i--; hthis -= uthis[i + lstr].unicode(); hthis += uthis[i].unicode(); } } else { for ( i = 0; i < lstr; i++ ) { hthis += uthis[index + i].lower().unicode(); hstr += ustr[i].lower().unicode(); } i = index; while ( true ) { if ( hthis == hstr && equalCaseInsensitive(uthis + i, ustr, lstr) ) return i; if ( i == 0 ) return -1; i--; hthis -= uthis[i + lstr].lower().unicode(); hthis += uthis[i].lower().unicode(); } } // Should never get here. 
return -1; } int DeprecatedString::contains(DeprecatedChar c, bool cs) const { int count = 0; DeprecatedStringData *data = *dataHandle; if (data->_isAsciiValid) { if (!IS_ASCII_QCHAR(c)) return 0; const char *cPtr = data->_ascii; int n = data->_length; char ac = c.unicode(); if (cs) { // case sensitive while (n--) count += *cPtr++ == ac; } else { // case insensitive int lc = tolower(ac); while (n--) { count += tolower(*cPtr++) == lc; } } } else { ASSERT(data->_isUnicodeValid); const DeprecatedChar *uc = data->_unicode; int n = data->_length; if (cs) { // case sensitive while ( n-- ) count += *uc++ == c; } else { // case insensitive int lc = tolower(c.unicode()); while (n--) { count += tolower(uc->unicode()) == lc; uc++; } } } return count; } int DeprecatedString::contains(char ch) const { return contains(DeprecatedChar(ch), true); } int DeprecatedString::contains(const char *str, bool caseSensitive) const { if (!str) return 0; int len = strlen(str); char c = *str; DeprecatedStringData *data = *dataHandle; int n = data->_length; n -= len - 1; if (n <= 0) return 0; int count = 0; if (data->_isAsciiValid) { const char *p = data->_ascii; if (caseSensitive) { do { count += *p == c && memcmp(p + 1, str + 1, len - 1) == 0; p++; } while (--n); } else { int lc = tolower(c); do { count += tolower(*p) == lc && equalCaseInsensitive(p + 1, str + 1, len - 1); p++; } while (--n); } } else { ASSERT(data->_isUnicodeValid); const DeprecatedChar *p = data->_unicode; if (caseSensitive) { do { count += *p == c && equal(p + 1, str + 1, len - 1); p++; } while (--n); } else { int lc = tolower(c); do { count += tolower(p->unicode()) == lc && equalCaseInsensitive(p + 1, str + 1, len - 1); p++; } while (--n); } } return count; } int DeprecatedString::contains(const DeprecatedString &str, bool caseSensitive) const { if (str.isEmpty()) return 0; const DeprecatedChar *strP = str.unicode(); int len = str.dataHandle[0]->_length; DeprecatedChar c = *strP; const DeprecatedChar *p = unicode(); int n 
= dataHandle[0]->_length; n -= len - 1; if (n <= 0) return 0; int count = 0; if (caseSensitive) { int byteCount = len * sizeof(DeprecatedChar); do { count += *p == c && memcmp(p, strP, byteCount) == 0; ++p; } while (--n); } else { do { count += p->lower() == c && equalCaseInsensitive(p, strP, len) == 0; ++p; } while (--n); } return count; } bool DeprecatedString::isAllASCII() const { DeprecatedStringData *data = *dataHandle; int n = data->_length; if (data->_isAsciiValid) { const char *p = data->_ascii; while (n--) { unsigned char c = *p++; if (c > 0x7F) { return false; } } } else { ASSERT(data->_isUnicodeValid); const DeprecatedChar *p = data->_unicode; while (n--) { if ((*p++).unicode() > 0x7F) { return false; } } } return true; } bool DeprecatedString::isAllLatin1() const { DeprecatedStringData *data = *dataHandle; if (data->_isAsciiValid) { return true; } ASSERT(data->_isUnicodeValid); int n = data->_length; const DeprecatedChar *p = data->_unicode; while (n--) { if ((*p++).unicode() > 0xFF) { return false; } } return true; } bool DeprecatedString::hasFastLatin1() const { DeprecatedStringData *data = *dataHandle; return data->_isAsciiValid; } void DeprecatedString::copyLatin1(char *buffer, unsigned position, unsigned maxLength) const { DeprecatedStringData *data = *dataHandle; int length = data->_length; if (position > static_cast(length)) length = 0; else length -= position; if (static_cast(length) > maxLength) length = static_cast(maxLength); buffer[length] = 0; if (data->_isAsciiValid) { memcpy(buffer, data->_ascii + position, length); return; } ASSERT(data->_isUnicodeValid); const DeprecatedChar* uc = data->_unicode + position; while (length--) *buffer++ = (*uc++).latin1(); } short DeprecatedString::toShort(bool *ok, int base) const { int v = toInt(ok, base); short sv = v; if (sv != v) { if (ok) *ok = false; return 0; } return sv; } unsigned short DeprecatedString::toUShort(bool *ok, int base) const { unsigned v = toUInt(ok, base); unsigned short sv = v; if 
(sv != v) { if (ok) *ok = false; return 0; } return sv; } template static inline IntegralType toIntegralType(const DeprecatedString& string, bool *ok, int base) { static const IntegralType integralMax = std::numeric_limits::max(); static const bool isSigned = std::numeric_limits::is_signed; const DeprecatedChar* p = string.unicode(); const IntegralType maxMultiplier = integralMax / base; int length = string.length(); IntegralType value = 0; bool isOk = false; bool isNegative = false; if (!p) goto bye; // skip leading whitespace while (length && p->isSpace()) { length--; p++; } if (isSigned && length && *p == '-') { length--; p++; isNegative = true; } else if (length && *p == '+') { length--; p++; } if (!length || !isCharacterAllowedInBase(*p, base)) goto bye; while (length && isCharacterAllowedInBase(*p, base)) { length--; IntegralType digitValue; int c = p->unicode(); if (isdigit(c)) digitValue = c - '0'; else if (c >= 'a') digitValue = c - 'a' + 10; else digitValue = c - 'A' + 10; if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) goto bye; value = base * value + digitValue; p++; } if (isNegative) value = -value; // skip trailing space while (length && p->isSpace()) { length--; p++; } if (!length) isOk = true; bye: if (ok) *ok = isOk; return isOk ? 
value : 0; } int DeprecatedString::toInt(bool *ok, int base) const { return toIntegralType(*this, ok, base); } int64_t DeprecatedString::toInt64(bool *ok, int base) const { return toIntegralType(*this, ok, base); } unsigned DeprecatedString::toUInt(bool *ok, int base) const { return toIntegralType(*this, ok, base); } uint64_t DeprecatedString::toUInt64(bool *ok, int base) const { return toIntegralType(*this, ok, base); } double DeprecatedString::toDouble(bool *ok) const { if (isEmpty()) { if (ok) *ok = false; return 0; } const char *s = latin1(); char *end; double val = kjs_strtod(s, &end); if (ok) *ok = end == 0 || *end == '\0'; return val; } float DeprecatedString::toFloat(bool* ok) const { // FIXME: this will return ok even when the string does not fit into a float return narrowPrecisionToFloat(toDouble(ok)); } DeprecatedString DeprecatedString::left(unsigned len) const { return mid(0, len); } DeprecatedString DeprecatedString::right(unsigned len) const { return mid(length() - len, len); } DeprecatedString DeprecatedString::mid(unsigned start, unsigned len) const { if (dataHandle && *dataHandle) { DeprecatedStringData &data = **dataHandle; // clip length if (start >= data._length) return DeprecatedString(); if (len > data._length - start) len = data._length - start; if (len == 0) return DeprecatedString(); if (start == 0 && len == data._length) return *this; ASSERT(start + len >= start && // unsigned overflow start + len <= data._length); // past the end // ascii case if (data._isAsciiValid && data._ascii) return DeprecatedString(&data._ascii[start] , len); // unicode case if (data._isUnicodeValid && data._unicode) return DeprecatedString(&data._unicode[start], len); } // degenerate case return DeprecatedString(); } DeprecatedString DeprecatedString::copy() const { // does not need to be a deep copy return DeprecatedString(*this); } DeprecatedString DeprecatedString::lower() const { DeprecatedString s(*this); DeprecatedStringData *d = *s.dataHandle; int l = 
d->_length; if (l) { bool detached = false; if (d->_isAsciiValid) { char *p = d->_ascii; while (l--) { char c = *p; // FIXME: Doesn't work for 0x80-0xFF. if (c >= 'A' && c <= 'Z') { if (!detached) { s.detach(); d = *s.dataHandle; p = d->_ascii + d->_length - l - 1; detached = true; } *p = c + ('a' - 'A'); } p++; } } else { ASSERT(d->_isUnicodeValid); DeprecatedChar *p = d->_unicode; while (l--) { DeprecatedChar c = *p; // FIXME: Doesn't work for 0x80-0xFF. if (IS_ASCII_QCHAR(c)) { if (c.unicode() >= 'A' && c.unicode() <= 'Z') { if (!detached) { s.detach(); d = *s.dataHandle; p = d->_unicode + d->_length - l - 1; detached = true; } *p = c.unicode() + ('a' - 'A'); } } else { DeprecatedChar clower = c.lower(); if (clower != c) { if (!detached) { s.detach(); d = *s.dataHandle; p = d->_unicode + d->_length - l - 1; detached = true; } *p = clower; } } p++; } } } return s; } DeprecatedString DeprecatedString::stripWhiteSpace() const { if ( isEmpty() ) // nothing to do return *this; if ( !at(0).isSpace() && !at(dataHandle[0]->_length-1).isSpace() ) return *this; int start = 0; int end = dataHandle[0]->_length - 1; DeprecatedString result = fromLatin1(""); while ( start<=end && at(start).isSpace() ) // skip white space from start start++; if ( start > end ) { // only white space return result; } while ( end && at(end).isSpace() ) // skip white space from end end--; int l = end - start + 1; if (dataHandle[0]->_isAsciiValid){ result.setLength( l ); if ( l ) memcpy(const_cast(result.dataHandle[0]->ascii()), &ascii()[start], l ); } else if (dataHandle[0]->_isUnicodeValid){ result.setLength( l ); if ( l ) memcpy(result.forceUnicode(), &unicode()[start], sizeof(DeprecatedChar)*l ); } else FATAL("invalid character cache"); return result; } DeprecatedString DeprecatedString::simplifyWhiteSpace() const { if ( isEmpty() ) // nothing to do return *this; DeprecatedString result; if (dataHandle[0]->_isAsciiValid){ result.setLength( dataHandle[0]->_length ); const char *from = ascii(); 
const char *fromend = from + dataHandle[0]->_length; int outc=0; char *to = const_cast(result.ascii()); while ( true ) { while ( from!=fromend && DeprecatedChar(*from).isSpace() ) from++; while ( from!=fromend && !DeprecatedChar(*from).isSpace() ) to[outc++] = *from++; if ( from!=fromend ) to[outc++] = ' '; else break; } if ( outc > 0 && to[outc-1] == ' ' ) outc--; result.truncate( outc ); } else if (dataHandle[0]->_isUnicodeValid){ result.setLength( dataHandle[0]->_length ); const DeprecatedChar *from = unicode(); const DeprecatedChar *fromend = from + dataHandle[0]->_length; int outc=0; DeprecatedChar *to = result.forceUnicode(); while ( true ) { while ( from!=fromend && from->isSpace() ) from++; while ( from!=fromend && !from->isSpace() ) to[outc++] = *from++; if ( from!=fromend ) to[outc++] = ' '; else break; } if ( outc > 0 && to[outc-1] == ' ' ) outc--; result.truncate( outc ); } else FATAL("invalid character cache"); return result; } void DeprecatedString::deref() { dataHandle[0]->deref(); } DeprecatedString &DeprecatedString::setUnicode(const DeprecatedChar *uni, unsigned len) { detachAndDiscardCharacters(); // Free our handle if it isn't the shared null handle, and if no-one else is using it. 
bool needToFreeHandle = dataHandle != shared_null_handle && dataHandle[0]->refCount == 1; if (len == 0) { deref(); if (needToFreeHandle) freeHandle(dataHandle); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } else if (len > dataHandle[0]->_maxUnicode || dataHandle[0]->refCount != 1 || !dataHandle[0]->_isUnicodeValid) { deref(); if (needToFreeHandle) freeHandle(dataHandle); dataHandle = allocateHandle(); *dataHandle = new DeprecatedStringData(uni, len); dataHandle[0]->_isHeapAllocated = 1; } else { if ( uni ) memcpy( (void *)unicode(), uni, sizeof(DeprecatedChar)*len ); dataHandle[0]->_length = len; dataHandle[0]->_isAsciiValid = 0; } return *this; } DeprecatedString &DeprecatedString::setLatin1(const char *str, int len) { if ( str == 0 ) return setUnicode(0,0); if ( len < 0 ) len = strlen(str); detachAndDiscardCharacters(); // Free our handle if it isn't the shared null handle, and if no-one else is using it. bool needToFreeHandle = dataHandle != shared_null_handle && dataHandle[0]->refCount == 1; if (len+1 > (int)dataHandle[0]->_maxAscii || dataHandle[0]->refCount != 1 || !dataHandle[0]->_isAsciiValid) { deref(); if (needToFreeHandle) freeHandle(dataHandle); dataHandle = allocateHandle(); *dataHandle = new DeprecatedStringData(str,len); dataHandle[0]->_isHeapAllocated = 1; } else { strcpy(const_cast(ascii()), str ); dataHandle[0]->_length = len; dataHandle[0]->_isUnicodeValid = 0; } return *this; } DeprecatedString &DeprecatedString::setNum(short n) { return format("%d", n); } DeprecatedString &DeprecatedString::setNum(unsigned short n) { return format("%u", n); } DeprecatedString &DeprecatedString::setNum(int n) { return format("%d", n); } DeprecatedString &DeprecatedString::setNum(unsigned n) { return format("%u", n); } DeprecatedString &DeprecatedString::setNum(long n) { return format("%ld", n); } DeprecatedString &DeprecatedString::setNum(unsigned long n) { return format("%lu", n); } DeprecatedString &DeprecatedString::setNum(double n) { return 
format("%.6lg", n); } DeprecatedString &DeprecatedString::format(const char *format, ...) { // FIXME: this needs the same windows compat fixes as String::format va_list args; va_start(args, format); // Do the format once to get the length. #if COMPILER(MSVC) int result = _vscprintf(format, args); #else char ch; int result = vsnprintf(&ch, 1, format, args); #endif // Handle the empty string case to simplify the code below. if (result <= 0) { // POSIX returns 0 in error; Windows returns a negative number. setUnicode(0, 0); return *this; } unsigned len = result; // Arrange for storage for the resulting string. detachAndDiscardCharacters(); if (len >= dataHandle[0]->_maxAscii || dataHandle[0]->refCount != 1 || !dataHandle[0]->_isAsciiValid) { // Free our handle if it isn't the shared null handle, and if no-one else is using it. bool needToFreeHandle = dataHandle != shared_null_handle && dataHandle[0]->refCount == 1; deref(); if (needToFreeHandle) freeHandle(dataHandle); dataHandle = allocateHandle(); *dataHandle = new DeprecatedStringData((char *)0, len); dataHandle[0]->_isHeapAllocated = 1; } else { dataHandle[0]->_length = len; dataHandle[0]->_isUnicodeValid = 0; } // Now do the formatting again, guaranteed to fit. 
vsprintf(const_cast(ascii()), format, args); va_end(args); return *this; } DeprecatedString &DeprecatedString::prepend(const DeprecatedString &qs) { return insert(0, qs); } DeprecatedString &DeprecatedString::prepend(const DeprecatedChar *characters, unsigned length) { return insert(0, characters, length); } DeprecatedString &DeprecatedString::append(const DeprecatedString &qs) { return insert(dataHandle[0]->_length, qs); } DeprecatedString &DeprecatedString::append(const char *characters, unsigned length) { return insert(dataHandle[0]->_length, characters, length); } DeprecatedString &DeprecatedString::append(const DeprecatedChar *characters, unsigned length) { return insert(dataHandle[0]->_length, characters, length); } DeprecatedString &DeprecatedString::insert(unsigned index, const char *insertChars, unsigned insertLength) { if (insertLength == 0) return *this; detach(); if (dataHandle[0]->_isAsciiValid){ unsigned originalLength = dataHandle[0]->_length; char *targetChars; // Ensure that we have enough space. setLength (originalLength + insertLength); targetChars = const_cast(ascii()); // Move tail to make space for inserted characters. memmove (targetChars+index+insertLength, targetChars+index, originalLength-index); // Insert characters. memcpy (targetChars+index, insertChars, insertLength); dataHandle[0]->_isUnicodeValid = 0; } else if (dataHandle[0]->_isUnicodeValid){ unsigned originalLength = dataHandle[0]->_length; DeprecatedChar *targetChars; // Ensure that we have enough space. setLength (originalLength + insertLength); targetChars = (DeprecatedChar *)unicode(); // Move tail to make space for inserted characters. memmove (targetChars+(index+insertLength), targetChars+index, (originalLength-index)*sizeof(DeprecatedChar)); // Insert characters. 
unsigned i = insertLength; DeprecatedChar *target = targetChars+index; while (i--) *target++ = *insertChars++; } else FATAL("invalid character cache"); return *this; } DeprecatedString &DeprecatedString::insert(unsigned index, const DeprecatedString &qs) { if (qs.dataHandle[0]->_length == 0) return *this; if (dataHandle[0]->_isAsciiValid && qs.isAllLatin1()) { insert(index, qs.latin1(), qs.length()); } else { unsigned insertLength = qs.dataHandle[0]->_length; unsigned originalLength = dataHandle[0]->_length; forceUnicode(); // Ensure that we have enough space. setLength (originalLength + insertLength); DeprecatedChar *targetChars = const_cast(unicode()); // Move tail to make space for inserted characters. memmove (targetChars+(index+insertLength), targetChars+index, (originalLength-index)*sizeof(DeprecatedChar)); // Insert characters. if (qs.dataHandle[0]->_isAsciiValid){ unsigned i = insertLength; DeprecatedChar *target = targetChars+index; char *a = const_cast(qs.ascii()); while (i--) *target++ = *a++; } else { DeprecatedChar *insertChars = (DeprecatedChar *)qs.unicode(); memcpy (targetChars+index, insertChars, insertLength*sizeof(DeprecatedChar)); } dataHandle[0]->_isAsciiValid = 0; } return *this; } DeprecatedString &DeprecatedString::insert(unsigned index, const DeprecatedChar *insertChars, unsigned insertLength) { if (insertLength == 0) return *this; forceUnicode(); unsigned originalLength = dataHandle[0]->_length; setLength(originalLength + insertLength); DeprecatedChar *targetChars = const_cast(unicode()); if (originalLength > index) { memmove(targetChars + index + insertLength, targetChars + index, (originalLength - index) * sizeof(DeprecatedChar)); } memcpy(targetChars + index, insertChars, insertLength * sizeof(DeprecatedChar)); return *this; } DeprecatedString &DeprecatedString::insert(unsigned index, DeprecatedChar qc) { detach(); if (dataHandle[0]->_isAsciiValid && IS_ASCII_QCHAR(qc)){ unsigned originalLength = dataHandle[0]->_length; char insertChar 
= qc.unicode(); char *targetChars; // Ensure that we have enough space. setLength (originalLength + 1); targetChars = const_cast(ascii()); // Move tail to make space for inserted character. memmove (targetChars+index+1, targetChars+index, originalLength-index); // Insert character. targetChars[index] = insertChar; targetChars[dataHandle[0]->_length] = 0; dataHandle[0]->_isUnicodeValid = 0; } else { unsigned originalLength = dataHandle[0]->_length; forceUnicode(); // Ensure that we have enough space. setLength (originalLength + 1); DeprecatedChar *targetChars = const_cast(unicode()); // Move tail to make space for inserted character. memmove (targetChars+(index+1), targetChars+index, (originalLength-index)*sizeof(DeprecatedChar)); targetChars[index] = qc; } return *this; } DeprecatedString &DeprecatedString::insert(unsigned index, char ch) { detach(); if (dataHandle[0]->_isAsciiValid) { unsigned originalLength = dataHandle[0]->_length; char *targetChars; // Ensure that we have enough space. setLength (originalLength + 1); targetChars = const_cast(ascii()); // Move tail to make space for inserted character. memmove (targetChars+index+1, targetChars+index, originalLength-index); // Insert character. targetChars[index] = ch; targetChars[dataHandle[0]->_length] = 0; dataHandle[0]->_isUnicodeValid = 0; } else if (dataHandle[0]->_isUnicodeValid){ unsigned originalLength = dataHandle[0]->_length; DeprecatedChar *targetChars; // Ensure that we have enough space. setLength (originalLength + 1); targetChars = (DeprecatedChar *)unicode(); // Move tail to make space for inserted character. memmove (targetChars+(index+1), targetChars+index, (originalLength-index)*sizeof(DeprecatedChar)); targetChars[index] = (DeprecatedChar)ch; } else FATAL("invalid character cache"); return *this; } // Copy DeprecatedStringData if necessary. Must be called before the string data is mutated. 
void DeprecatedString::detach() { DeprecatedStringData *oldData = *dataHandle; if (oldData->refCount == 1 && oldData != shared_null) return; // Copy data for this string so we can safely mutate it. DeprecatedStringData *newData; if (oldData->_isAsciiValid) newData = new DeprecatedStringData(oldData->ascii(), oldData->_length); else newData = new DeprecatedStringData(oldData->unicode(), oldData->_length); newData->_isHeapAllocated = 1; // There is now one less client for the old data. oldData->deref(); // If the old data is our internal data, then we'll keep that. // This decreases the chance we'll have to do a detachInternal later // when this object is destroyed. if (oldData == &internalData) { newData->refCount = oldData->refCount; oldData->refCount = 1; *dataHandle = newData; newData = oldData; } // Create a new handle. dataHandle = allocateHandle(); *dataHandle = newData; } void DeprecatedString::detachAndDiscardCharacters() { // Missing optimization: Don't bother copying the old data if we detach. detach(); } DeprecatedString &DeprecatedString::remove(unsigned index, unsigned len) { unsigned olen = dataHandle[0]->_length; if ( index >= olen ) { // range problems } else if ( index + len >= olen ) { // index ok setLength( index ); } else if ( len != 0 ) { // Missing optimization: Could avoid copying characters we are going to remove // by making a special version of detach(). 
detach(); if (dataHandle[0]->_isAsciiValid){ memmove( dataHandle[0]->ascii()+index, dataHandle[0]->ascii()+index+len, sizeof(char)*(olen-index-len) ); setLength( olen-len ); dataHandle[0]->_isUnicodeValid = 0; } else if (dataHandle[0]->_isUnicodeValid){ memmove( dataHandle[0]->unicode()+index, dataHandle[0]->unicode()+index+len, sizeof(DeprecatedChar)*(olen-index-len) ); setLength( olen-len ); } else FATAL("invalid character cache"); } return *this; } DeprecatedString &DeprecatedString::replace(unsigned index, unsigned len, const DeprecatedString& str) { return remove(index, len).insert(index, str); } DeprecatedString &DeprecatedString::replace(char pattern, const DeprecatedString &str) { int slen = str.dataHandle[0]->_length; int index = 0; while ((index = find(pattern, index)) >= 0) { replace(index, 1, str); index += slen; } return *this; } DeprecatedString &DeprecatedString::replace(DeprecatedChar pattern, const DeprecatedString &str) { int slen = str.dataHandle[0]->_length; int index = 0; while ((index = find(pattern, index)) >= 0) { replace(index, 1, str); index += slen; } return *this; } DeprecatedString &DeprecatedString::replace(const DeprecatedString &pattern, const DeprecatedString &str) { if (pattern.isEmpty()) return *this; int plen = pattern.dataHandle[0]->_length; int slen = str.dataHandle[0]->_length; int index = 0; while ((index = find(pattern, index)) >= 0) { replace(index, plen, str); index += slen; } return *this; } DeprecatedString &DeprecatedString::replace(const RegularExpression &qre, const DeprecatedString &str) { if ( isEmpty() ) return *this; int index = 0; int slen = str.dataHandle[0]->_length; int len; while ( index < (int)dataHandle[0]->_length ) { index = qre.match( *this, index, &len); if ( index >= 0 ) { replace( index, len, str ); index += slen; if ( !len ) break; // Avoid infinite loop on 0-length matches, e.g. 
[a-z]* } else break; } return *this; } DeprecatedString &DeprecatedString::replace(DeprecatedChar oldChar, DeprecatedChar newChar) { if (oldChar != newChar && find(oldChar) != -1) { unsigned length = dataHandle[0]->_length; detach(); if (dataHandle[0]->_isAsciiValid && IS_ASCII_QCHAR(newChar)) { char *p = const_cast(ascii()); dataHandle[0]->_isUnicodeValid = 0; char oldC = oldChar.unicode(); char newC = newChar.unicode(); for (unsigned i = 0; i != length; ++i) { if (p[i] == oldC) { p[i] = newC; } } } else { DeprecatedChar *p = const_cast(unicode()); dataHandle[0]->_isAsciiValid = 0; for (unsigned i = 0; i != length; ++i) { if (p[i] == oldChar) { p[i] = newChar; } } } } return *this; } DeprecatedChar *DeprecatedString::forceUnicode() { detach(); DeprecatedChar *result = const_cast(unicode()); dataHandle[0]->_isAsciiValid = 0; return result; } // Increase buffer size if necessary. Newly allocated // bytes will contain garbage. void DeprecatedString::setLength(unsigned newLen) { if (newLen == 0) { setUnicode(0, 0); return; } // Missing optimization: Could avoid copying characters we are going to remove // by making a special version of detach(). detach(); ASSERT(dataHandle != shared_null_handle); if (dataHandle[0]->_isAsciiValid){ if (newLen+1 > dataHandle[0]->_maxAscii) { dataHandle[0]->increaseAsciiSize(newLen+1); } // Ensure null termination, although newly allocated // bytes contain garbage. dataHandle[0]->_ascii[newLen] = 0; } if (dataHandle[0]->_isUnicodeValid){ if (newLen > dataHandle[0]->_maxUnicode) { dataHandle[0]->increaseUnicodeSize(newLen); } } dataHandle[0]->_length = newLen; } void DeprecatedString::truncate(unsigned newLen) { if ( newLen < dataHandle[0]->_length ) setLength( newLen ); } void DeprecatedString::fill(DeprecatedChar qc, int len) { detachAndDiscardCharacters(); // len == -1 means fill to string length. 
if (len < 0) { len = dataHandle[0]->_length; } if (len == 0) { if (dataHandle != shared_null_handle) { ASSERT(dataHandle[0]->refCount == 1); deref(); freeHandle(dataHandle); dataHandle = makeSharedNullHandle(); shared_null->ref(); } } else { if (dataHandle[0]->_isAsciiValid && IS_ASCII_QCHAR(qc)) { setLength(len); char *nd = const_cast(ascii()); while (len--) *nd++ = qc.unicode(); dataHandle[0]->_isUnicodeValid = 0; } else { setLength(len); DeprecatedChar *nd = forceUnicode(); while (len--) *nd++ = qc; } } } DeprecatedString &DeprecatedString::append(DeprecatedChar qc) { detach(); DeprecatedStringData *thisData = *dataHandle; if (thisData->_isUnicodeValid && thisData->_length + 1 < thisData->_maxUnicode){ thisData->_unicode[thisData->_length] = qc; thisData->_length++; thisData->_isAsciiValid = 0; return *this; } else if (thisData->_isAsciiValid && IS_ASCII_QCHAR(qc) && thisData->_length + 2 < thisData->_maxAscii){ thisData->_ascii[thisData->_length] = qc.unicode(); thisData->_length++; thisData->_ascii[thisData->_length] = 0; thisData->_isUnicodeValid = 0; return *this; } return insert(thisData->_length, qc); } DeprecatedString &DeprecatedString::append(char ch) { detach(); DeprecatedStringData *thisData = *dataHandle; if (thisData->_isUnicodeValid && thisData->_length + 1 < thisData->_maxUnicode){ thisData->_unicode[thisData->_length] = (DeprecatedChar)ch; thisData->_length++; thisData->_isAsciiValid = 0; return *this; } else if (thisData->_isAsciiValid && thisData->_length + 2 < thisData->_maxAscii){ thisData->_ascii[thisData->_length] = ch; thisData->_length++; thisData->_ascii[thisData->_length] = 0; thisData->_isUnicodeValid = 0; return *this; } return insert(thisData->_length, ch); } void DeprecatedString::reserve(unsigned length) { if (length > dataHandle[0]->_maxUnicode) { detach(); dataHandle[0]->increaseUnicodeSize(length); } } bool operator==(const DeprecatedString &s1, const DeprecatedString &s2) { if (s1.dataHandle[0]->_isAsciiValid && 
s2.dataHandle[0]->_isAsciiValid) { return strcmp(s1.ascii(), s2.ascii()) == 0; } return s1.dataHandle[0]->_length == s2.dataHandle[0]->_length && memcmp(s1.unicode(), s2.unicode(), s1.dataHandle[0]->_length * sizeof(DeprecatedChar)) == 0; } bool operator==(const DeprecatedString &s1, const char *chs) { if (!chs) return s1.isNull(); DeprecatedStringData *d = s1.dataHandle[0]; unsigned len = d->_length; if (d->_isAsciiValid) { const char *s = s1.ascii(); for (unsigned i = 0; i != len; ++i) { char c = chs[i]; if (!c || s[i] != c) return false; } } else { const DeprecatedChar *s = s1.unicode(); for (unsigned i = 0; i != len; ++i) { char c = chs[i]; if (!c || s[i] != c) return false; } } return chs[len] == '\0'; } DeprecatedString operator+(const DeprecatedString &qs1, const DeprecatedString &qs2) { return DeprecatedString(qs1) += qs2; } DeprecatedString operator+(const DeprecatedString &qs, const char *chs) { return DeprecatedString(qs) += chs; } DeprecatedString operator+(const DeprecatedString &qs, DeprecatedChar qc) { return DeprecatedString(qs) += qc; } DeprecatedString operator+(const DeprecatedString &qs, char ch) { return DeprecatedString(qs) += ch; } DeprecatedString operator+(const char *chs, const DeprecatedString &qs) { return DeprecatedString(chs) += qs; } DeprecatedString operator+(DeprecatedChar qc, const DeprecatedString &qs) { return DeprecatedString(qc) += qs; } DeprecatedString operator+(char ch, const DeprecatedString &qs) { return DeprecatedString(DeprecatedChar(ch)) += qs; } DeprecatedConstString::DeprecatedConstString(const DeprecatedChar* unicode, unsigned length) : DeprecatedString(new DeprecatedStringData((DeprecatedChar *)unicode, length, length), true) { } DeprecatedConstString::~DeprecatedConstString() { DeprecatedStringData *data = *dataHandle; if (data->refCount > 1) { DeprecatedChar *tp; if (data->_length <= WEBCORE_DS_INTERNAL_BUFFER_UCHARS) { data->_maxUnicode = WEBCORE_DS_INTERNAL_BUFFER_UCHARS; tp = (DeprecatedChar 
*)&data->_internalBuffer[0]; } else { data->_maxUnicode = ALLOC_QCHAR_GOOD_SIZE(data->_length); tp = WEBCORE_ALLOCATE_CHARACTERS(data->_maxUnicode); } memcpy(tp, data->_unicode, data->_length * sizeof(DeprecatedChar)); data->_unicode = tp; data->_isUnicodeValid = 1; data->_isAsciiValid = 0; } else { data->_unicode = 0; } } struct HandlePageNode { HandlePageNode *next; HandlePageNode *previous; void *nodes; }; struct HandleNode { union { struct { unsigned short next; unsigned short previous; } internalNode; HandleNode *freeNodes; // Always at block[0] in page. HandlePageNode *pageNode; // Always at block[1] in page void *handle; } type; }; #ifndef CHECK_FOR_HANDLE_LEAKS static const size_t pageSize = 4096; static const uintptr_t pageMask = ~(pageSize - 1); static const size_t nodeBlockSize = pageSize / sizeof(HandleNode); static HandleNode *initializeHandleNodeBlock(HandlePageNode *pageNode) { unsigned i; HandleNode* block; HandleNode* aNode; #if PLATFORM(WIN_OS) block = (HandleNode*)VirtualAlloc(0, pageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); #elif PLATFORM(SYMBIAN) // symbian::fixme needs to do page aligned allocation as valloc is not supported. 
block = NULL; #else block = (HandleNode*)valloc(pageSize); #endif for (i = 2; i < nodeBlockSize; i++) { aNode = &block[i]; if (i > 2) aNode->type.internalNode.previous = i-1; else aNode->type.internalNode.previous = 0; if (i != nodeBlockSize - 1) aNode->type.internalNode.next = i+1; else aNode->type.internalNode.next = 0; } block[0].type.freeNodes = &block[nodeBlockSize - 1]; block[1].type.pageNode = pageNode; return block; } static HandlePageNode *allocatePageNode() { HandlePageNode *node = (HandlePageNode *)fastMalloc(sizeof(HandlePageNode)); node->next = node->previous = 0; node->nodes = initializeHandleNodeBlock(node); return node; } static HandleNode *allocateNode(HandlePageNode *pageNode) { HandleNode *block = (HandleNode *)pageNode->nodes; HandleNode *freeNodes = block[0].type.freeNodes; HandleNode *allocated; // Check to see if we're out of nodes. if (freeNodes == 0) { FATAL("out of nodes"); return 0; } // Remove node from end of free list allocated = freeNodes; if (allocated->type.internalNode.previous >= 2) { block[0].type.freeNodes = block + allocated->type.internalNode.previous; block[0].type.freeNodes->type.internalNode.next = 0; } else { // Used last node on this page. block[0].type.freeNodes = 0; freeNodeAllocationPages = freeNodeAllocationPages->previous; if (freeNodeAllocationPages) freeNodeAllocationPages->next = 0; pageNode->previous = usedNodeAllocationPages; pageNode->next = 0; if (usedNodeAllocationPages) usedNodeAllocationPages->next = pageNode; usedNodeAllocationPages = pageNode; } return allocated; } #endif void freeHandle(DeprecatedStringData **_free) { #ifdef CHECK_FOR_HANDLE_LEAKS fastFree(_free); return; #else HandleNode *free = (HandleNode *)_free; HandleNode *base = (HandleNode *)((uintptr_t)free & pageMask); HandleNode *freeNodes = base[0].type.freeNodes; HandlePageNode *pageNode = base[1].type.pageNode; if (freeNodes == 0){ free->type.internalNode.previous = 0; } else { // Insert at head of free list. 
free->type.internalNode.previous = freeNodes - base; freeNodes->type.internalNode.next = free - base; } free->type.internalNode.next = 0; base[0].type.freeNodes = free; // Remove page from used/free list and place on free list if (freeNodeAllocationPages != pageNode) { if (pageNode->previous) pageNode->previous->next = pageNode->next; if (pageNode->next) pageNode->next->previous = pageNode->previous; if (usedNodeAllocationPages == pageNode) usedNodeAllocationPages = pageNode->previous; pageNode->previous = freeNodeAllocationPages; pageNode->next = 0; if (freeNodeAllocationPages) freeNodeAllocationPages->next = pageNode; freeNodeAllocationPages = pageNode; } #endif } DeprecatedString DeprecatedString::fromUtf8(const char *chs) { return UTF8Encoding().decode(chs, strlen(chs)).deprecatedString(); } DeprecatedString DeprecatedString::fromUtf8(const char *chs, int len) { return UTF8Encoding().decode(chs, len).deprecatedString(); } DeprecatedCString DeprecatedString::utf8(int& length) const { DeprecatedCString result = UTF8Encoding().encode((::UChar*)unicode(), this->length()).deprecatedCString(); length = result.length(); return result; } DeprecatedString::DeprecatedString(const Identifier& str) { if (str.isNull()) { internalData.deref(); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } else { dataHandle = allocateHandle(); *dataHandle = &internalData; internalData.initialize(reinterpret_cast(str.data()), str.size()); } } DeprecatedString::DeprecatedString(const UString& str) { if (str.isNull()) { internalData.deref(); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } else { dataHandle = allocateHandle(); *dataHandle = &internalData; internalData.initialize(reinterpret_cast(str.data()), str.size()); } } #if PLATFORM(QT) DeprecatedString::DeprecatedString(const QString& str) { if (str.isNull()) { internalData.deref(); dataHandle = makeSharedNullHandle(); dataHandle[0]->ref(); } else { dataHandle = allocateHandle(); *dataHandle = &internalData; 
internalData.initialize(reinterpret_cast(str.data()), str.length()); } } #endif DeprecatedString::operator Identifier() const { if (isNull()) return Identifier(); return Identifier(reinterpret_cast(unicode()), length()); } DeprecatedString::operator UString() const { if (isNull()) return UString(); return UString(reinterpret_cast(unicode()), length()); } bool equalIgnoringCase(const DeprecatedString& a, const DeprecatedString& b) { unsigned len = a.length(); if (len != b.length()) return false; DeprecatedStringData* dataA = a.dataHandle[0]; DeprecatedStringData* dataB = b.dataHandle[0]; if (dataA->_isAsciiValid != dataB->_isAsciiValid) return false; if (dataA->_isAsciiValid && dataB->_isAsciiValid) return strncasecmp(dataA->_ascii, dataB->_ascii, len) == 0; ASSERT(dataA->_isUnicodeValid); ASSERT(dataB->_isUnicodeValid); return equalCaseInsensitive(dataA->_unicode, dataB->_unicode, len); } } // namespace WebCore