/* * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "TextEncodingRegistry.h" #include "PlatformString.h" #include "TextCodecLatin1.h" #include "TextCodecUTF16.h" #include #include #include #if USE(ICU_UNICODE) #include "TextCodecICU.h" #endif #if PLATFORM(MAC) #include "TextCodecMac.h" #endif #if PLATFORM(QT) #include "qt/TextCodecQt.h" #endif namespace WebCore { const size_t maxEncodingNameLength = 63; // Hash for all-ASCII strings that does case folding and skips any characters // that are not alphanumeric. If passed any non-ASCII characters, depends on // the behavior of isalnum -- if that returns false as it does on OS X, then // it will properly skip those characters too. struct TextEncodingNameHash { // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's // or anything like that. static const unsigned PHI = 0x9e3779b9U; static bool equal(const char* s1, const char* s2) { char c1; char c2; do { do c1 = *s1++; while (c1 && !isalnum(c1)); do c2 = *s2++; while (c2 && !isalnum(c2)); if (tolower(c1) != tolower(c2)) return false; } while (c1 && c2); return !c1 && !c2; } // This algorithm is the one-at-a-time hash from: // http://burtleburtle.net/bob/hash/hashfaq.html // http://burtleburtle.net/bob/hash/doobs.html static unsigned hash(const char* s) { unsigned h = PHI; for (;;) { char c; do { c = *s++; if (!c) { h += (h << 3); h ^= (h >> 11); h += (h << 15); return h; } } while (!isalnum(c)); h += tolower(c); h += (h << 10); h ^= (h >> 6); } } }; struct TextCodecFactory { NewTextCodecFunction function; const void* additionalData; TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { } }; typedef HashMap TextEncodingNameMap; typedef HashMap TextCodecMap; static TextEncodingNameMap* textEncodingNameMap; static TextCodecMap* textCodecMap; static bool didExtendTextCodecMaps; #if ERROR_DISABLED static inline void checkExistingName(const char*, const char*) { } #else static void checkExistingName(const char* alias, const char* atomicName) { const char* oldAtomicName = textEncodingNameMap->get(alias); if (!oldAtomicName) return; if (oldAtomicName == atomicName) return; // Keep the warning silent about one case where we know this will happen. if (strcmp(alias, "ISO-8859-8-I") == 0 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 && strcmp(atomicName, "ISO_8859-8:1988") == 0) return; LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName); } #endif static void addToTextEncodingNameMap(const char* alias, const char* name) { ASSERT(strlen(alias) <= maxEncodingNameLength); const char* atomicName = textEncodingNameMap->get(name); ASSERT(strcmp(alias, name) == 0 || atomicName); if (!atomicName) atomicName = name; checkExistingName(alias, atomicName); textEncodingNameMap->add(alias, atomicName); } static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData) { TextEncoding encoding(name); ASSERT(encoding.isValid()); textCodecMap->add(encoding.name(), TextCodecFactory(function, additionalData)); } static void buildBaseTextCodecMaps() { textCodecMap = new TextCodecMap; textEncodingNameMap = new TextEncodingNameMap; TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); TextCodecLatin1::registerCodecs(addToTextCodecMap); TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap); TextCodecUTF16::registerCodecs(addToTextCodecMap); #if USE(ICU_UNICODE) TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); TextCodecICU::registerBaseCodecs(addToTextCodecMap); #endif } static void extendTextCodecMaps() { #if USE(ICU_UNICODE) TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap); TextCodecICU::registerExtendedCodecs(addToTextCodecMap); #endif #if USE(QT4_UNICODE) TextCodecQt::registerEncodingNames(addToTextEncodingNameMap); TextCodecQt::registerCodecs(addToTextCodecMap); #endif #if PLATFORM(MAC) TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); TextCodecMac::registerCodecs(addToTextCodecMap); #endif } std::auto_ptr newTextCodec(const TextEncoding& encoding) { ASSERT(textCodecMap); TextCodecFactory factory = textCodecMap->get(encoding.name()); ASSERT(factory.function); return factory.function(encoding, factory.additionalData); } const char* atomicCanonicalTextEncodingName(const char* name) { if (!name || !name[0]) return 0; if (!textEncodingNameMap) buildBaseTextCodecMaps(); if (const char* atomicName = textEncodingNameMap->get(name)) return atomicName; if (didExtendTextCodecMaps) return 0; extendTextCodecMaps(); didExtendTextCodecMaps = true; return textEncodingNameMap->get(name); } const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length) { char buffer[maxEncodingNameLength + 1]; size_t j = 0; for (size_t i = 0; i < length; ++i) { UChar c = characters[i]; if (isalnum(c)) { if (j == maxEncodingNameLength) return 0; buffer[j++] = c; } } buffer[j] = 0; return atomicCanonicalTextEncodingName(buffer); } bool noExtendedTextEncodingNameUsed() { return !didExtendTextCodecMaps; } } // namespace WebCore