00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include "data/string.hpp"
00035
00036 #include "data/pointer.hpp"
00037 #include "data/shared.hpp"
00038 #include "data/array.hpp"
00039 #include "data/list.hpp"
00040 #include "data/stream.hpp"
00041
00042 #include <cstdlib>
00043 #include <cstdarg>
00044 #include <cwchar>
00045
00046 namespace gsgl
00047 {
00048
00049 class DATA_API string_impl
00050 : public data::shared_object
00051 {
00052 enum
00053 {
00054 STRING_IMPL_OLD_CDATA = 1 << 0,
00055 STRING_IMPL_OLD_PDATA = 1 << 1,
00056 STRING_IMPL_MODIFIED = ~0
00057 };
00058
00059 mutable gsgl::flags_t modified_flags;
00060
00061 data::simple_array<wchar_t> w_data;
00062 mutable data::simple_array<char> c_data;
00063 mutable data::simple_array<unsigned char> p_data;
00064
00065 friend class gsgl::string;
00066
00067 public:
00068 string_impl();
00069 string_impl(const string_impl &);
00070 explicit string_impl(const wchar_t *);
00071
00072 string_impl & operator= (const string_impl &);
00073
00074 virtual ~string_impl();
00075
00076
00077
00078 void set_modified() { modified_flags |= STRING_IMPL_MODIFIED; }
00079
00080 const wchar_t *get_w_string() const;
00081 const char *get_c_string() const;
00082 const unsigned char *get_p_string() const;
00083 };
00084
00085
00086 string_impl::string_impl()
00087 : shared_object(), modified_flags(STRING_IMPL_MODIFIED)
00088 {
00089 w_data.append(0);
00090 }
00091
00092
00093 string_impl::string_impl(const string_impl & si)
00094 : shared_object(), modified_flags(STRING_IMPL_MODIFIED)
00095 {
00096 w_data = si.w_data;
00097 }
00098
00099
00100 string_impl::string_impl(const wchar_t *str)
00101 : shared_object(), modified_flags(STRING_IMPL_MODIFIED)
00102 {
00103 if (str)
00104 {
00105 size_t len = ::wcslen(str);
00106 w_data[static_cast<gsgl::index_t>(len)] = 0;
00107 ::memcpy(w_data.ptr(), str, sizeof(wchar_t) * len);
00108 }
00109 else
00110 {
00111 w_data.append(0);
00112 }
00113 }
00114
00115
00116 string_impl::~string_impl()
00117 {
00118 }
00119
00120
00121 const wchar_t *string_impl::get_w_string() const
00122 {
00123 return w_data.ptr();
00124 }
00125
00126
00127 const char *string_impl::get_c_string() const
00128 {
00129 if (modified_flags & STRING_IMPL_OLD_CDATA)
00130 {
00131 c_data.clear();
00132 for (const wchar_t *ch = w_data.ptr(); *ch; ++ch)
00133 c_data.append(static_cast<char>(*ch));
00134 c_data.append(0);
00135
00136 modified_flags &= ~STRING_IMPL_OLD_CDATA;
00137 }
00138
00139 return c_data.ptr();
00140 }
00141
00142
00143 const unsigned char *string_impl::get_p_string() const
00144 {
00145 if (modified_flags & STRING_IMPL_OLD_PDATA)
00146 {
00147 p_data.clear();
00148 p_data.append(0);
00149
00150 int count = 0;
00151
00152 for (const wchar_t *ch = w_data.ptr(); *ch; ++ch)
00153 {
00154 if (count++ < 255)
00155 p_data.append(static_cast<char>(*ch));
00156 else
00157 break;
00158 }
00159
00160 p_data[0] = static_cast<char>(count);
00161 modified_flags &= ~STRING_IMPL_OLD_PDATA;
00162 }
00163
00164 return p_data.ptr();
00165 }
00166
00167
00168
#ifdef DEBUG

// Placeholder texts that wchar_ptr is pointed at when the string has no
// characters of its own or is in an unexpected mode.
static const wchar_t *STATIC_NULL_STRING = L"<null string>";
static const wchar_t *STATIC_INVALID_STRING = L"<INVALID STRING>";

// DEBUG builds: points the wchar_ptr member at the text that matches the
// current mode (presumably so the contents are easy to inspect in a
// debugger).  In other builds the macro expands to nothing.
#define ASSIGN_DEBUG_PTR() \
    { \
        if (mode == STRING_NULL) \
            wchar_ptr = STATIC_NULL_STRING; \
        else if (mode == STRING_CONST_REF) \
            wchar_ptr = ref; \
        else if (mode == STRING_SHARED_IMPL) \
            wchar_ptr = impl->get_w_string(); \
        else \
            wchar_ptr = STATIC_INVALID_STRING; \
    }

#else
#define ASSIGN_DEBUG_PTR()
#endif
00196
00197
00198 const string string::EMPTY_STRING = L"";
00199
00200
00201 string::string()
00202 : data_object(),
00203 comparable(),
00204 iterable<wchar_t, string_iterator>(),
00205 indexable<wchar_t, gsgl::index_t>(),
00206 io::printable(),
00207 io::serializable(),
00208 mode(STRING_NULL),
00209 ref(0)
00210 {
00211 ASSIGN_DEBUG_PTR();
00212 }
00213
00214
00215 string::string(const string & s)
00216 : data_object(),
00217 comparable(),
00218 iterable<wchar_t, string_iterator>(),
00219 indexable<wchar_t, gsgl::index_t>(),
00220 io::printable(),
00221 io::serializable(),
00222 mode(STRING_NULL),
00223 ref(0)
00224 {
00225 *this = s;
00226
00227 ASSIGN_DEBUG_PTR();
00228 }
00229
00230
00231 string::string(wchar_t *str)
00232 : data_object(),
00233 comparable(),
00234 iterable<wchar_t, string_iterator>(),
00235 indexable<wchar_t, gsgl::index_t>(),
00236 io::printable(),
00237 io::serializable(),
00238 mode(str ? STRING_CONST_REF : STRING_NULL),
00239 ref(str)
00240 {
00241 unshare();
00242
00243 ASSIGN_DEBUG_PTR();
00244 }
00245
00246
00247 string::string(const wchar_t *str)
00248 : data_object(),
00249 comparable(),
00250 iterable<wchar_t, string_iterator>(),
00251 indexable<wchar_t, gsgl::index_t>(),
00252 io::printable(),
00253 io::serializable(),
00254 mode(str ? STRING_CONST_REF : STRING_NULL),
00255 ref(str)
00256 {
00257 ASSIGN_DEBUG_PTR();
00258 }
00259
00260
00261
00262 string::string(const char *str)
00263 : data_object(),
00264 comparable(),
00265 iterable<wchar_t, string_iterator>(),
00266 indexable<wchar_t, gsgl::index_t>(),
00267 io::printable(),
00268 io::serializable(),
00269 mode(STRING_NULL),
00270 ref(0)
00271 {
00272 if (str)
00273 {
00274 data::simple_array<wchar_t> buf;
00275 for (const char *ch = str; *ch; ++ch)
00276 buf.append(static_cast<wchar_t>(*ch));
00277 buf.append(0);
00278
00279 mode = STRING_SHARED_IMPL;
00280 impl = new string_impl(buf.ptr());
00281 impl->attach();
00282
00283 }
00284
00285 ASSIGN_DEBUG_PTR();
00286 }
00287
00288
00289 string & string::operator= (wchar_t *str)
00290 {
00291 make_null();
00292
00293 mode = str ? STRING_CONST_REF : STRING_NULL;
00294 ref = str;
00295
00296 unshare();
00297
00298 ASSIGN_DEBUG_PTR();
00299
00300 return *this;
00301 };
00302
00303
00304 string & string::operator= (const wchar_t *str)
00305 {
00306 make_null();
00307
00308 mode = str ? STRING_CONST_REF : STRING_NULL;
00309 ref = str;
00310
00311 ASSIGN_DEBUG_PTR();
00312
00313 return *this;
00314 }
00315
00316
00317 string & string::operator= (const string & s)
00318 {
00319 make_null();
00320
00321 switch (s.mode)
00322 {
00323 case STRING_NULL:
00324 break;
00325 case STRING_CONST_REF:
00326 assert(s.ref);
00327 mode = STRING_CONST_REF;
00328 ref = s.ref;
00329 break;
00330 case STRING_SHARED_IMPL:
00331 assert(s.impl);
00332 mode = STRING_SHARED_IMPL;
00333 impl = s.impl;
00334 impl->attach();
00335 break;
00336 default:
00337 throw internal_exception(__FILE__, __LINE__, L"can't happen in string::operator= ()");
00338 }
00339
00340 ASSIGN_DEBUG_PTR();
00341
00342 return *this;
00343 }
00344
00345
00346 string::~string()
00347 {
00348 make_null();
00349 mode = STRING_INVALID;
00350
00351 ASSIGN_DEBUG_PTR();
00352 }
00353
00354
00355
00356
00357 gsgl::index_t string::size() const
00358 {
00359 switch (mode)
00360 {
00361 case STRING_NULL:
00362 return 0;
00363 case STRING_CONST_REF:
00364 return static_cast<gsgl::index_t>(::wcslen(ref));
00365 case STRING_SHARED_IMPL:
00366 assert(impl);
00367 return impl->w_data.size() - 1;
00368 default:
00369 throw internal_exception(__FILE__, __LINE__, L"can't happen in string::size()");
00370 }
00371 }
00372
00373
00374 void string::clear()
00375 {
00376 switch (mode)
00377 {
00378 case STRING_NULL:
00379 case STRING_CONST_REF:
00380 make_null();
00381 break;
00382 case STRING_SHARED_IMPL:
00383 unshare();
00384 impl->set_modified();
00385 impl->w_data.clear();
00386 impl->w_data.append(0);
00387 break;
00388 default:
00389 throw internal_exception(__FILE__, __LINE__, L"can't happen in string::clear()");
00390 }
00391
00392 ASSIGN_DEBUG_PTR();
00393 }
00394
00395
00396
00397
00398 void string::append(const wchar_t & ch)
00399 {
00400 unshare();
00401 impl->set_modified();
00402 impl->w_data[impl->w_data.size()-1] = ch;
00403 impl->w_data.append(0);
00404 ASSIGN_DEBUG_PTR();
00405 }
00406
00407
00408 void string::append(const wchar_t *str)
00409 {
00410 if (str && *str)
00411 {
00412 unshare();
00413 impl->set_modified();
00414
00415 size_t la = size();
00416 size_t lb = ::wcslen(str);
00417 impl->w_data[static_cast<gsgl::index_t>(la + lb)] = 0;
00418 ::memcpy(impl->w_data.ptr() + la, str, sizeof(wchar_t) * lb);
00419 }
00420 ASSIGN_DEBUG_PTR();
00421 }
00422
00423
00424 void string::append(const string & str)
00425 {
00426 append(str.w_string());
00427 }
00428
00429
00430 void string::insert(const iterator & i, const wchar_t & ch)
00431 {
00432 assert(impl);
00433
00434 unshare();
00435 impl->set_modified();
00436 impl->w_data.insert(ch, i.position);
00437 }
00438
00439
00440 void string::remove(const iterator & i)
00441 {
00442 assert(impl);
00443
00444 unshare();
00445 impl->set_modified();
00446 impl->w_data.remove(i.position);
00447 }
00448
00449
00450
00451
00452 const wchar_t & string::item(const gsgl::index_t & index) const
00453 {
00454 switch (mode)
00455 {
00456 case STRING_NULL:
00457 throw memory_exception(__FILE__, __LINE__, L"bounds error in string.");
00458 case STRING_CONST_REF:
00459 if (index < static_cast<gsgl::index_t>(::wcslen(ref)))
00460 return ref[index];
00461 else
00462 throw memory_exception(__FILE__, __LINE__, L"bounds error in string");
00463 default:
00464 assert(impl);
00465
00466 try
00467 {
00468 return impl->w_data.item(index);
00469 }
00470 catch (memory_exception &)
00471 {
00472 throw memory_exception(__FILE__, __LINE__, L"bounds error in string");
00473 }
00474 }
00475 }
00476
00477
00478 wchar_t & string::item(const gsgl::index_t & index)
00479 {
00480 unshare();
00481
00482 if (index < size())
00483 return impl->w_data.item(index);
00484 else
00485 throw memory_exception(__FILE__, __LINE__, L"bounds error in string");
00486 }
00487
00488
00489 bool string::contains_index(const gsgl::index_t & index) const
00490 {
00491 return index < size();
00492 }
00493
00494
00495
00496
00497 int string::compare(const comparable & c) const
00498 {
00499 const string *cp = dynamic_cast<const string *>(&c);
00500 if (cp)
00501 {
00502 return compare(cp->w_string());
00503 }
00504 else
00505 {
00506 throw internal_exception(__FILE__, __LINE__, L"attempted to compare string with non-string");
00507 }
00508 }
00509
00510
00511 int string::compare(const wchar_t *str) const
00512 {
00513 const wchar_t *a = w_string();
00514 const wchar_t *b = str;
00515
00516 while (a && b && *a && *b)
00517 {
00518 if (*a != *b)
00519 return (static_cast<int>(*a)) - (static_cast<int>(*b));
00520 a++; b++;
00521 }
00522
00523 if (a && *a)
00524 return 1;
00525 else if (b && *b)
00526 return -1;
00527 else
00528 return 0;
00529 }
00530
00531
00532
00533
00534 void string::to_stream(io::text_stream & s) const
00535 {
00536 s << w_string();
00537 }
00538
00539
00540 void string::from_stream(io::text_stream & s)
00541 {
00542 clear();
00543
00544 for (wchar_t ch = s.get(); ch != WEOF; ch = s.get())
00545 {
00546 if (ch == L'\r')
00547 {
00548 ch = L'\n';
00549 if (s.peek() == L'\n')
00550 s.get();
00551 }
00552
00553 if (ch == L'\n')
00554 break;
00555 else
00556 append(ch);
00557 }
00558
00559 ASSIGN_DEBUG_PTR();
00560 }
00561
00562
00563
00564
00565 void string::to_stream(io::data_stream & s) const
00566 {
00567 gsgl::index_t sz = size();
00568 s << sz;
00569 s.write(reinterpret_cast<const unsigned char *>(w_string()), sizeof(wchar_t) * sz);
00570 };
00571
00572
00573 void string::from_stream(io::data_stream & s)
00574 {
00575 gsgl::index_t sz;
00576 s >> sz;
00577 data::smart_pointer<wchar_t, true> buf(new wchar_t[sz+1]);
00578 s.read(reinterpret_cast<unsigned char *>(buf.ptr()), sizeof(wchar_t) * sz);
00579 buf[sz] = 0;
00580
00581 *this = buf;
00582
00583 ASSIGN_DEBUG_PTR();
00584 };
00585
00586
00587
00588
00589 static const wchar_t *NULL_STRING_LITERAL = L"";
00590
00591 const wchar_t *string::w_string() const
00592 {
00593 switch (mode)
00594 {
00595 case STRING_NULL:
00596 return NULL_STRING_LITERAL;
00597 case STRING_CONST_REF:
00598 return ref;
00599 case STRING_SHARED_IMPL:
00600 assert(impl);
00601 return impl->get_w_string();
00602 default:
00603 throw internal_exception(__FILE__, __LINE__, L"can't happen in string::w_string()");
00604 }
00605 }
00606
00607
00608 const char *string::c_string() const
00609 {
00610 unshare();
00611 return impl->get_c_string();
00612 }
00613
00614
00615 const unsigned char *string::p_string() const
00616 {
00617 unshare();
00618 return impl->get_p_string();
00619 }
00620
00621
00622
00623
00624 string & string::operator+= (const wchar_t & ch)
00625 {
00626 append(ch);
00627 return *this;
00628 }
00629
00630
00631 string & string::operator+= (const wchar_t *str)
00632 {
00633 append(str);
00634 return *this;
00635 }
00636
00637
00638 string & string::operator+= (const string & str)
00639 {
00640 append(str.w_string());
00641 return *this;
00642 }
00643
00644
00645 string string::operator+ (const wchar_t & ch) const
00646 {
00647 string res(*this);
00648 res.append(ch);
00649 return res;
00650 }
00651
00652
00653 string string::operator+ (const wchar_t *str) const
00654 {
00655 string res(*this);
00656 res.append(str);
00657 return res;
00658 }
00659
00660
00661 string string::operator+ (const string & str) const
00662 {
00663 string res(*this);
00664 res.append(str.w_string());
00665 return res;
00666 }
00667
00668
00669
00670
00671 string string::substring(const gsgl::index_t index, const gsgl::index_t len) const
00672 {
00673 const gsgl::index_t sz = size();
00674 gsgl::index_t length = len;
00675 string res;
00676
00677 if (length == 0 || index >= sz)
00678 return res;
00679 if (length == -1 || index + length > sz)
00680 length = sz - index;
00681
00682 res.unshare();
00683 res.impl->set_modified();
00684 res.impl->w_data[length] = 0;
00685 ::memcpy(res.impl->w_data.ptr(), w_string() + index, sizeof(wchar_t) * length);
00686
00687 return res;
00688 }
00689
00690
00691 string string::left_substring(const gsgl::index_t length) const
00692 {
00693 return substring(0, length);
00694 }
00695
00696
00697 string string::right_substring(const gsgl::index_t length) const
00698 {
00699 if (length == 0)
00700 return string();
00701
00702 const gsgl::index_t sz = size();
00703
00704 if (length >= sz)
00705 return *this;
00706
00707 return substring(sz - length);
00708 }
00709
00710
00711 gsgl::index_t string::find(const wchar_t *substr, const gsgl::index_t index) const
00712 {
00713 const wchar_t *ptr = w_string();
00714 const wchar_t *found_ptr = ::wcswcs(ptr + index, substr);
00715 return found_ptr ? static_cast<gsgl::index_t>(found_ptr - ptr) : -1;
00716 }
00717
00718
00719 gsgl::index_t string::find(const string & substr, const gsgl::index_t index) const
00720 {
00721 return find(substr.w_string(), index);
00722 }
00723
00724
00725 gsgl::index_t string::find_reverse(const wchar_t *substr, const gsgl::index_t index) const
00726 {
00727 const wchar_t *ptr = w_string();
00728 const int this_len = static_cast<const int>(::wcslen(ptr));
00729 const int str_len = static_cast<const int>(::wcslen(substr));
00730
00731 for (int i = index != -1 ? index : this_len; i > 0; --i)
00732 {
00733 int j;
00734 for (j = 0; j < str_len; ++j)
00735 {
00736 int this_pos = i+j-1;
00737
00738 if (this_pos > this_len || ptr[this_pos] != substr[j])
00739 break;
00740 }
00741
00742 if (j == str_len)
00743 return i;
00744 }
00745
00746 return -1;
00747 }
00748
00749
00750 gsgl::index_t string::find_reverse(const string & substr, const gsgl::index_t index) const
00751 {
00752 return find_reverse(substr.w_string(), index);
00753 }
00754
00755
00756
00757
00758 string string::copy()
00759 {
00760 string result = *this;
00761 result.unshare();
00762 return result;
00763 }
00764
00765
00766 string & string::trim()
00767 {
00768
00769 gsgl::index_t sz = size();
00770
00771 if (sz)
00772 {
00773 data::smart_pointer<wchar_t, true> buf(new wchar_t[sz+1]);
00774 ::memcpy(buf, w_string(), sizeof(wchar_t) * (sz+1));
00775
00776 wchar_t *start = buf;
00777 wchar_t *end = buf + (sz-1);
00778
00779 while (*start && ::iswspace(*start))
00780 start++;
00781
00782 while (end > start && ::iswspace(*end))
00783 *end-- = 0;
00784
00785 *this = start;
00786 }
00787
00788 ASSIGN_DEBUG_PTR();
00789
00790 return *this;
00791 }
00792
00793
00794 string & string::make_upper()
00795 {
00796 unshare();
00797
00798 for (wchar_t *ch = impl->w_data.ptr(); ch && *ch; ++ch)
00799 {
00800 if (*ch >= L'a' && *ch <= L'z')
00801 *ch -= 32;
00802 }
00803
00804 ASSIGN_DEBUG_PTR();
00805
00806 return *this;
00807 }
00808
00809
00810 string & string::make_lower()
00811 {
00812 unshare();
00813
00814 for (wchar_t *ch = impl->w_data.ptr(); ch && *ch; ++ch)
00815 {
00816 if (*ch >= L'A' && *ch <= L'Z')
00817 *ch += 32;
00818 }
00819
00820 ASSIGN_DEBUG_PTR();
00821
00822 return *this;
00823 }
00824
00825
00826 bool string::to_bool() const
00827 {
00828 wchar_t *ch = impl->w_data.ptr();
00829 while (ch && *ch && ::iswspace(*ch))
00830 ++ch;
00831
00832 return ch && (*ch == L't' || *ch == L'T');
00833 }
00834
00835
00836 int string::to_int() const
00837 {
00838 int res = 0;
00839 if (size())
00840 ::swscanf(w_string(), L"%d", &res);
00841 return res;
00842 }
00843
00844
00845 double string::to_double() const
00846 {
00847 double res = 0.0f;
00848 if (size())
00849 ::swscanf(w_string(), L"%lf", &res);
00850 return res;
00851 }
00852
00853
00854 data::list<string> string::split(const string & sep) const
00855 {
00856 return split(sep.w_string());
00857 }
00858
00859
00860 data::list<string> string::split(const wchar_t *sep) const
00861 {
00862 data::list<string> tokens;
00863 const gsgl::index_t sz = size();
00864
00865 if (sz)
00866 {
00867 data::smart_pointer<wchar_t, true> buf(new wchar_t[sz+1]);
00868 ::memcpy(buf, w_string(), sizeof(wchar_t) * (sz+1));
00869
00870 wchar_t *start, *cur;
00871 start = cur = buf;
00872
00873 while (*cur)
00874 {
00875 bool in_sep = false;
00876 for (const wchar_t *ss = sep; *ss; ++ss)
00877 if ((in_sep = (*cur == *ss)))
00878 break;
00879
00880 if (in_sep || sep[0] == 0)
00881 {
00882 *cur = 0;
00883 tokens.append(string(start));
00884 start = cur+1;
00885 }
00886 cur++;
00887 }
00888
00889 if (*start)
00890 {
00891 tokens.append(string(start));
00892 }
00893 }
00894
00895 return tokens;
00896 }
00897
00898
00899 string string::format(const string & format, ...)
00900 {
00901 const gsgl::index_t len = (format.size() + 64) * 4;
00902
00903 data::smart_pointer<wchar_t, true> buf(new wchar_t[len+1]);
00904
00905 va_list ap;
00906 va_start(ap, format);
00907 ::vswprintf(buf, len, format.w_string(), ap);
00908 va_end(ap);
00909
00910 return string(buf);
00911 }
00912
00913
00914 string string::format(const wchar_t *format, ...)
00915 {
00916 const gsgl::index_t len = (static_cast<gsgl::index_t>(::wcslen(format)) + 64) * 4;
00917
00918 data::smart_pointer<wchar_t, true> buf(new wchar_t[len+1]);
00919
00920 va_list ap;
00921 va_start(ap, format);
00922 ::vswprintf_s(buf, len, format, ap);
00923 va_end(ap);
00924
00925 return string(buf);
00926 }
00927
00928
00929
00930
00931 void string::make_null()
00932 {
00933 if (mode == STRING_SHARED_IMPL)
00934 {
00935 assert(impl);
00936 impl->detach();
00937 }
00938
00939 mode = STRING_NULL;
00940 ref = 0;
00941
00942 ASSIGN_DEBUG_PTR();
00943 }
00944
00945
00946 void string::unshare() const
00947 {
00948 switch (mode)
00949 {
00950 case STRING_NULL:
00951 case STRING_CONST_REF:
00952 mode = STRING_SHARED_IMPL;
00953 impl = new string_impl(ref);
00954 impl->attach();
00955 break;
00956 case STRING_SHARED_IMPL:
00957 assert(impl);
00958 assert(impl->get_ref_count() > 0);
00959
00960 if (impl->get_ref_count() > 1)
00961 {
00962 string_impl *copy = new string_impl(*impl);
00963 copy->attach();
00964
00965 impl->detach();
00966 impl = copy;
00967 }
00968 break;
00969 default:
00970 throw internal_exception(__FILE__, __LINE__, L"can't happen in string::unshare()");
00971 }
00972
00973 assert(impl);
00974 ASSIGN_DEBUG_PTR();
00975 }
00976
00977
00978 }