/**
 * Text translation framework.
 *
 *
 * This library aims to facilitate native language support in applications and
 * libraries written in D. String resources containing natural language _text
 * are read from XML documents, called $(I catalogs), supplied at compile-time.
 * In source code, string resources are referenced with the $(MREF strings)
 * interface. The languages to use are configured automatically at the start
 * of the program based on the running user's environment, before the $(D main)
 * function is entered.
 *
 * $(SECTION2 Catalogs)
 * There are two kinds of catalogs: the singular $(I primary catalog) and one
 * $(I translation catalog) for each translation. Each catalog is an XML
 * document made visible to the framework as a string import.
 *
 * $(SECTION2 Primary Catalog)
 * The primary catalog is loaded from the string import $(D i18n/strings.xml)
 * and specifies the primary table of string resources, which is used when:
 * $(UL
 * $(LI The language of the primary table matches the user's preferred
 * language)
 * $(LI A translation catalog for the user's preferred language is not
 * supplied)
 * $(LI A translation catalog for the user's preferred language is supplied,
 * but does not contain a translation for the particular string being looked
 * up)
 * $(LI Internationalization is disabled))
 * The primary catalog must have the following structure:
----
$(LESS)?xml version="1.0" encoding="utf-8"?$(GREATER)
$(LESS)resources language="primary_catalog_language"$(GREATER)
    $(LESS)translation language="translation1"/$(GREATER)
    $(LESS)translation language="translation2"/$(GREATER)
    $(LESS)translation language="..."/$(GREATER)
    $(LESS)string name="id1"$(GREATER)text1$(LESS)/string$(GREATER)
    $(LESS)string name="id2"$(GREATER)text2$(LESS)/string$(GREATER)
    $(LESS)string name="..."$(GREATER)...$(LESS)/string$(GREATER)
$(LESS)/resources$(GREATER)
----
 * For the primary catalog, the root element's $(D language) attribute is
 * required and contains the language used in the primary catalog.
 * Each $(D translation) element declares that a translation catalog for the
 * given language is supplied and should be loaded.
 * All $(D language) attributes are ISO-639 language codes.
 * Each $(D string) element defines a string resource, where the $(D name)
 * attribute is the resource identifier, and the element's content is the
 * resource _text.
 *
 * $(SECTION2 Translation Catalogs)
 * Translation catalogs are loaded as string imports from
 * $(D i18n/strings.$(I ll).xml) where $(D $(I ll)) is the ISO-639 language code
 * for the language translation provided within the document. Each translation
 * must be enumerated in the primary catalog with the $(D translation) tag.
 * If a $(D translation) element has non-empty text content, that text is used
 * as the file name (relative to the $(D i18n) directory) instead of the
 * default $(D strings.$(I ll).xml).
 *
 * The structure of translation catalogs is a subset of the structure of the
 * primary catalog:
----
$(LESS)?xml version="1.0" encoding="utf-8"?$(GREATER)
$(LESS)resources$(GREATER)
    $(LESS)string name="id1"$(GREATER)text1$(LESS)/string$(GREATER)
    $(LESS)string name="id2"$(GREATER)text2$(LESS)/string$(GREATER)
    $(LESS)string name="..."$(GREATER)...$(LESS)/string$(GREATER)
$(LESS)/resources$(GREATER)
----
 * Each $(D string) element provides a translation of the string resource
 * with the given identifier. The identifier must match the identifier
 * of a string resource in the primary catalog.
 *
 * $(SECTION2 String References)
 * In source code, string resources are referenced with the $(MREF strings) interface:
------
void main() {
    import std.stdio, i18n.text;
    // Writes the string resource with the identifier "hello_world" to stdout
    writeln(strings.hello_world);
}
------
 * $(SECTION2 Language Selection)
 * Platform-specific standards are used for selecting the preferred language.
 * On POSIX systems, this is the POSIX standard of using environment variables,
 * including the fallback/priority syntax supported by $(I gettext). See
 * $(HTTPS www.gnu.org/software/gettext/manual/html_node/Setting-the-POSIX-Locale.html#Setting-the-POSIX-Locale,
 * gettext's documentation on setting the POSIX locale).
 *
 * $(SECTION2 Version Identifiers)
 * The behavior of this module can be configured with version identifiers.
 * $(UL
 * $(LI $(I i18n_list_references): source code locations of string references
 * will be output during compilation)
 * $(LI $(I i18n_use_utf32): string resources are encoded in UTF-32 by default)
 * $(LI $(I i18n_use_utf16): string resources are encoded in UTF-16 by default)
 * )
 * See_Also:
 * $(I gettext)'s advice on $(HTTPS www.gnu.org/software/gettext/manual/gettext.html#Preparing-Strings, separating strings)
 * and $(HTTPS www.gnu.org/software/gettext/manual/gettext.html#Names, translating proper names)
 * Macros:
 * SECTION2=<h3>$1</h3>
 */
module i18n.text;

private:
// A table of string resources for one language.
// `strings` must be sorted by `id` (parseCatalog does this) because lookups
// use binary search.
struct StringTable
{
    // One string resource: its identifier and its text.
    struct StringResource
    {
        string id;
        string content;
    }

    string language;
    StringResource[] strings;

    // Returns true iff a resource with identifier `id` is present in the table.
    //
    // Unlike testing the result of `lookup` against null, this also reports
    // resources whose text is empty.
    bool contains(string id) const pure nothrow @safe @nogc
    {
        import std.range : assumeSorted;

        return !strings.assumeSorted!"a.id < b.id"
            .equalRange(StringResource(id, null))
            .empty;
    }

    // Returns the text of the resource with identifier `id`,
    // or null when the table has no such resource.
    string lookup(string id) const pure nothrow @safe @nogc
    {
        import std.range : assumeSorted;

        auto matches = strings.assumeSorted!"a.id < b.id"
            .equalRange(StringResource(id, null));

        return matches.empty ? null : matches.front.content;
    }
}

// Result of parsing one catalog document: the translations it declares
// (primary catalog only) and its table of string resources.
struct Catalog
{
    // A declared translation: its language and the string import path
    // of its catalog.
    struct Translation
    {
        string language, path;
    }
    Translation[] translations;
    StringTable table;
}

// Parses the catalog document `source`.
//
// Params:
//  language = language of the catalog; null or empty marks the primary
//             catalog, whose language is then read from the root element's
//             `language` attribute
//  source   = XML text of the catalog
//  parent   = table of the primary catalog; only consulted for translation
//             catalogs, to reject identifiers the primary catalog lacks
//
// Returns: the catalog, with its string table sorted by identifier.
// Throws: Exception (via enforce) when the document violates the structure
//         described in the module documentation.
Catalog parseCatalog(string language, string source, StringTable parent)
{
    // Only parser for a human-readable format I could find
    // that works at compile-time
    import arsd.dom;
    import std.algorithm.sorting : sort;
    import std.exception : enforce;
    import std.format : format;
    import std.path : buildPath;

    immutable isPrimaryCatalog = language.length == 0;
    // NOTE(review): the two flags are presumably case-sensitive + strict
    // parsing - confirm against arsd.dom's Document constructor.
    auto document = new Document(source, true, true);

    Element root;
    foreach(elem; document["resources"])
    {
        assert(root is null,
            "catalog must not contain more than one `resources` element");
        root = elem;
    }
    enforce(root, "root element must be `resources`");

    if(isPrimaryCatalog)
    {
        enforce(root.hasAttribute("language"),
            "primary message catalog must have `language` attribute");
        language = root.getAttribute("language");
        // TODO: verify language specification
    }

    Catalog catalog;
    catalog.table.language = language;
    foreach(elem; document["resources translation"])
    {
        enforce(isPrimaryCatalog,
            "only the primary catalog can list translations");
        enforce(elem.hasAttribute("language"),
            "translation element must have `language` attribute");
        auto translationLanguage = elem.getAttribute("language");
        // TODO: verify language specification

        // Non-empty element text overrides the default file name
        auto inner = elem.innerText;
        catalog.translations ~= Catalog.Translation(translationLanguage,
            buildPath("i18n", inner.length? inner :
                "strings." ~ translationLanguage ~ ".xml"));
    }

    foreach(elem; document["resources string"])
    {
        enforce(elem.hasAttribute("name"), "string resource must have name attribute");

        auto id = elem.getAttribute("name");
        enforce(id.length, "string resource name cannot be empty");
        // TODO: verify that name follows D identifier rules

        // Check for presence of the identifier rather than for non-null text,
        // so that primary strings with empty text can still be translated.
        if(!isPrimaryCatalog)
            enforce(parent.contains(id), format(
                "unknown string identifier in catalog `%s`: `%s`",
                language, id));

        catalog.table.strings ~= StringTable.StringResource(id, elem.innerText);
    }

    // StringTable's lookups rely on this ordering
    catalog.table.strings.sort!"a.id < b.id";
    return catalog;
}

unittest
{
    auto catalog = parseCatalog(null, q{
        <?xml version="1.0" encoding="utf-8"?>
        <resources language="en">
            <translation language="de"/>
            <translation language="es">spanish.xml</translation>
            <string name="foo">bar</string>
            <string name="baz">foobar</string>
        </resources>
    }, StringTable.init);

    assert(catalog.table.language == "en");

    assert(catalog.translations == [
        Catalog.Translation("de", "i18n/strings.de.xml"),
        Catalog.Translation("es", "i18n/spanish.xml")
    ]);

    alias S = StringTable.StringResource;
    assert(catalog.table.strings == [S("baz", "foobar"), S("foo", "bar")]);

    assert(catalog.table.contains("foo"));
    assert(catalog.table.contains("baz"));
    assert(!catalog.table.contains("qux"));
    assert(catalog.table.lookup("foo") == "bar");
    assert(catalog.table.lookup("qux") is null);
}

// The components of one locale specification.
struct Locale
{
    version(Posix)
        string language, country, encoding, variant;
    else
        static assert(false,
            "i18n.text: locale selection is only implemented for POSIX platforms");
}

version(Posix)
{
    // POSIX and gettext standard
    //
    // Returns a range of the locales selected through the environment
    // variables LANGUAGE, LC_ALL, LC_MESSAGES and LANG, in the order they
    // are listed in the chosen variable. Specifications whose language part
    // is empty or not purely alphabetic are dropped.
    auto selectedLocales() /+ nothrow +/ @safe
    {
        import std.algorithm : all, filter, find, map, splitter;
        import std.ascii : isAlpha;
        import std.process : environment;
        import std.range : chain, empty, front, only;
        import std.string : strip;

        string localeSpecs = null;

        // if $LANG == "C", ignore other envvars
        auto lang = environment.get("LANG").strip;
        if(lang != "C")
        {
            // These are sorted by priority
            static immutable envVars = ["LANGUAGE", "LC_ALL", "LC_MESSAGES"];
            auto localeSearch = envVars.map!(environment.get)
                .map!strip
                .chain(only(lang)) // $LANG is otherwise considered last
                .find!(var => var.length);

            if (!localeSearch.empty)
            {
                // The first non-empty variable wins; "C" and values starting
                // with '/' select no locale at all
                if(localeSearch.front != "C" &&
                    localeSearch.front.front != '/')
                    localeSpecs = localeSearch.front;
            }
        }

        // spec = ll_CC.ENCODING@variant
        static Locale parseLocale(string spec)
        {
            import std.string : lastIndexOf;
            Locale locale;
            auto index = spec.lastIndexOf('@');
            if(index != -1)
            {
                locale.variant = spec[index + 1 .. $];
                spec = spec[0 .. index];
            }
            index = spec.lastIndexOf('.');
            if(index != -1)
            {
                locale.encoding = spec[index + 1 .. $];
                spec = spec[0 .. index];
            }
            index = spec.lastIndexOf('_');
            if(index != -1)
            {
                locale.country = spec[index + 1 .. $];
                locale.language = spec[0 .. index];
            }
            else
                locale.language = spec;
            return locale;
        }

        return localeSpecs
            .splitter(':')
            .map!parseLocale
            .filter!(locale =>
                !locale.language.empty && locale.language.all!isAlpha);
    }

    @safe unittest
    {
        import std.algorithm : equal;
        import std.process : environment;

        foreach(envVar; ["LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG"])
            environment.remove(envVar);

        environment["LANG"] = "en_US.UTF-8";
        assert(selectedLocales.equal([Locale("en", "US", "UTF-8")]));

        environment["LANG"] = "en_US";
        assert(selectedLocales.equal([Locale("en", "US")]));

        environment["LANG"] = "en";
        assert(selectedLocales.equal([Locale("en")]));

        environment["LC_MESSAGES"] = "de_DE@euro";
        assert(selectedLocales.equal([Locale("de", "DE", null, "euro")]));

        environment["LC_ALL"] = "ja.UTF-8";
        assert(selectedLocales.equal([Locale("ja", null, "UTF-8")]));

        environment["LANGUAGE"] = "en_US.UTF-8:de_DE@euro:ja.UTF-8";
        assert(selectedLocales.equal([
            Locale("en", "US", "UTF-8"),
            Locale("de", "DE", null, "euro"),
            Locale("ja", null, "UTF-8", null)]));

        environment["LANG"] = "C";
        assert(selectedLocales.empty);

        environment["LANGUAGE"] = "en_US:en_GB:en_US:de_DE:en_GB";
        assert(selectedLocales.empty); // $LANG = "C" overrides $LANGUAGE
        environment.remove("LANG");
        assert(selectedLocales.equal([
            Locale("en", "US"),
            Locale("en", "GB"),
            Locale("en", "US"),
            Locale("de", "DE"),
            Locale("en", "GB")]));
    }
}

public:

/**
 * Access point for the string resources of the catalogs supplied at
 * compile-time. Resources are reached through $(D opDispatch)
 * (e.g. $(D strings.hello_world)) or $(D getEncoded).
 *
 * See_Also:
 *  $(MREF strings)
 */
struct Strings()
{
    import std.algorithm.iteration : map;
    import std.algorithm.sorting : sort;
    import std.array : array;
    import std.meta : staticMap;

    static if(__VERSION__ < 2070)
        import std_backport.meta : aliasSeqOf;
    else
        import std.meta : aliasSeqOf;

    import std.path : buildPath;
    import std.range : chain, only, zip;
    import std.typecons : Tuple;
    import std.traits : isSomeString;

    private:
    // The primary catalog, parsed at compile-time
    enum primaryCatalog = parseCatalog(null,
        import(buildPath("i18n", "strings.xml")), StringTable.init);
    static immutable primaryTable = primaryCatalog.table;

    // Languages and import paths of the translations declared
    // by the primary catalog
    enum languages = primaryCatalog.translations.map!(
        (ref Catalog.Translation t) => t.language).array;
    enum paths = primaryCatalog.translations.map!(
        (ref Catalog.Translation t) => t.path).array;

    // (language, catalog source) pairs, one per declared translation
    enum Import(string path) = import(path);
    enum sources = zip(languages, [staticMap!(Import, aliasSeqOf!paths)]).array;

    static if(sources.length)
    {
        // Primary table plus one table per translation, sorted by language
        // so that a locale's table can be found by binary search
        static immutable translationTables = chain(only(primaryCatalog.table),
            sources.map!((ref Tuple!(string, string) pair) =>
                parseCatalog(pair.expand, primaryCatalog.table).table))
            .array
            .sort!"a.language < b.language".release();

        // Indexes into translationTables for the user's locales, in priority
        // order; only the first numChosenLocales entries are meaningful.
        // Both are filled in by the static constructor below.
        static immutable size_t[translationTables.length] translationIndexesBuffer;
        static immutable size_t numChosenLocales;

        static immutable(size_t)[] translationIndexes() @property pure nothrow @safe @nogc
        {
            return translationIndexesBuffer[0 .. numChosenLocales];
        }

        // Resolves the user's selected locales to table indexes
        // at program start.
        shared static this() @safe
        {
            import std.algorithm : canFind;
            import std.range : assumeSorted;

            auto sortedTables = translationTables
                .assumeSorted!("a.language < b.language");

            size_t numChosen = 0;
            foreach(locale; selectedLocales)
            {
                // Number of tables ordered before this language, i.e. the
                // index where its table must be if it exists. Computed
                // without `.ptr`, which is not allowed in @safe code.
                immutable index = sortedTables
                    .lowerBound(StringTable(locale.language)).length;

                // No translation for selected language
                if(index == translationTables.length ||
                    translationTables[index].language != locale.language)
                    continue;

                // If this is the language of a higher priority locale,
                // ignore this entry
                if(!translationIndexesBuffer[0 .. numChosen].canFind(index))
                    translationIndexesBuffer[numChosen++] = index;
            }
            numChosenLocales = numChosen;
        }
    }

    public:
    @disable this(this);

    /**
     * Returns:
     *  $(D true) iff id is defined in the primary catalog
     * Complexity:
     *  $(BIGOH log n)
     */
    static bool identifierExists(string id) pure nothrow @safe @nogc
    {
        // Test for presence, not for non-empty text: a resource defined
        // with empty text is still a defined identifier
        return primaryTable.contains(id);
    }

    // Implementation shared by opDispatch and getEncoded: the text of
    // resource `id` in encoding `S`.
    private template opDispatchImpl(string id, S)
    {
        import std.conv : to;
        // Text from the primary catalog, used when no chosen translation
        // provides the string
        static immutable fallback = primaryTable.lookup(id).to!S;

        static if(sources.length)
        {
            // Text of `id` in every table, indexed like translationTables;
            // null where the table has no entry for `id`
            static immutable S[translationTables.length] translationTable =
                translationTables.map!((ref immutable StringTable table) =>
                    table.lookup(id).to!S)
                .array;

            static S opDispatchImpl() @property pure nothrow @safe @nogc
            {
                foreach(index; translationIndexes)
                {
                    auto text = translationTable[index];
                    // Same test as a non-null `.ptr`, but valid in @safe code
                    if(text !is null)
                        return text;
                }
                return fallback;
            }
        }
        else
            alias opDispatchImpl = fallback;
    }

    version(i18n_use_utf32)
        alias I18NString = dstring;
    else version(i18n_use_utf16)
        alias I18NString = wstring;
    else
        alias I18NString = string;

    version(i18n_list_references)
    {
        template opDispatch(string id, string file = __FILE__, uint line = __LINE__)
            if(identifierExists(id))
        {
            alias opDispatch = getEncoded!(id, I18NString, file, line);
        }

        template getEncoded(string id, S, string file = __FILE__, uint line = __LINE__)
            if(identifierExists(id) && isSomeString!S)
        {
            import std.format : format;
            // Report the location of this reference during compilation
            pragma(msg, format("i18n %s(%s): %s", file, line, id));
            alias getEncoded = opDispatchImpl!(id, S);
        }
    }
    else
    {
        template opDispatch(string id)
            if(identifierExists(id))
        {
            alias opDispatch = opDispatchImpl!(id, I18NString);
        }

        template getEncoded(string id, S)
            if(identifierExists(id) && isSomeString!S)
        {
            alias getEncoded = opDispatchImpl!(id, S);
        }
    }

    version(D_Ddoc)
    {
        /**
         * Default encoding for string resources, returned by
         * $(MREF Strings.opDispatch).
         *
         * Set version $(I i18n_use_utf32) to use $(D dstring), or
         * version $(I i18n_use_utf16) to use $(D wstring); otherwise uses
         * $(D string) (UTF-8).
         */
        alias I18NString = string;

        /**
         * Get the text for $(I id) according to the user's preferred
         * language(s).
         * Params:
         *  id = identifier of string resource (the $(D name) attribute)
         *  S = encoding for returned string, either $(D string),
         *      $(D wstring) or $(D dstring)
         * Complexity:
         *  $(BIGOH 1). The upper bound is proportional to the number of
         *  translations provided at compile-time. The number of string
         *  resources does $(I not) affect runtime.
         * Example:
         * ----
         * void main()
         * {
         *     import std.stdio, i18n.text;
         *     writeln(strings.hello_world); // Default encoding
         *     writeln(strings.getEncoded!("hello_world", wstring)); // UTF-16
         * }
         * ----
         */
        @property pure nothrow @safe @nogc
        static I18NString opDispatch(string id)()
            if(identifierExists(id));

        /// Ditto
        pure nothrow @safe @nogc
        static S getEncoded(string id, S)()
            if(identifierExists(id) && isSomeString!S);
    }
}

/**
 * See_Also:
 *  $(MREF Strings)
 */
Strings!() strings()() @property pure nothrow @safe @nogc
{
    return Strings!()();
}

version(test_empty) unittest
{
    static assert(!__traits(compiles, strings.nonexistant));
}

// The catalogs in test/ja-de provide `ja` and `de` translations, with `en` primary.
// The following unit test expects priority ja -> de.
// Therefore, run with: LANGUAGE="[x:]ja_JP.UTF-8:[y:]de_DE.UTF-8[:z]"
// Where x, y and z can contain anything but an `en` spec
version(test_ja_de) unittest
{
    import std.algorithm : equal, map;

    assert(Strings!().translationIndexes
        .map!(i => Strings!().translationTables[i].language).equal(["ja", "de"]));

    assert(strings.greeting == "今日は"); // ja
    assert(strings.yes == "ja"); // de
    assert(strings.no == "no"); // fallback to primary catalog (en)

    static assert(!__traits(compiles, strings.nonexistant));

    assert(strings.getEncoded!("greeting", dstring) == "今日は"d);
    assert(strings.getEncoded!("yes", wstring) == "ja"w);
    assert(strings.getEncoded!("no", string) == "no"c);

    static assert(!__traits(compiles,
        strings.getEncoded!("nonexistant", dstring)));
}