1 /**
2   * Text translation framework.
3   *
4   *
5   * This library aims to facilitate native language support in applications and
6   * libraries written in D. String resources containing natural language _text
7   * are read from XML documents, called $(I catalogs), supplied at compile-time.
8   * In source code, string resources are referenced with the $(MREF strings)
9   * interface. The languages to use are configured automatically at the start
10   * of the program based on the running user's environment, before the $(D main)
11   * function is entered.
12   *
13   * $(SECTION2 Catalogs)
  * There are two kinds of catalogs: the singular $(I primary catalog) and one
  * $(I translation catalog) for each translation. Each catalog is an XML
  * document made visible to the framework as a string import (see the
  * compiler's $(D -J) switch).
17   *
18   * $(SECTION2 Primary Catalog)
19   * The primary catalog is loaded from the string import $(D i18n/strings.xml)
20   * and specifies the primary table of string resources, which is used when:
21   * $(UL
  * $(LI The language of the primary catalog matches the user's preferred
  * language)
24   * $(LI A translation catalog for the user's preferred language is not
25   * supplied)
26   * $(LI A translation catalog for the user's preferred language is supplied,
27   * but does not contain a translation for the particular string being looked
28   * up)
29   * $(LI Internationalization is disabled))
30   * The primary catalog must have the following structure:
31 ----
32 $(LESS)?xml version="1.0" encoding="utf-8"?$(GREATER)
33 $(LESS)resources language="primary_catalog_language"$(GREATER)
34 	$(LESS)translation language="translation1"/$(GREATER)
35 	$(LESS)translation language="translation2"/$(GREATER)
36 	$(LESS)translation language="..."/$(GREATER)
37 	$(LESS)string name="id1"$(GREATER)text1$(LESS)/string$(GREATER)
38 	$(LESS)string name="id2"$(GREATER)text2$(LESS)/string$(GREATER)
39 	$(LESS)string name="..."$(GREATER)...$(LESS)/string$(GREATER)
40 $(LESS)/resources$(GREATER)
41 ----
42   * For the primary catalog, the root element's $(D language) attribute is
43   * required and contains the language used in the primary catalog.
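  * Each $(D translation) element declares that a translation catalog for the
  * given language is supplied and should be loaded. If the element is not
  * empty, its content is used as the catalog's file name inside the
  * $(D i18n) directory instead of the default $(D strings.$(I ll).xml).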
46   * All $(D language) attributes are ISO-639 language codes.
47   * Each $(D string) element defines a string resource, where the $(D name)
48   * attribute is the resource identifier, and the element's content is the
49   * resource _text.
50   *
51   * $(SECTION2 Translation Catalogs)
52   * Translation catalogs are loaded as string imports from
53   * $(D i18n/strings.$(I ll).xml) where $(D $(I ll)) is the ISO-639 language code
54   * for the language translation provided within the document. Each translation
55   * must be enumerated in the primary catalog with the $(D translation) tag.
56   *
57   * The structure of translation catalogs is a subset of the structure of the
58   * primary catalog:
59 ----
60 $(LESS)?xml version="1.0" encoding="utf-8"?$(GREATER)
61 $(LESS)resources$(GREATER)
62 	$(LESS)string name="id1"$(GREATER)text1$(LESS)/string$(GREATER)
63 	$(LESS)string name="id2"$(GREATER)text2$(LESS)/string$(GREATER)
64 	$(LESS)string name="..."$(GREATER)...$(LESS)/string$(GREATER)
65 $(LESS)/resources$(GREATER)
66 ----
67   * Each $(D string) element provides a translation of the string resource
68   * with the given identifier. The identifier must match the identifier
69   * of a string resource in the primary catalog.
70   *
71   * $(SECTION2 String References)
72   * In source code, string resources are referenced with the $(MREF strings) interface:
73 ------
74 void main() {
75     import std.stdio, i18n.text;
76     // Writes the string resource with the identifier "hello_world" to stdout
77     writeln(strings.hello_world);
78 }
79 ------
80   * $(SECTION2 Language Selection)
  * Platform-specific standards are used for selecting the preferred language.
  * Currently only POSIX systems are supported. There, the POSIX standard of
  * using environment variables is followed, including the fallback/priority
  * syntax supported by $(I gettext). See
  * $(HTTPS www.gnu.org/software/gettext/manual/html_node/Setting-the-POSIX-Locale.html#Setting-the-POSIX-Locale,
  * gettext's documentation on setting the POSIX locale).
86   *
87   * $(SECTION2 Version Identifiers)
88   * The behavior of this module can be configured with version identifiers.
89   * $(UL
90   * $(LI $(I i18n_list_references): source code locations of string references
91   * will be output during compilation)
  * $(LI $(I i18n_use_utf32): string resources are encoded in UTF-32 by default;
  * takes precedence over $(I i18n_use_utf16) when both are set)
93   * $(LI $(I i18n_use_utf16): string resources are encoded in UTF-16 by default)
94   * )
95   * See_Also:
96   * $(I gettext)'s advice on $(HTTPS www.gnu.org/software/gettext/manual/gettext.html#Preparing-Strings, separating strings)
97   * and $(HTTPS www.gnu.org/software/gettext/manual/gettext.html#Names, translating proper names)
98   * Macros:
99   *    SECTION2=<h3>$1</h3>
100   */
101 module i18n.text;
102 
103 private:
// Table of string resources for a single language.
struct StringTable
{
	// One string resource: its identifier and the text it stands for.
	struct StringResource
	{
		string id;
		string content;
	}

	// Language code of the catalog this table was built from.
	string language;
	// Resources of this table; `lookup` assumes they are sorted by `id`.
	StringResource[] strings;

	// Searches `strings` for the resource named `id`.
	// Returns the resource's content, or `null` when there is no such resource.
	string lookup(string id) const pure nothrow @safe @nogc
	{
		import std.range : assumeSorted;

		auto byId = strings.assumeSorted!"a.id < b.id";
		auto matches = byId.equalRange(StringResource(id, null));

		if(matches.empty)
			return null;
		return matches.front.content;
	}
}
125 
// Result of parsing one catalog document.
struct Catalog
{
	// A translation catalog announced by a `translation` element.
	struct Translation
	{
		string language; // language code of the translation
		string path;     // string-import path of the translation catalog
	}

	// Translations announced by the catalog, in document order.
	Translation[] translations;
	// String resources defined by the catalog.
	StringTable table;
}
135 
// Parses the XML catalog `source` into its list of announced translations
// and its table of string resources.
//
// Params:
//   language = language code of the translation catalog being parsed; pass
//              an empty string (or null) for the primary catalog, whose
//              language is then read from the root element's `language`
//              attribute
//   source   = XML text of the catalog
//   parent   = table of the primary catalog; every identifier in a
//              translation catalog must be present in it (not consulted
//              when parsing the primary catalog)
// Returns:
//   the parsed catalog, with `table.strings` sorted by identifier so that
//   `StringTable.lookup` can search it
// Throws:
//   Exception (through `enforce`) when the document violates any of the
//   structural rules checked below
Catalog parseCatalog(string language, string source, StringTable parent)
{
	// Only parser for a human-readable format I could find
	// that works at compile-time
	import arsd.dom;
	import std.algorithm.sorting : sort;
	import std.exception : enforce;
	import std.format : format;
	import std.path : buildPath;

	immutable isPrimaryCatalog = language.length == 0;
	auto document = new Document(source, true, true);

	Element root;
	foreach(elem; document["resources"])
	{
		// Reported with enforce like every other catalog error: an assert
		// would be compiled out in release builds, silently letting the
		// last `resources` element win.
		enforce(root is null,
			"catalog must contain only one `resources` element");
		root = elem;
	}
	enforce(root, "root element must be `resources`");

	if(isPrimaryCatalog)
	{
		enforce(root.hasAttribute("language"),
			"primary message catalog must have `language` attribute");
		language = root.getAttribute("language");
		// TODO: verify language specification
	}

	Catalog catalog;
	catalog.table.language = language;
	foreach(elem; document["resources translation"])
	{
		enforce(isPrimaryCatalog,
			"only the primary catalog can list translations");
		enforce(elem.hasAttribute("language"),
			"translation element must have `language` attribute");
		auto translationLanguage = elem.getAttribute("language");
		// TODO: verify language specification

		// Non-empty element content overrides the default file name
		auto inner = elem.innerText;
		catalog.translations ~= Catalog.Translation(translationLanguage,
			buildPath("i18n", inner.length? inner :
				"strings." ~ translationLanguage ~ ".xml"));
	}

	foreach(elem; document["resources string"])
	{
		enforce(elem.hasAttribute("name"), "string resource must have name attribute");

		auto id = elem.getAttribute("name");
		enforce(id.length, "string resource name cannot be empty");
		// TODO: verify that name follows D identifier rules

		// A translation may only translate resources the primary catalog defines
		if(!isPrimaryCatalog)
			enforce(parent.lookup(id).ptr, format(
				"unknown string identifier in catalog `%s`: `%s`",
				language, id));

		catalog.table.strings ~= StringTable.StringResource(id, elem.innerText);
	}

	// StringTable.lookup relies on this ordering
	catalog.table.strings.sort!"a.id < b.id";
	return catalog;
}
200 
unittest
{
	// Primary catalog announcing two translations (one with an explicit file
	// name) and defining two string resources that are not in sorted order
	enum primarySource = q{
<?xml version="1.0" encoding="utf-8"?>
<resources language="en">
	<translation language="de"/>
	<translation language="es">spanish.xml</translation>
	<string name="foo">bar</string>
	<string name="baz">foobar</string>
</resources>
		};

	auto parsed = parseCatalog(null, primarySource, StringTable.init);

	// The language comes from the root element
	assert(parsed.table.language == "en");

	// Translations keep document order; the default path is derived from the language
	alias T = Catalog.Translation;
	auto expectedTranslations = [
		T("de", "i18n/strings.de.xml"),
		T("es", "i18n/spanish.xml")
	];
	assert(parsed.translations == expectedTranslations);

	// Resources come back sorted by identifier
	alias R = StringTable.StringResource;
	auto expectedStrings = [R("baz", "foobar"), R("foo", "bar")];
	assert(parsed.table.strings == expectedStrings);
}
223 
// Platform-specific description of a locale selected by the user.
struct Locale
{
	version(Posix)
	{
		// Components of a POSIX locale specification
		string language;
		string country;
		string encoding;
		string variant;
	}
	else
	{
		// No other platform has an implementation
		static assert(false);
	}
}
231 
232 version(Posix)
233 {
234 	// POSIX and gettext standard
235 	auto selectedLocales() /+ nothrow +/ @safe
236 	{
237 		import std.algorithm : all, canFind, filter, find, map, splitter;
238 		import std.ascii : isAlpha;
239 		import std.functional : not;
240 		import std.process : environment;
241 		import std.range : chain, empty, front, only;
242 		import std.string : strip;
243 
244 		string localeSpecs = null;
245 
246 		// if $LANG == "C", ignore other envvars
247 		auto lang = environment.get("LANG").strip;
248 		if(lang != "C")
249 		{
250 			// These are sorted by priority
251 			static immutable envVars = ["LANGUAGE", "LC_ALL", "LC_MESSAGES"];
252 			auto localeSearch = envVars.map!(environment.get)
253 				.map!strip
254 				.chain(only(lang)) // $LANG is otherwise considered last
255 				.find!(var => var.length);
256 
257 			if (!localeSearch.empty)
258 			{
259 				if(localeSearch.front != "C" &&
260 				  localeSearch.front.front != '/')
261 					localeSpecs = localeSearch.front;
262 			}
263 		}
264 
265 		// spec = ll_CC.ENCODING@variant
266 		static Locale parseLocale(string spec)
267 		{
268 			import std.string : lastIndexOf;
269 			Locale locale;
270 			auto index = spec.lastIndexOf('@');
271 			if(index != -1)
272 			{
273 				locale.variant = spec[index + 1 .. $];
274 				spec = spec[0 .. index];
275 			}
276 			index = spec.lastIndexOf('.');
277 			if(index != -1)
278 			{
279 				locale.encoding = spec[index + 1 .. $];
280 				spec = spec[0 .. index];
281 			}
282 			index = spec.lastIndexOf('_');
283 			if(index != -1)
284 			{
285 				locale.country = spec[index + 1 .. $];
286 				locale.language = spec[0 .. index];
287 			}
288 			else
289 				locale.language = spec;
290 			return locale;
291 		}
292 
293 		return localeSpecs
294 			.splitter(':')
295 			.map!parseLocale
296 			.filter!(locale =>
297 				!locale.language.empty && locale.language.all!isAlpha);
298 	}
299 
300 	@safe unittest
301 	{
302 		import std.algorithm : equal;
303 		import std.process : environment;
304 
305 		foreach(envVar; ["LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG"])
306 			environment.remove(envVar);
307 
308 		environment["LANG"] = "en_US.UTF-8";
309 		assert(selectedLocales.equal([Locale("en", "US", "UTF-8")]));
310 
311 		environment["LANG"] = "en_US";
312 		assert(selectedLocales.equal([Locale("en", "US")]));
313 
314 		environment["LANG"] = "en";
315 		assert(selectedLocales.equal([Locale("en")]));
316 
317 		environment["LC_MESSAGES"] = "de_DE@euro";
318 		assert(selectedLocales.equal([Locale("de", "DE", null, "euro")]));
319 
320 		environment["LC_ALL"] = "ja.UTF-8";
321 		assert(selectedLocales.equal([Locale("ja", null, "UTF-8")]));
322 
323 		environment["LANGUAGE"] = "en_US.UTF-8:de_DE@euro:ja.UTF-8";
324 		assert(selectedLocales.equal([
325 			Locale("en", "US", "UTF-8"),
326 			Locale("de", "DE", null, "euro"),
327 			Locale("ja", null, "UTF-8", null)]));
328 
329 		environment["LANG"] = "C";
330 		assert(selectedLocales.empty);
331 
332 		environment["LANGUAGE"] = "en_US:en_GB:en_US:de_DE:en_GB";
333 		assert(selectedLocales.empty); // $LANG = "C" overrides $LANGUAGE
334 		environment.remove("LANG");
335 		assert(selectedLocales.equal([
336 			Locale("en", "US"),
337 			Locale("en", "GB"),
338 			Locale("en", "US"),
339 			Locale("de", "DE"),
340 			Locale("en", "GB")]));
341 	}
342 }
343 
344 public:
345 
/**
 * Accessor for the string resources of the catalogs supplied at
 * compile-time.
 *
 * Resources are reached either as members named after their identifier
 * (through $(D opDispatch)) or through $(D getEncoded). Referencing an
 * identifier that is not defined in the primary catalog does not compile.
 */
struct Strings()
{
	import std.algorithm.iteration : map;
	import std.algorithm.sorting : sort;
	import std.array : array;
	import std.meta : staticMap;

	static if(__VERSION__ < 2070)
		import std_backport.meta : aliasSeqOf;
	else
		import std.meta : aliasSeqOf;

	import std.path : buildPath;
	import std.range : chain, only, zip;
	import std.typecons : Tuple;
	import std.traits : isSomeString;

	private:
	// The primary catalog, parsed at compile-time from its string import
	enum primaryCatalog = parseCatalog(null,
		import(buildPath("i18n", "strings.xml")), StringTable.init);
	static immutable primaryTable = primaryCatalog.table;

	// Languages and import paths of the translations the primary catalog lists
	enum languages = primaryCatalog.translations.map!(
			(ref Catalog.Translation t) => t.language).array;
	enum paths = primaryCatalog.translations.map!(
			(ref Catalog.Translation t) => t.path).array;

	// (language, XML source) pair for every listed translation
	enum Import(string path) = import(path);
	enum sources = zip(languages, [staticMap!(Import, aliasSeqOf!paths)]).array;

	static if(sources.length)
	{
		// The primary table together with the table of every translation,
		// sorted by language so that a language can be found by binary search
		static immutable translationTables = chain(only(primaryCatalog.table),
				sources.map!((ref Tuple!(string, string) pair) =>
					parseCatalog(pair.expand, primaryCatalog.table).table))
			.array
			.sort!"a.language < b.language".release();

		// Indexes into translationTables of the tables chosen for the
		// running user, highest priority first; only the first
		// numChosenLocales entries are meaningful
		static immutable size_t[translationTables.length] translationIndexesBuffer;
		static immutable size_t numChosenLocales;

		// The meaningful part of translationIndexesBuffer
		static immutable(size_t)[] translationIndexes() @property pure nothrow @safe @nogc
		{
			return translationIndexesBuffer[0 .. numChosenLocales];
		}

		// Chooses the tables to consult from the user's selected locales
		shared static this() @safe
		{
			import std.algorithm : canFind, filter, map;
			import std.range : assumeSorted, empty;

			size_t i = 0;
			foreach(translationIndex; selectedLocales.map!(locale =>
					translationTables.assumeSorted!("a.language < b.language")
					.equalRange(StringTable(locale.language)))
				.filter!(match => !match.empty) // No translation for selected language
				.map!(match => match.release.ptr - translationTables.ptr))
			{
				// If this is the language of a higher priority locale,
				// ignore this entry
				if(!translationIndexesBuffer[0 .. i].canFind(translationIndex))
					translationIndexesBuffer[i++] = translationIndex;
			}
			numChosenLocales = i;
		}
	}

	public:
	@disable this(this);

	/**
	  * Returns:
	  *   $(D true) iff id is defined in the primary catalog
	  * Complexity:
	  *   $(BIGOH log n)
	  */
	static bool identifierExists(string id) pure nothrow @safe @nogc
	{
		// lookup signals absence with a null string. `!= null` would compare
		// contents and therefore also report a defined resource with empty
		// (but non-null) content as missing, so test identity instead.
		return primaryTable.lookup(id) !is null;
	}

	// Implementation shared by opDispatch and getEncoded: the text of
	// resource `id` in encoding `S`
	private template opDispatchImpl(string id, S)
	{
		import std.conv : to;
		// Text from the primary catalog, used when no chosen table has a translation
		static immutable fallback = primaryTable.lookup(id).to!S;

		static if(sources.length)
		{
			// Text of `id` in every table, in the order of translationTables;
			// tables lacking `id` contribute the conversion of a null string
			static immutable S[translationTables.length] translationTable =
				translationTables.map!((ref immutable StringTable table) =>
					table.lookup(id).to!S)
				.array;

			static S opDispatchImpl() @property pure nothrow @safe @nogc
			{
				// First chosen table that has a translation wins
				foreach(index; translationIndexes)
				{
					auto text = translationTable[index];
					if(text.ptr)
						return text;
				}
				return fallback;
			}
		}
		else
			alias opDispatchImpl = fallback;
	}

	version(i18n_use_utf32)
		alias I18NString = dstring;
	else version(i18n_use_utf16)
		alias I18NString = wstring;
	else
		alias I18NString = string;

	version(i18n_list_references)
	{
		// These variants additionally print the location of every reference
		// during compilation
		template opDispatch(string id, string file = __FILE__, uint line = __LINE__)
			if(identifierExists(id))
		{
			alias opDispatch = getEncoded!(id, I18NString, file, line);
		}

		template getEncoded(string id, S, string file = __FILE__, uint line = __LINE__)
			if(identifierExists(id) && isSomeString!S)
		{
			import std.format : format;
			pragma(msg, format("i18n %s(%s): %s", file, line, id));
			alias getEncoded = opDispatchImpl!(id, S);
		}
	}
	else
	{
		template opDispatch(string id)
			if(identifierExists(id))
		{
			alias opDispatch = opDispatchImpl!(id, I18NString);
		}

		template getEncoded(string id, S)
			if(identifierExists(id) && isSomeString!S)
		{
			alias getEncoded = opDispatchImpl!(id, S);
		}
	}

	version(D_Ddoc)
	{
		/**
		 * Default encoding for string resources, returned by
		 * $(MREF Strings.opDispatch).
		 *
		 * Set version $(I i18n_use_utf32) to use $(D dstring), or
		 * version $(I i18n_use_utf16) to use $(D wstring); otherwise uses
		 * $(D string) (UTF-8).
		 */
		alias I18NString = string;

		/**
		 * Get the text for $(I id) according to the user's preferred
		 * language(s).
		 * Params:
		 *   id = identifier of string resource (the $(D name) attribute)
		 *   S = encoding for returned string, either $(D string),
		 * $(D wstring) or $(D dstring)
		 * Complexity:
		 *   $(BIGOH 1). The upper bound is proportional to the number of
		 * translations provided at compile-time. The number of string
		 * resources does $(I not) affect runtime.
		 * Example:
		 * ----
		 * void main()
		 * {
		 *     import std.stdio, i18n.text;
		 *     writeln(strings.hello_world); // Default encoding
		 *     writeln(strings.getEncoded!("hello_world", wstring)); // UTF-16
		 * }
		 * ----
		 */
		@property pure nothrow @safe @nogc
		static I18NString opDispatch(string id)()
			if(identifierExists(id));

		/// Ditto
		pure nothrow @safe @nogc
		static S getEncoded(string id, S)()
			if(identifierExists(id) && isSomeString!S);
	}
}
536 
/**
 * Entry point for referencing string resources, e.g.
 * $(D strings.hello_world).
 *
 * Returns:
 *  a default-constructed $(MREF Strings) instance
 * See_Also:
 *  $(MREF Strings)
 */
@property Strings!() strings()() pure nothrow @safe @nogc
{
	return typeof(return)();
}
545 
// An identifier that is not in the primary catalog must not compile.
// NOTE(review): presumably built against a catalog without resources - confirm
version(test_empty) unittest
{
	enum resolves = __traits(compiles, strings.nonexistant);
	static assert(!resolves);
}
550 
// The catalogs in test/ja-de provide `ja` and `de` translations, with `en` primary.
// The following unit test expects priority ja -> de.
// Therefore, run with: LANGUAGE="[x:]ja_JP.UTF-8:[y:]de_DE.UTF-8[z:]"
// Where x, y and z can contain anything but an `en` spec
version(test_ja_de) unittest
{
	import std.algorithm : equal, map;

	alias Table = Strings!();

	// Languages of the chosen tables, highest priority first
	auto chosenLanguages = Table.translationIndexes
		.map!(i => Table.translationTables[i].language);
	assert(chosenLanguages.equal(["ja", "de"]));

	// Default encoding
	assert(strings.greeting == "今日は"); // ja
	assert(strings.yes == "ja"); // de
	assert(strings.no == "no"); // fallback to primary catalog (en)

	enum unknownResolves = __traits(compiles, strings.nonexistant);
	static assert(!unknownResolves);

	// Explicit encodings
	assert(strings.getEncoded!("greeting", dstring) == "今日は"d);
	assert(strings.getEncoded!("yes", wstring) == "ja"w);
	assert(strings.getEncoded!("no", string) == "no"c);

	enum unknownEncodedResolves = __traits(compiles,
		strings.getEncoded!("nonexistant", dstring));
	static assert(!unknownEncodedResolves);
}
576