Began making changes to do better handling of character sets.
[citadel.git] / webcit / gettext.c
1 /*
2  * $Id
3  */
4 /**
5  * \defgroup LocaleHeaderParser Parse the browser http locale headers and set the NLS stuff.
6  * \ingroup WebcitHttpServer 
7  */
8 /*@{*/
9 #include "webcit.h"
10 #include "webserver.h"
11
12 #ifdef ENABLE_NLS
13
#define NUM_LANGS 5 /**< number of entries in AvailLang[] and wc_locales[]; must match the initializer below */
#define SEARCH_LANG 20 /**< upper bound on the Accept-Language entries httplang_to_locale() examines */

/**
 * Locales this build can offer.  The array index is the value stored in
 * WC->selected_language and is also the index into wc_locales[].
 * Entry 0 ("C") is what httplang_to_locale() falls back to when nothing matches.
 */
char *AvailLang[NUM_LANGS] = {
        "C",
        "en_US",
        "de_DE",
        "it_IT",
        "en_GB"
};

locale_t wc_locales[NUM_LANGS]; /**< one locale object per AvailLang[] entry, filled in by initialize_locales(); an entry stays NULL when its locale could not be created */
27
/**
 * One entry parsed from the browser's Accept-Language header.
 * Instances are filled in and evaluated by httplang_to_locale().
 */
typedef struct _lang_pref{
        char lang[16];          /**< language part of the tag (the text before the '-') */
        char region[16];        /**< region part, uppercased; a copy of lang when the tag has no region */
        long priority;          /**< weight derived from the ";q=" value; 1000 when no weight is given */
        int availability;       /**< match quality: 1 = no candidate, 0 = exact lang_REGION match, negative = inexact candidate */
        int selectedlang;       /**< index into AvailLang[] of the matched locale, or -1 if none */
} LangStruct;
36
37 /* \brief parse browser locale header 
38  * seems as most browsers just do a one after coma value even if more than 10 locales are available. Sample strings:
39  * opera: 
40  * Accept-Language: sq;q=1.0,de;q=0.9,as;q=0.8,ar;q=0.7,bn;q=0.6,zh-cn;q=0.5,kn;q=0.4,ch;q=0.3,fo;q=0.2,gn;q=0.1,ce;q=0.1,ie;q=0.1 
41  * Firefox 
42  * Accept-Language: 'de-de,en-us;q=0.7,en;q=0.3' 
43  * Accept-Language: de,en-ph;q=0.8,en-us;q=0.5,de-at;q=0.3 
44  * Accept-Language: de,en-us;q=0.9,it;q=0.9,de-de;q=0.8,en-ph;q=0.7,de-at;q=0.7,zh-cn;q=0.6,cy;q=0.5,ar-om;q=0.5,en-tt;q=0.4,xh;q=0.3,nl-be;q=0.3,cs;q=0.2,sv;q=0.1,tk;q=0.1 
45  * \param LocaleString the string from the browser http headers
46  */
47
48 void httplang_to_locale(char *LocaleString)
49 {
50         LangStruct wanted_locales[SEARCH_LANG];
51         LangStruct *ls;
52
53         int i = 0;
54         int j = 0;
55         size_t len = strlen(LocaleString);
56         long prio;
57         int av;
58         int nBest;
59         int nParts;
60         char *search = (char *) malloc(len);
61         
62         memcpy(search, LocaleString, len);
63         search[len] = '\0';
64         nParts=num_tokens(search,',');
65         for (i=0; ((i<nParts)&&(i<SEARCH_LANG)); i++)
66         {
67                         char buf[16];
68                         char sbuf[16];
69                         char lbuf[16];
70                         int blen;
71                         
72                         ls=&wanted_locales[i];
73
74                         extract_token(&buf[0],search, i,',',16);
75                         /** we are searching, if this list item has something like ;q=n*/
76                         if (num_tokens(&buf[0],'=')>1) {
77                                 int sbuflen, k;
78                                 extract_token(&sbuf[0],&buf[0], 1,'=',16);
79                                 sbuflen=strlen(&sbuf[0]);
80                                 for (k=0; k<sbuflen; k++) if (sbuf[k]=='.') sbuf[k]='0';
81                                 ls->priority=atol(&sbuf[0]);
82                         }
83                         else {
84                                 ls->priority=1000;
85                         }
86                         /** get the locale part */
87                         extract_token(&sbuf[0],&buf[0],0,';',16);
88                         /** get the lang part, which should be allways there */
89                         extract_token(&ls->lang[0],&sbuf[0],0,'-',16);
90                         /** get the area code if any. */
91                         if (num_tokens(&sbuf[0],'-')>1) {
92                                 extract_token(&ls->region[0],&sbuf[0],1,'-',16);
93                         }
94                         else { /** no ara code? use lang code */
95                                 blen=strlen(&ls->lang[0]);
96                                 memcpy(&ls->region[0], ls->lang,blen);
97                                 ls->region[blen]='\0';
98                         } /** area codes are uppercase */
99                         blen=strlen(&ls->region[0]);
100                         for (j=0; j<blen; j++)
101                                 {
102                                         int chars=toupper(ls->region[j]);
103                                         ls->region[j]=(char)chars;/** \todo ?! */
104                                 }
105                         sprintf(&lbuf[0],"%s_%s",&ls->lang[0],&ls->region[0]);
106                         
107                         /** check if we have this lang */
108                         ls->availability=1;
109                         ls->selectedlang=-1;
110                         for (j=0; j<NUM_LANGS; j++) {
111                                 int result;
112                                 /** match against the LANG part */
113                                 result=strcasecmp(&ls->lang[0], AvailLang[j]);
114                                 if ((result<0)&&(result<ls->availability)){
115                                         ls->availability=result;
116                                         ls->selectedlang=j;
117                                 }
118                                 /** match against lang and locale */
119                                 if (0==strcasecmp(&lbuf[0], AvailLang[j])){
120                                         ls->availability=0;
121                                         ls->selectedlang=j;
122                                         j=NUM_LANGS;
123                                 }
124                         }
125         }
126         
127         prio=0;
128         av=-1000;
129         nBest=-1;
130         for (i=0; ((i<nParts)&&(i<SEARCH_LANG)); i++)
131                 {
132                         ls=&wanted_locales[i];
133                         if ((ls->availability<=0)&& 
134                                 (av<ls->availability)&&
135                                 (prio<ls->priority)&&
136                                 (ls->selectedlang!=-1)){
137                                 nBest=ls->selectedlang;
138                                 av=ls->availability;
139                                 prio=ls->priority;
140                         }
141                 }
142         if (nBest==-1) /** fall back to C */
143                 nBest=0;
144         WC->selected_language=nBest;
145         lprintf(9, "language found: %s\n", AvailLang[WC->selected_language]);
146         if (search != NULL) {
147                 free(search);
148         }
149 }
150
/* TODO: the older implementation kept below for reference ignores the language weighting (the ";q=" values). */
152 /* Accept-Language: 'de-de,en-us;q=0.7,en;q=0.3' */
153 /* Accept-Language: de,en-ph;q=0.8,en-us;q=0.5,de-at;q=0.3 */
154 //void httplang_to_locale(char *LocaleString)
155 //{
156 //      char selected_locale[16];
157 //      int i, j;
158 //      char lang[64];
159 //      int num_accept = 0;
160 //
161 //      lprintf(9, "languageAccept: %s\n", LocaleString);
162 //
163 //      strcpy(selected_locale, "C");
164 //      num_accept = num_tokens(LocaleString, ',');
165 //
166 //      for (i=num_accept-1; i>=0; --i) {
167 //              extract_token(lang, LocaleString, i, ',', sizeof lang);
168 //
169 //              /* Strip out the weights; we don't use them.  Also convert
170 //               * hyphens to underscores.
171 //               */
172 //              for (j=0; j<strlen(lang); ++j) {
173 //                      if (lang[j] == '-') lang[j] = '_';
174 //                      if (lang[j] == ';') lang[j] = 0;
175 //              }
176 //
177 //              for (j=0; j<NUM_LANGS; ++j) {
178 //                      if (!strncasecmp(lang, AvailLang[j], strlen(lang))) {
179 //                              strcpy(selected_locale, AvailLang[j]);
180 //                      }
181 //              }
182 //      }
183 //
184 //      lprintf(9, "language found: %s\n", selected_locale);
185 //      set_selected_language(selected_locale);
186 //}
187
188
189 /**
190  * \brief show the language chooser on the login dialog
191  * depending on the browser locale change the sequence of the 
192  * language chooser.
193  */
194 void offer_languages(void) {
195         int i;
196
197         wprintf("<select name=\"language\" size=\"1\">\n");
198
199         for (i=0; i < NUM_LANGS; ++i) {
200                 wprintf("<option %s value=%s>%s</option>\n",
201                         ((WC->selected_language == i) ? "selected" : ""),
202                         AvailLang[i],
203                         AvailLang[i]
204                 );
205         }
206
207         wprintf("</select>\n");
208 }
209
210 /**
211  * \brief Set the selected language for this session.
212  * \param lang the locale to set.
213  */
214 void set_selected_language(char *lang) {
215         int i;
216
217         for (i=0; i<NUM_LANGS; ++i) {
218                 if (!strcasecmp(lang, AvailLang[i])) {
219                         WC->selected_language = i;
220                 }
221         }
222 }
223
224 /**
225  * \brief Activate the selected language for this session.
226  */
227 void go_selected_language(void) {
228         if (WC->selected_language < 0) return;
229         uselocale(wc_locales[WC->selected_language]);   /** switch locales */
230         textdomain(textdomain(NULL));                   /** clear the cache */
231 }
232
/**
 * \brief Deactivate the selected language for this session.
 *
 * Switches back to the global locale (LC_GLOBAL_LOCALE).
 */
void stop_selected_language(void) {
        char *current_domain;

        /** switch locales */
        uselocale(LC_GLOBAL_LOCALE);

        /** re-select the current text domain to clear the cache */
        current_domain = textdomain(NULL);
        textdomain(current_domain);
}
240
241
242 /**
243  * \brief Create a locale_t for each available language
244  */
245 void initialize_locales(void) {
246         int i;
247         locale_t Empty_Locale;
248         char buf[32];
249
250         /* create default locale */
251         Empty_Locale = newlocale(LC_ALL_MASK, NULL, NULL);
252
253         for (i = 0; i < NUM_LANGS; ++i) {
254                 if (i == 0) {
255                         sprintf(buf, "%s", AvailLang[i]);       // locale 0 (C) is ascii, not utf-8
256                 }
257                 else {
258                         sprintf(buf, "%s.UTF8", AvailLang[i]);
259                 }
260                 wc_locales[i] = newlocale(
261                         (LC_MESSAGES_MASK|LC_TIME_MASK),
262                         buf,
263                         (((i > 0) && (wc_locales[0] != NULL)) ? wc_locales[0] : Empty_Locale)
264                 );
265                 if (wc_locales[i] == NULL) {
266                         lprintf(1, "Error configuring locale for %s: %s\n",
267                                 buf,
268                                 strerror(errno)
269                         );
270                 }
271                 else {
272                         lprintf(3, "Configured available locale: %s\n", buf);
273                 }
274         }
275 }
276
277
278 #else   /* ENABLE_NLS */
/**
 * \brief Stand-in for builds without NLS support.
 *
 * There is nothing to choose from, so only a fixed label is written.
 */
void offer_languages(void)
{
        const char *only_choice = "English (US)";

        wprintf("%s", only_choice);
}
283
/**
 * \brief Stand-in for builds without NLS support; the request is ignored.
 * \param lang unused
 */
void set_selected_language(char *lang)
{
        (void)lang;     /* no language selection without NLS */
}
287
/**
 * \brief Stand-in for builds without NLS support; there is no locale to activate.
 */
void go_selected_language(void)
{
        return;         /* intentionally empty */
}
291
/**
 * \brief Stand-in for builds without NLS support; there is no locale to deactivate.
 */
void stop_selected_language(void)
{
        return;         /* intentionally empty */
}
295
296 #endif  /* ENABLE_NLS */
297
298
299 /*@}*/