* another rework of the locale parser. Hopefully feature complete now.
author Wilfried Göesgens <willi@citadel.org>
Mon, 12 Dec 2005 00:53:19 +0000 (00:53 +0000)
committer Wilfried Göesgens <willi@citadel.org>
Mon, 12 Dec 2005 00:53:19 +0000 (00:53 +0000)
webcit/ChangeLog
webcit/gettext.c

index ce8738500df3081ce37a6b5ce51ecbc4b6695e4e..589ef13fdb93651f4b3bf6041b41105433f49d18 100644 (file)
@@ -1,5 +1,8 @@
 $Id$
 
+Mon Dec 12 1:40:03 CET 2005 dothebart
+* another rework of the locale parser. Hopefully feature complete now.
+       
 Sun Dec 11 00:03:04 EST 2005 ajc
 * Mercilessly ripped out all of the gratuitously complex GNU libintl
   detection m4 crapola and replaced it with some simple autoconf directives
index c995741107326f4932d0567cd06dc4cb547d2b50..36e7548b3fc943068d0b55f21f3ac72a09998827 100644 (file)
@@ -4,8 +4,11 @@
 
 #ifdef ENABLE_NLS
 
-#define NUM_LANGS 3
+#define NUM_LANGS 4
+#define SEARCH_LANG 20
+
 char *AvailLang[NUM_LANGS] = {
+       "C",
        "en_US",
        "de_DE",
        "it_IT"
@@ -16,47 +19,159 @@ locale_t wc_locales[NUM_LANGS];
 typedef struct _lang_pref{
        char lang[16];
        char region[16];
-       char *match;
-       double Priority;
+       long priority;
+       int availability;
+       int selectedlang;
 } LangStruct;
 
-/* TODO: we skip the language weightening so far. */
+/* It seems most browsers send q-values with only one digit after the decimal point, even if more than 10 locales are listed. */
+/* opera: */
+/* Accept-Language: sq;q=1.0,de;q=0.9,as;q=0.8,ar;q=0.7,bn;q=0.6,zh-cn;q=0.5,kn;q=0.4,ch;q=0.3,fo;q=0.2,gn;q=0.1,ce;q=0.1,ie;q=0.1 */
+/* Firefox */
 /* Accept-Language: 'de-de,en-us;q=0.7,en;q=0.3' */
 /* Accept-Language: de,en-ph;q=0.8,en-us;q=0.5,de-at;q=0.3 */
+/* Accept-Language: de,en-us;q=0.9,it;q=0.9,de-de;q=0.8,en-ph;q=0.7,de-at;q=0.7,zh-cn;q=0.6,cy;q=0.5,ar-om;q=0.5,en-tt;q=0.4,xh;q=0.3,nl-be;q=0.3,cs;q=0.2,sv;q=0.1,tk;q=0.1 */
+
 void httplang_to_locale(char *LocaleString)
 {
-       char selected_locale[16];
-       int i, j;
-       char lang[64];
-       int num_accept = 0;
-
-       lprintf(9, "languageAccept: %s\n", LocaleString);
-
-       strcpy(selected_locale, "C");
-       num_accept = num_tokens(LocaleString, ',');
+       LangStruct wanted_locales[SEARCH_LANG];
+       LangStruct *ls;
 
-       for (i=num_accept-1; i>=0; --i) {
-               extract_token(lang, LocaleString, i, ',', sizeof lang);
+       int i = 0;
+       int j = 0;
+       size_t len = strlen(LocaleString);
+       long prio;
+       int av;
+       int nBest;
+       int nParts;
+       char *search = (char *) malloc(len);
+       // locale_t my_Locale;
+       // locale_t my_Empty_Locale;
+       
+       memcpy(search, LocaleString, len);
+       search[len] = '\0';
+       nParts=num_tokens(search,',');
+       for (i=0; ((i<nParts)&&(i<SEARCH_LANG)); i++)
+        {
+                       char buf[16];
+                       char sbuf[16];
+                       char lbuf[16];
+                       int blen;
+                       
+                       ls=&wanted_locales[i];
 
-               /* Strip out the weights; we don't use them.  Also convert
-                * hyphens to underscores.
-                */
-               for (j=0; j<strlen(lang); ++j) {
-                       if (lang[j] == '-') lang[j] = '_';
-                       if (lang[j] == ';') lang[j] = 0;
-               }
-
-               for (j=0; j<NUM_LANGS; ++j) {
-                       if (!strncasecmp(lang, AvailLang[j], strlen(lang))) {
-                               strcpy(selected_locale, AvailLang[j]);
+                       extract_token(&buf[0],search, i,',',16);
+                       /* check whether this list item carries a weight such as ;q=n */
+                       if (num_tokens(&buf[0],'=')>1) {
+                               int sbuflen, k;
+                               extract_token(&sbuf[0],&buf[0], 1,'=',16);
+                               sbuflen=strlen(&sbuf[0]);
+                               for (k=0; k<sbuflen; k++) if (sbuf[k]=='.') sbuf[k]='0';
+                               ls->priority=atol(&sbuf[0]);
+                       }
+                       else {
+                               ls->priority=1000;
+                       }
+                       /* get the locale part */
+                       extract_token(&sbuf[0],&buf[0],0,';',16);
+                       /* get the lang part, which should always be there */
+                       extract_token(&ls->lang[0],&sbuf[0],0,'-',16);
+                       /* get the area code if any. */
+                       if (num_tokens(&sbuf[0],'-')>1) {
+                               extract_token(&ls->region[0],&sbuf[0],1,'-',16);
+                       }
+                       else { /* no area code? use lang code */
+                               blen=strlen(&ls->lang[0]);
+                               memcpy(&ls->region[0], ls->lang,blen);
+                               ls->region[blen]='\0';
+                       } /* area codes are uppercase */
+                       blen=strlen(&ls->region[0]);
+                       for (j=0; j<blen; j++)
+                               {
+                                       int chars=toupper(ls->region[j]);
+                                       ls->region[j]=(char)chars;/*todo ?! */
+                               }
+                       sprintf(&lbuf[0],"%s_%s",&ls->lang[0],&ls->region[0]);
+                       
+                       /* check if we have this lang */
+                       ls->availability=1;
+                       ls->selectedlang=-1;
+                       for (j=0; j<NUM_LANGS; j++) {
+                               int result;
+                               /* match against the LANG part */
+                               result=strcasecmp(&ls->lang[0], AvailLang[j]);
+                               if ((result<0)&&(result<ls->availability)){
+                                       ls->availability=result;
+                                       ls->selectedlang=j;
+                               }
+                               /* match against lang and locale */
+                               if (0==strcasecmp(&lbuf[0], AvailLang[j])){
+                                       ls->availability=0;
+                                       ls->selectedlang=j;
+                                       j=NUM_LANGS;
+                               }
+                       }
+        }
+       
+       prio=0;
+       av=-1000;
+       nBest=-1;
+       for (i=0; ((i<nParts)&&(i<SEARCH_LANG)); i++)
+               {
+                       ls=&wanted_locales[i];
+                       if ((ls->availability<=0)&& 
+                               (av<ls->availability)&&
+                               (prio<ls->priority)&&
+                               (ls->selectedlang!=-1)){
+                               nBest=ls->selectedlang;
+                               av=ls->availability;
+                               prio=ls->priority;
                        }
                }
-       }
-
-       lprintf(9, "language found: %s\n", selected_locale);
-       set_selected_language(selected_locale);
+       if (nBest==-1) /* fall back to C */
+               nBest=0;
+       WC->selected_language=nBest;
+       lprintf(9, "language found: %s\n", AvailLang[WC->selected_language]);
+       //      set_selected_language(selected_locale);
 }
 
+/* TODO: we skip the language weighting so far. */
+/* Accept-Language: 'de-de,en-us;q=0.7,en;q=0.3' */
+/* Accept-Language: de,en-ph;q=0.8,en-us;q=0.5,de-at;q=0.3 */
+//void httplang_to_locale(char *LocaleString)
+//{
+//     char selected_locale[16];
+//     int i, j;
+//     char lang[64];
+//     int num_accept = 0;
+//
+//     lprintf(9, "languageAccept: %s\n", LocaleString);
+//
+//     strcpy(selected_locale, "C");
+//     num_accept = num_tokens(LocaleString, ',');
+//
+//     for (i=num_accept-1; i>=0; --i) {
+//             extract_token(lang, LocaleString, i, ',', sizeof lang);
+//
+//             /* Strip out the weights; we don't use them.  Also convert
+//              * hyphens to underscores.
+//              */
+//             for (j=0; j<strlen(lang); ++j) {
+//                     if (lang[j] == '-') lang[j] = '_';
+//                     if (lang[j] == ';') lang[j] = 0;
+//             }
+//
+//             for (j=0; j<NUM_LANGS; ++j) {
+//                     if (!strncasecmp(lang, AvailLang[j], strlen(lang))) {
+//                             strcpy(selected_locale, AvailLang[j]);
+//                     }
+//             }
+//     }
+//
+//     lprintf(9, "language found: %s\n", selected_locale);
+//     set_selected_language(selected_locale);
+//}
+
 
 void offer_languages(void) {
        int i;