From nobody Sat Feb 7 09:36:58 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1494880715818945.1996760590217; Mon, 15 May 2017 13:38:35 -0700 (PDT) Received: from localhost ([::1]:38661 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1dAMlF-0007H0-8j for importer@patchew.org; Mon, 15 May 2017 16:38:33 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:35079) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1dAMeP-0001Q0-J1 for qemu-devel@nongnu.org; Mon, 15 May 2017 16:31:31 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1dAMeN-0003Xw-LJ for qemu-devel@nongnu.org; Mon, 15 May 2017 16:31:29 -0400 Received: from smtp2-g21.free.fr ([212.27.42.2]:57070) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1dAMeJ-0003U3-2O; Mon, 15 May 2017 16:31:23 -0400 Received: from localhost.localdomain (unknown [82.227.227.196]) by smtp2-g21.free.fr (Postfix) with ESMTP id D16682003BE; Mon, 15 May 2017 22:31:21 +0200 (CEST) From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= To: qemu-devel@nongnu.org Date: Mon, 15 May 2017 22:31:08 +0200 Message-Id: <20170515203114.9477-9-hpoussin@reactos.org> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20170515203114.9477-1-hpoussin@reactos.org> References: <20170515203114.9477-1-hpoussin@reactos.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-detected-operating-system: by eggs.gnu.org: Windows NT kernel 
[generic] [fuzzy] X-Received-From: 212.27.42.2 Subject: [Qemu-devel] [PATCH 08/13] vvfat: correctly create long names for non-ASCII filenames X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , =?UTF-8?q?Herv=C3=A9=20Poussineau?= , qemu-block@nongnu.org, Max Reitz Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Type: text/plain; charset="utf-8" Assume that input filename is encoded as UTF-8, so correctly create UTF-16 encoding. Reuse long_file_name structure to give back to caller the generated long name. It will be used in next commit to transform the long file name into short file name. Reference: http://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring Signed-off-by: Herv=C3=A9 Poussineau --- block/vvfat.c | 132 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 35 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 7da07068b8..5f6356c834 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -357,6 +357,23 @@ typedef struct BDRVVVFATState { Error *migration_blocker; } BDRVVVFATState; =20 +typedef struct { + /* + * Since the sequence number is at most 0x3f, and the filename + * length is at most 13 times the sequence number, the maximal + * filename length is 0x3f * 13 bytes. + */ + unsigned char name[0x3f * 13 + 1]; + int checksum, len; + int sequence_number; +} long_file_name; + +static void lfn_init(long_file_name *lfn) +{ + lfn->sequence_number =3D lfn->len =3D 0; + lfn->checksum =3D 0x100; +} + /* take the sector position spos and convert it to Cylinder/Head/Sector po= sition * if the position is outside the specified geometry, fill maximum value f= or CHS * and return 1 to signal overflow.
@@ -418,29 +435,90 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int= heads, int secs) =20 /* direntry functions */ =20 -/* dest is assumed to hold 258 bytes, and pads with 0xffff up to next mult= iple of 26 */ -static inline int short2long_name(char* dest,const char* src) -{ - int i; - int len; - for(i=3D0;i<129 && src[i];i++) { - dest[2*i]=3Dsrc[i]; - dest[2*i+1]=3D0; +/* fills lfn with UTF-16 representation of src filename */ +/* return true if src is valid UTF-8 string, false otherwise */ +static bool filename2long_name(long_file_name *lfn, const char* src) +{ + uint8_t *dest =3D lfn->name; + int i =3D 0, j; + int len =3D 0; + while (src[i]) { + uint32_t uni =3D 0; + size_t todo; + uint8_t ch =3D src[i++]; + if (ch <=3D 0x7f) { + uni =3D ch; + todo =3D 0; + } else if (ch <=3D 0xbf) { + return false; + } else if (ch <=3D 0xdf) { + uni =3D ch & 0x1f; + todo =3D 1; + } else if (ch <=3D 0xef) { + uni =3D ch & 0x0f; + todo =3D 2; + } else if (ch <=3D 0xf7) { + uni =3D ch & 0x07; + todo =3D 3; + } else { + return false; + } + for (j =3D 0; j < todo; j++) { + uint8_t ch; + if (src[i] =3D=3D '\0') { + return false; + } + ch =3D src[i++]; + if (ch < 0x80 || ch >=3D 0xbf) { + return false; + } + uni <<=3D 6; + uni +=3D ch & 0x3f; + } + if (uni >=3D 0xd800 && uni <=3D 0xdfff) { + return false; + } else if (uni >=3D 0x10ffff) { + return false; + } + if (uni <=3D 0xffff) { + dest[len++] =3D uni & 0xff; + dest[len++] =3D uni >> 8; + } else { + uint16_t w; + uni -=3D 0x10000; + w =3D (uni >> 10) + 0xd800; + dest[len++] =3D w & 0xff; + dest[len++] =3D w >> 8; + w =3D (uni & 0x3ff) + 0xdc00; + dest[len++] =3D w & 0xff; + dest[len++] =3D w >> 8; + } + } + dest[len++] =3D 0; + dest[len++] =3D 0; + while (len % 26 !=3D 0) { + dest[len++] =3D 0xff; } - len=3D2*i; - dest[2*i]=3Ddest[2*i+1]=3D0; - for(i=3D2*i+2;(i%26);i++) - dest[i]=3D0xff; - return len; + lfn->len =3D len; + return true; } =20 -static inline direntry_t* create_long_filename(BDRVVVFATState* s,const cha= r* 
filename) +static direntry_t *create_long_filename(BDRVVVFATState *s, const char *fil= ename, + long_file_name *lfn) { - char buffer[258]; - int length=3Dshort2long_name(buffer,filename), - number_of_entries=3D(length+25)/26,i; + uint8_t *buffer; + int length, number_of_entries, i; direntry_t* entry; =20 + lfn_init(lfn); + if (!filename2long_name(lfn, filename)) { + fprintf(stderr, "vvfat: invalid UTF-8 name: %s\n", filename); + return NULL; + } + buffer =3D lfn->name; + length =3D lfn->len; + number_of_entries =3D (length + 25) / 26; + for(i=3D0;idirectory)); entry->attributes=3D0xf; @@ -612,6 +690,7 @@ static inline direntry_t* create_short_and_long_name(BD= RVVVFATState* s, int i,j,long_index=3Ds->directory.next; direntry_t* entry =3D NULL; direntry_t* entry_long =3D NULL; + long_file_name lfn; =20 if(is_dot) { entry=3Darray_get_next(&(s->directory)); @@ -620,7 +699,7 @@ static inline direntry_t* create_short_and_long_name(BD= RVVVFATState* s, return entry; } =20 - entry_long=3Dcreate_long_filename(s,filename); + entry_long =3D create_long_filename(s, filename, &lfn); =20 i =3D strlen(filename); for(j =3D i - 1; j>0 && filename[j]!=3D'.';j--); @@ -1575,23 +1654,6 @@ static void schedule_mkdir(BDRVVVFATState* s, uint32= _t cluster, char* path) commit->action =3D ACTION_MKDIR; } =20 -typedef struct { - /* - * Since the sequence number is at most 0x3f, and the filename - * length is at most 13 times the sequence number, the maximal - * filename length is 0x3f * 13 bytes. - */ - unsigned char name[0x3f * 13 + 1]; - int checksum, len; - int sequence_number; -} long_file_name; - -static void lfn_init(long_file_name* lfn) -{ - lfn->sequence_number =3D lfn->len =3D 0; - lfn->checksum =3D 0x100; -} - /* return 0 if parsed successfully, > 0 if no long name, < 0 if error */ static int parse_long_name(long_file_name* lfn, const direntry_t* direntry) --=20 2.11.0