From 618e4fa14828e73c2d2df63111aa48e35f1aeaee Mon Sep 17 00:00:00 2001 From: ygrek Date: Sun, 9 Jun 2024 15:29:48 -0400 Subject: [PATCH] wip --- src/utils/cdk/bzip2.ml | 34 ++++++++++++++++----------------- src/utils/cdk/bzip2.mli | 10 +++++----- src/utils/cdk/bzlib.ml | 4 ++-- src/utils/cdk/bzlib.mli | 4 ++-- src/utils/cdk/filename2.ml | 21 ++++++++++---------- src/utils/cdk/genlex2.ml | 12 ++++++------ src/utils/cdk/gzip.ml | 8 ++++---- src/utils/lib/md4.ml | 39 +++++++++++++++++++------------------- src/utils/lib/options.ml4 | 2 +- src/utils/lib/unix32.ml | 10 +++++----- src/utils/lib/unix32.mli | 2 +- src/utils/lib/url.ml | 6 +++--- 12 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/utils/cdk/bzip2.ml b/src/utils/cdk/bzip2.ml index 07ca41925..89b4e1dd6 100644 --- a/src/utils/cdk/bzip2.ml +++ b/src/utils/cdk/bzip2.ml @@ -7,7 +7,7 @@ let buffer_size = 1024 type in_channel = { in_chan: Pervasives.in_channel; - in_buffer: string; + in_buffer: bytes; mutable in_pos: int; mutable in_avail: int; mutable in_eof: bool; @@ -16,7 +16,7 @@ type in_channel = let open_in_chan ic = { in_chan = ic; - in_buffer = String.create buffer_size; + in_buffer = Bytes.create buffer_size; in_pos = 0; in_avail = 0; in_eof = false; @@ -32,12 +32,12 @@ let open_in filename = let read_byte iz = if iz.in_avail = 0 then begin let n = Pervasives.input iz.in_chan iz.in_buffer 0 - (String.length iz.in_buffer) in + (Bytes.length iz.in_buffer) in if n = 0 then raise End_of_file; iz.in_pos <- 0; iz.in_avail <- n end; - let c = iz.in_buffer.[iz.in_pos] in + let c = Bytes.get iz.in_buffer iz.in_pos in iz.in_pos <- iz.in_pos + 1; iz.in_avail <- iz.in_avail - 1; Char.code c @@ -53,19 +53,19 @@ let read_int32 iz = (Int32.shift_left (Int32.of_int b4) 24))) let rec input iz buf pos len = - if pos < 0 || len < 0 || pos + len > String.length buf then + if pos < 0 || len < 0 || pos + len > Bytes.length buf then invalid_arg "Bzip2.input"; if iz.in_eof then 0 else begin if iz.in_avail = 0 then 
begin let n = Pervasives.input iz.in_chan iz.in_buffer 0 - (String.length iz.in_buffer) in + (Bytes.length iz.in_buffer) in if n = 0 then raise(Error("truncated file")); iz.in_pos <- 0; iz.in_avail <- n end; let (finished, used_in, used_out) = try - Bzlib.decompress iz.in_stream iz.in_buffer iz.in_pos iz.in_avail + Bzlib.decompress iz.in_stream (Bytes.unsafe_to_string iz.in_buffer) iz.in_pos iz.in_avail buf pos len with Bzlib.Error(_, e) -> raise(Error(Bzlib.string_of_error e)) in @@ -88,10 +88,10 @@ let rec really_input iz buf pos len = really_input iz buf (pos + n) (len - n) end -let char_buffer = String.create 1 +let char_buffer = Bytes.create 1 let input_char iz = - if input iz char_buffer 0 1 = 0 then raise End_of_file else char_buffer.[0] + if input iz char_buffer 0 1 = 0 then raise End_of_file else Bytes.get char_buffer 0 let input_byte iz = Char.code (input_char iz) @@ -106,7 +106,7 @@ let close_in iz = type out_channel = { out_chan: Pervasives.out_channel; - out_buffer: string; + out_buffer: bytes; mutable out_pos: int; mutable out_avail: int; out_stream: Bzlib.stream; @@ -115,7 +115,7 @@ type out_channel = let open_out_chan ?(level = 6) oc = if level < 1 || level > 9 then invalid_arg "Bzip2.open_out: bad level"; { out_chan = oc; - out_buffer = String.create buffer_size; + out_buffer = Bytes.create buffer_size; out_pos = 0; out_avail = buffer_size; out_stream = Bzlib.compress_init level 0 0; @@ -125,18 +125,18 @@ let open_out ?(level = 6) filename = open_out_chan ~level (Pervasives.open_out_bin filename) let rec output oz buf pos len = - if pos < 0 || len < 0 || pos + len > String.length buf then + if pos < 0 || len < 0 || pos + len > Bytes.length buf then invalid_arg "Bzlib2.output"; (* If output buffer is full, flush it *) if oz.out_avail = 0 then begin (* Printf.printf "Flushing out_avail\n"; *) Pervasives.output oz.out_chan oz.out_buffer 0 oz.out_pos; oz.out_pos <- 0; - oz.out_avail <- String.length oz.out_buffer + oz.out_avail <- Bytes.length 
oz.out_buffer end; let (_, used_in, used_out) = try - Bzlib.compress oz.out_stream buf pos len + Bzlib.compress oz.out_stream (Bytes.unsafe_to_string buf) pos len oz.out_buffer oz.out_pos oz.out_avail Bzlib.BZ_RUN with Bzlib.Error(_, e) -> @@ -147,7 +147,7 @@ let rec output oz buf pos len = if used_in < len then output oz buf (pos + used_in) (len - used_in) let output_char oz c = - char_buffer.[0] <- c; + Bytes.set char_buffer 0 c; output oz char_buffer 0 1 let output_byte oz b = @@ -159,10 +159,10 @@ let flush oz = if oz.out_avail = 0 then begin Pervasives.output oz.out_chan oz.out_buffer 0 oz.out_pos; oz.out_pos <- 0; - oz.out_avail <- String.length oz.out_buffer + oz.out_avail <- Bytes.length oz.out_buffer end; let (finished, _, used_out) = - Bzlib.compress oz.out_stream oz.out_buffer 0 0 + Bzlib.compress oz.out_stream (Bytes.unsafe_to_string oz.out_buffer) 0 0 oz.out_buffer oz.out_pos oz.out_avail Bzlib.BZ_FINISH in oz.out_pos <- oz.out_pos + used_out; diff --git a/src/utils/cdk/bzip2.mli b/src/utils/cdk/bzip2.mli index 40ddf4052..019699730 100644 --- a/src/utils/cdk/bzip2.mli +++ b/src/utils/cdk/bzip2.mli @@ -22,10 +22,10 @@ val input_byte: in_channel -> int (* Same as [Bzip2.input_char], but return the 8-bit integer representing the character. Raise [End_of_file] if no more compressed data is available. *) -val input: in_channel -> string -> int -> int -> int +val input: in_channel -> bytes -> int -> int -> int (* [input ic buf pos len] uncompresses up to [len] characters from the given channel [ic], - storing them in string [buf], starting at character number [pos]. + storing them in buffer [buf], starting at character number [pos]. It returns the actual number of characters read, between 0 and [len] (inclusive). A return value of 0 means that the end of file was reached. @@ -38,10 +38,10 @@ val input: in_channel -> string -> int -> int -> int exactly [len] characters.) 
Exception [Invalid_argument "Bzip2.input"] is raised if [pos] and [len] do not designate a valid substring of [buf]. *) -val really_input: in_channel -> string -> int -> int -> unit +val really_input: in_channel -> bytes -> int -> int -> unit (* [really_input ic buf pos len] uncompresses [len] characters from the given channel, storing them in - string [buf], starting at character number [pos]. + buffer [buf], starting at character number [pos]. Raise [End_of_file] if fewer than [len] characters can be read. Raise [Invalid_argument "Bzip2.input"] if [pos] and [len] do not designate a valid substring of [buf]. *) @@ -82,7 +82,7 @@ val output_char: out_channel -> char -> unit val output_byte: out_channel -> int -> unit (* Same as [Bzip2.output_char], but the output character is given by its code. The given integer is taken modulo 256. *) -val output: out_channel -> string -> int -> int -> unit +val output: out_channel -> bytes -> int -> int -> unit (* [output oc buf pos len] compresses and writes [len] characters from string [buf], starting at offset [pos], and writes the compressed data to the channel [oc]. 
diff --git a/src/utils/cdk/bzlib.ml b/src/utils/cdk/bzlib.ml index ff17e9ec1..459e4ac1c 100644 --- a/src/utils/cdk/bzlib.ml +++ b/src/utils/cdk/bzlib.ml @@ -23,7 +23,7 @@ type action = BZ_RUN | BZ_FLUSH | BZ_FINISH external compress_init: int -> int -> int -> stream = "camlzip_bzCompressInit" external compress: - stream -> string -> int -> int -> string -> int -> int -> action + stream -> string -> int -> int -> bytes -> int -> int -> action -> bool * int * int = "camlzip_bzCompress_bytecode" "camlzip_bzCompress" external compress_end: stream -> unit = "camlzip_bzCompressEnd" @@ -31,7 +31,7 @@ external compress_end: stream -> unit = "camlzip_bzCompressEnd" external decompress_init: int -> bool -> stream = "camlzip_bzDecompressInit" external decompress: - stream -> string -> int -> int -> string -> int -> int -> bool * int * int + stream -> string -> int -> int -> bytes -> int -> int -> bool * int * int = "camlzip_bzDecompress_bytecode" "camlzip_bzDecompress" external decompress_end: stream -> unit = "camlzip_bzDecompressEnd" diff --git a/src/utils/cdk/bzlib.mli b/src/utils/cdk/bzlib.mli index 74f6d09fd..12945b71c 100644 --- a/src/utils/cdk/bzlib.mli +++ b/src/utils/cdk/bzlib.mli @@ -10,7 +10,7 @@ type action = BZ_RUN | BZ_FLUSH | BZ_FINISH external compress_init: int -> int -> int -> stream = "camlzip_bzCompressInit" external compress: - stream -> string -> int -> int -> string -> int -> int -> action + stream -> string -> int -> int -> bytes -> int -> int -> action -> bool * int * int = "camlzip_bzCompress_bytecode" "camlzip_bzCompress" external compress_end: stream -> unit = "camlzip_bzCompressEnd" @@ -18,7 +18,7 @@ external compress_end: stream -> unit = "camlzip_bzCompressEnd" external decompress_init: int -> bool -> stream = "camlzip_bzDecompressInit" external decompress: - stream -> string -> int -> int -> string -> int -> int -> bool * int * int + stream -> string -> int -> int -> bytes -> int -> int -> bool * int * int = "camlzip_bzDecompress_bytecode" 
"camlzip_bzDecompress" external decompress_end: stream -> unit = "camlzip_bzDecompressEnd" diff --git a/src/utils/cdk/filename2.ml b/src/utils/cdk/filename2.ml index 38032f2f1..3b3e49f10 100644 --- a/src/utils/cdk/filename2.ml +++ b/src/utils/cdk/filename2.ml @@ -116,19 +116,19 @@ let to_string filename = List.fold_left (fun file f -> f file) filename !to_strings let path_of_filename filename = - let filename = String.copy filename in let len = String.length filename in + let filename = Bytes.of_string filename in for i = 0 to len - 1 do - if filename.[i] = '\\' then filename.[i] <- '/'; + if Bytes.get filename i = '\\' then Bytes.set filename i '/'; done; let filename = - if len > 2 && filename.[1] = ':' && - match filename.[0] with + if len > 2 && Bytes.get filename 1 = ':' && + match Bytes.get filename 0 with 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false then - Printf.sprintf "%s/%s" (String.sub filename 0 2) - (String.sub filename 2 (len-2)) - else filename + Printf.sprintf "%s/%s" (Bytes.sub_string filename 0 2) + (Bytes.sub_string filename 2 (len-2)) + else Bytes.unsafe_to_string filename in split_simplify filename '/' @@ -142,11 +142,12 @@ let filesystem_compliant name fstype namemax = (* replace all illegal characters with a valid one. 
assumes all filesystems accept '_'s in filenames *) let escape_chars p filename = - let s = String.copy filename in + let s = Bytes.of_string filename in for i = 0 to String.length filename - 1 do - if p s.[i] then s.[i] <- '_' + if p (Bytes.get s i) then Bytes.set s i '_' done; - s in + Bytes.unsafe_to_string s + in (* remove all illegal characters at the beginning of filename *) let trim_left p filename = diff --git a/src/utils/cdk/genlex2.ml b/src/utils/cdk/genlex2.ml index de90afe49..dc211dc0b 100644 --- a/src/utils/cdk/genlex2.ml +++ b/src/utils/cdk/genlex2.ml @@ -28,7 +28,7 @@ type token = (* The string buffering machinery *) -let initial_buffer = String.create 32 +let initial_buffer = Bytes.create 32 let buffer = ref initial_buffer let bufpos = ref 0 @@ -36,16 +36,16 @@ let bufpos = ref 0 let reset_buffer () = buffer := initial_buffer; bufpos := 0 let store c = - if !bufpos >= String.length !buffer then + if !bufpos >= Bytes.length !buffer then begin - let newbuffer = String.create (2 * !bufpos) in - String.blit !buffer 0 newbuffer 0 !bufpos; buffer := newbuffer + let newbuffer = Bytes.create (2 * !bufpos) in + Bytes.blit !buffer 0 newbuffer 0 !bufpos; buffer := newbuffer end; - String.set !buffer !bufpos c; + Bytes.set !buffer !bufpos c; incr bufpos let get_string () = - let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s + let s = Bytes.sub_string !buffer 0 !bufpos in buffer := initial_buffer; s (* The lexer *) diff --git a/src/utils/cdk/gzip.ml b/src/utils/cdk/gzip.ml index d170263be..239ece758 100644 --- a/src/utils/cdk/gzip.ml +++ b/src/utils/cdk/gzip.ml @@ -28,7 +28,7 @@ type in_channel = in_stream: Zlib.stream; mutable in_size: int32; mutable in_crc: int32; - char_buffer: string } + char_buffer: bytes } let open_in ic = (* Superficial parsing of header *) @@ -73,7 +73,7 @@ let open_in ic = in_stream = Zlib.inflate_init false; in_size = Int32.zero; in_crc = Int32.zero; - char_buffer = String.create 1 } + char_buffer = Bytes.create 1 } 
let open_in_file filename = let ic = Pervasives.open_in_bin filename in @@ -176,7 +176,7 @@ type 'a out_channel = out_stream: Zlib.stream; mutable out_size: int32; mutable out_crc: int32; - char_buffer: string } + char_buffer: bytes } let open_out ?(level = 6) oc = if level < 1 || level > 9 then invalid_arg "Gzip_stream.open_output: bad level"; @@ -195,7 +195,7 @@ let open_out ?(level = 6) oc = out_stream = Zlib.deflate_init level false; out_size = Int32.zero; out_crc = Int32.zero; - char_buffer = String.create 1 } + char_buffer = Bytes.create 1 } let open_out_file ?level filename = let oc = Pervasives.open_out_bin filename in diff --git a/src/utils/lib/md4.ml b/src/utils/lib/md4.ml index 40bb02828..986b0e91d 100644 --- a/src/utils/lib/md4.ml +++ b/src/utils/lib/md4.ml @@ -103,32 +103,32 @@ module Base32 = struct 'A' .. 'Z' -> int_of_char n - 65 | 'a' .. 'z' -> int_of_char n - 97 | _ -> (int_of_char n+26)-50 - + let of_string hash_length r = let len = String.length r in assert (len = (hash_length * 8 + 4)/5); - let s = String.make hash_length '\000' in + let s = Bytes.make hash_length '\000' in for i = 0 to len - 1 do let pos = i * 5 in let byte = pos / 8 in let bit = pos mod 8 in let c = int5_of_char r.[i] in - if bit < 3 then + if bit < 3 then let x = c lsl (3-bit) in - s.[byte] <- char_of_int (int_of_char s.[byte] lor x); + Bytes.set s byte (char_of_int (int_of_char (Bytes.get s byte) lor x)); else let x = (c lsr (bit - 3)) land 0xff in - s.[byte] <- char_of_int (int_of_char s.[byte] lor x); + Bytes.set s byte (char_of_int (int_of_char (Bytes.get s byte) lor x)); if byte+1 < hash_length then let y = (c lsl (11 - bit)) land 0xff in - s.[byte+1] <- char_of_int (int_of_char s.[byte+1] lor y); + Bytes.set s (byte+1) (char_of_int (int_of_char (Bytes.get s (byte+1)) lor y)); done; - s - + Bytes.unsafe_to_string s + let to_string hash_length s = assert (String.length s = hash_length); let len = (hash_length * 8 + 4)/5 in - let r = String.create len in + let r = Bytes.create len in for i = 0 
to len - 1 do let pos = i * 5 in let byte = pos / 8 in @@ -145,12 +145,12 @@ module Base32 = struct let c = (x lsr (11 - bit)) land 0x1f in r.[i] <- char_of_int5 c done; - r + Bytes.unsafe_to_string r let char_of_int5 upper n = char_of_int (if n < 26 then (if upper then 65 else 97)+n else 50+(n-26)) - + let to_string_case upper hash_length s = assert (String.length s = hash_length); let len = (hash_length * 8 + 4)/5 in @@ -181,7 +181,7 @@ module Base6427 = struct let _ = assert (String.length base64tbl = 64) let to_string _ hashbin = - let hash64 = String.create 30 in + let hash64 = Bytes.create 30 in let hashbin n = int_of_char hashbin.[n] in hash64.[0] <- '='; let j = ref 1 in @@ -198,16 +198,17 @@ module Base6427 = struct done done; hash64.[!j-1] <- '='; - String.sub hash64 0 !j + Bytes.sub_string hash64 0 !j - let base64tbl_inv = String.create 126 - let _ = + let base64tbl_inv = + let table = Bytes.make 126 '\000' in (* zero-fill: slots for characters outside base64tbl must not hold arbitrary bytes once frozen *) for i = 0 to 63 do - base64tbl_inv.[int_of_char base64tbl.[i]] <- char_of_int i - done + Bytes.set table (int_of_char base64tbl.[i]) (char_of_int i) + done; + Bytes.unsafe_to_string table let of_string _ hash64 = - let hashbin = String.make 20 '\000' in + let hashbin = Bytes.make 20 '\000' in let hash64 n = let c = hash64.[n] in int_of_char base64tbl_inv.[int_of_char c] @@ -233,7 +234,7 @@ module Base6427 = struct hashbin.[!j+1] <- char_of_int ((!tmp lsr 8) land 0xff); j := !j + 2; done; - hashbin + Bytes.unsafe_to_string hashbin let to_string_case _ = to_string end diff --git a/src/utils/lib/options.ml4 b/src/utils/lib/options.ml4 index 823372bfc..c18137ffb 100644 --- a/src/utils/lib/options.ml4 +++ b/src/utils/lib/options.ml4 @@ -208,7 +208,7 @@ and parse_option = parser | [< 'Int i >] -> IntValue i | [< 'Float f >] -> FloatValue f | [< 'Kwd "@"; 'Int i; v = parse_once_value i >] -> OnceValue v -| [< 'Char c >] -> StringValue (let s = String.create 1 in s.[0] <- c; s) +| [< 'Char c >] -> StringValue (String.make 1 c) | [< 'Kwd "["; v = parse_list [] >] -> List 
v | [< 'Kwd "("; v = parse_list [] >] -> List v diff --git a/src/utils/lib/unix32.ml b/src/utils/lib/unix32.ml index 044ba7c9f..1d5ada761 100644 --- a/src/utils/lib/unix32.ml +++ b/src/utils/lib/unix32.ml @@ -327,13 +327,13 @@ module FDCache = struct check_destroyed t2; let buffer_len = 128 * 1024 in let buffer_len64 = Int64.of_int buffer_len in - let buffer = String.make buffer_len '\001' in + let buffer = Bytes.make buffer_len '\001' in let rec iter remaining pos1 pos2 = let len64 = min remaining buffer_len64 in let len = Int64.to_int len64 in if len > 0 then begin read t1 pos1 buffer 0 len; - write t2 pos2 buffer 0 len; + write t2 pos2 (Bytes.unsafe_to_string buffer) 0 len; iter (remaining -- len64) (pos1 ++ len64) (pos2 ++ len64) end in @@ -363,7 +363,7 @@ module type File = sig val mtime64 : t -> float val exists : t -> bool val remove : t -> unit - val read : t -> int64 -> string -> int -> int -> unit + val read : t -> int64 -> bytes -> int -> int -> unit val write : t -> int64 -> string -> int -> int -> unit val destroy : t -> unit val is_closed : t -> bool @@ -1349,12 +1349,12 @@ let copy_chunk t1 t2 pos1 pos2 len = flush_fd t1; flush_fd t2; let buffer_size = 128 * 1024 in - let buffer = String.make buffer_size '\001' in + let buffer = Bytes.make buffer_size '\001' in let rec iter remaining pos1 pos2 = let len = mini remaining buffer_size in if len > 0 then begin read t1 pos1 buffer 0 len; - write t2 pos2 buffer 0 len; + write t2 pos2 (Bytes.unsafe_to_string buffer) 0 len; let len64 = Int64.of_int len in iter (remaining - len) (pos1 ++ len64) (pos2 ++ len64) end diff --git a/src/utils/lib/unix32.mli b/src/utils/lib/unix32.mli index 026362db6..8136183ef 100644 --- a/src/utils/lib/unix32.mli +++ b/src/utils/lib/unix32.mli @@ -58,7 +58,7 @@ val write : t -> int64 -> string -> int -> int -> unit val max_buffered : int64 ref val remove : t -> unit -val read : t -> int64 -> string -> int -> int -> unit +val read : t -> int64 -> bytes -> int -> int -> unit (*val 
allocate_chunk : t -> int64 -> int -> unit*) val copy_chunk : t -> t -> int64 -> int64 -> int -> unit diff --git a/src/utils/lib/url.ml b/src/utils/lib/url.ml index 03ff7e6c1..b7bdb6da3 100644 --- a/src/utils/lib/url.ml +++ b/src/utils/lib/url.ml @@ -36,7 +36,7 @@ type url = { let encode s = let pos = ref 0 in let len = String.length s in - let res = String.create (3*len) in + let res = Bytes.create (3*len) in let hexa_digit x = if x >= 10 then Char.chr (Char.code 'A' + x - 10) else Char.chr (Char.code '0' + x) in @@ -50,7 +50,7 @@ let encode s = res.[!pos+2] <- hexa_digit (Char.code c mod 16); pos := !pos + 3 done; - String.sub res 0 !pos + Bytes.sub_string res 0 !pos (** decodes a sting according RFC 1738 or x-www-form-urlencoded ('+' with ' ') @@ -304,4 +304,4 @@ open Options let option = define_option_class "URL" (fun v -> of_string (value_to_string v)) - (fun url -> string_to_value (to_string url)) \ No newline at end of file + (fun url -> string_to_value (to_string url))