(* \chaptertitle{PDFImage}{PDF Images} *)
open Utility

(* What's supported and needs supporting\\
   Unsupported CODECs: DCTDecode, JBIG2Decode, JPXDecode\\\\ 
   RGB, 8bpc\\
   CMYK, 8bpc\\
   Gray, 8bpc\\
   Black and white, 1bpp. The only one that /Decode works for\\
   Indexed, RGB and CMYK, 8bpp\\
   Indexed, RGB and CMYK, 4bpp\\
   Separation, CMYK\\
   ICCBased, knows how to find alternate colorspace\\
*)

type pixel_layout =
  | BPP1
  | BPP8
  | BPP24
  | BPP48

(* FIXME: We need to deal with decode and other things for JPEG, if we're not
going to decode them. *)
type image =
  | JPEG of bytestream
  | JPEG2000 of bytestream
  | JBIG2 of bytestream
  | Raw of int * int * pixel_layout * bytestream

let string_of_layout = function
  | BPP1 -> "BPP1"
  | BPP8 -> "BPP8"
  | BPP24 -> "BPP24"
  | BPP48 -> "BPP48"

let string_of_image = function
  | JPEG _ -> "JPEG"
  | JPEG2000 _ -> "JPEG2000"
  | JBIG2 _ -> "JBIG2"
  | Raw (w, h, layout, data) ->
      "RAW: " ^ string_of_int w ^ " " ^ string_of_int h
      ^ " " ^ string_of_layout layout ^ " bytes of data = "
      ^ string_of_int (stream_size data)

(* FIXME: Only copes with [1 0] for now, and only 8BPP *)
let decode entry image =
  match entry, image with
  | Some (Pdf.Array [Pdf.Integer 1; Pdf.Integer 0]), Raw (w, h, BPP24, s) ->
      for x = 0 to (stream_size s / 3) - 1 do
        sset s (x * 3) (255 - sget s (x * 3));
        sset s (x * 3 + 1) (255 - sget s (x * 3 + 1));
        sset s (x * 3 + 2) (255 - sget s (x * 3 + 2))
      done
  | _ -> ()

(* Decode until it is either plain or a type of decoding we can't deal with
natively. *) 
let rec decode_to_image pdf = function
  | Pdf.Stream {contents = Pdf.Dictionary d, s} as stream ->
      begin match lookup "/Filter" d with
      | None
      | Some (Pdf.Array [])
      | Some (Pdf.Name ("/DCTDecode" | "/DCT" | "/JBIG2Decode" | "/JPXDecode"))
      | Some (Pdf.Array [Pdf.Name ("/DCTDecode" | "/DCT" | "/JBIG2Decode" | "/JPXDecode")]) -> ()
      | _ ->
          Pdfcodec.decode_pdfstream_onestage pdf stream;
          decode_to_image pdf stream 
      end
  | _ -> raise (Pdf.PDFError "decode_to_image: bad stream")

(* Basic CMYK to RGB conversion *)
let rgb_of_cmyk c m y k =
  let c = float c and m = float m and y = float y and k = float k in
  let r = 255. -. fmin 255. ((c /.  255.) *. (255. -. k) +. k) 
  and g = 255. -. fmin 255. ((m /.  255.) *. (255. -. k) +. k)
  and b = 255. -. fmin 255. ((y /.  255.) *. (255. -. k) +. k) in
    toint r, toint g,  toint b

let read_cmyk_8bpp_as_rgb24 width height data =
  let data' = mkstream (width * height * 3) in
    for p = 0 to width * height - 1 do
      let c = sget data (p * 4)
      and m = sget data (p * 4 + 1)
      and y = sget data (p * 4 + 2)
      and k = sget data (p * 4 + 3) in
        let r, g, b = rgb_of_cmyk c m y k in
          sset data' (p * 3) r;
          sset data' (p * 3 + 1) g;
          sset data' (p * 3 + 2) b
    done;
    data'

let read_gray_8bpp_as_rgb24 width height data =
  let data' = mkstream (width * height * 3) in
    for pout = 0 to width * height - 1 do
      sset data' (pout * 3) (sget data pout);
      sset data' (pout * 3 + 1) (sget data pout);
      sset data' (pout * 3 + 2) (sget data pout);
    done;
    data'

(* Input is 1bpp, rows padded to bytes. *)
let read_1bpp_as_rgb24 width height s =
  let s' = mkstream (width * height * 3)
  and s_bits = Pdfio.bitstream_of_input (Pdfio.input_of_bytestream s) in
    let pout = ref 0 in
      for row = 0 to height - 1 do
        let bits_to_do = ref width in
          while !bits_to_do > 0 do
            let bit = if Pdfio.getbit s_bits then 255 else 0 in
              sset s' !pout bit;
              sset s' (!pout + 1) bit;
              sset s' (!pout + 2) bit;
              decr bits_to_do;
              pout += 3
          done;
          Pdfio.align s_bits 
      done;
      s'

(* 4bpp, rows padded to bytes. *)
let read_4bpp_gray_as_rgb24 width height s =
  let s' = mkstream (width * height * 3)
  and s_bits = Pdfio.bitstream_of_input (Pdfio.input_of_bytestream s) in
    let pout = ref 0 in
      for row = 0 to height - 1 do
        let pix_to_do = ref width in
          while !pix_to_do > 0 do
            let a = if Pdfio.getbit s_bits then 1 else 0 in
            let b = if Pdfio.getbit s_bits then 1 else 0 in
            let c = if Pdfio.getbit s_bits then 1 else 0 in
            let d = if Pdfio.getbit s_bits then 1 else 0 in
              let col = (a * 8 + b * 4 + c * 2 + d) * (16 + 1) in
                sset s' !pout col;
                sset s' (!pout + 1) col;
                sset s' (!pout + 2) col;
                decr pix_to_do;
                pout += 3
          done;
          Pdfio.align s_bits 
      done;
      s'

let read_8bpp_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    for x = 0 to width * height - 1 do
      match Hashtbl.find table (sget s x) with
      | [r; g; b] ->
          sset s' (x * 3) r;
          sset s' (x * 3 + 1) g;
          sset s' (x * 3 + 2) b
      | _ -> failwith "read_8bpp_indexed_as_rgb24"
    done;
    s'

let read_8bpp_cmyk_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    for x = 0 to width * height - 1 do
      match Hashtbl.find table (sget s x) with
      | [c; m; y; k] ->
          let r, g, b = rgb_of_cmyk c m y k in
            sset s' (x * 3) r;
            sset s' (x * 3 + 1) g;
            sset s' (x * 3 + 2) b
      | _ -> failwith "read_8bpp_indexed_as_rgb24"
    done;
    s'

let read_4bpp_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    let posin = ref 0
    and posout = ref 0 in
      for row = 0 to height - 1 do
        for byte = 0 to (width + 1) / 2 - 1 do
          let p1 = sget s !posin lsr 4
          and p2 = sget s !posin land 15 in
            begin match Hashtbl.find table p1 with
            | [r1; g1; b1] ->
                sset s' !posout r1; incr posout;
                sset s' !posout g1; incr posout;
                sset s' !posout b1; incr posout;
            | _ -> failwith "read_4bpp_indexed_as_rgb24"
            end;
            begin
              if not (odd width && byte = (width + 1) / 2 - 1) then
              match Hashtbl.find table p2 with
              | [r2; g2; b2] ->
                   sset s' !posout r2; incr posout;
                   sset s' !posout g2; incr posout;
                   sset s' !posout b2; incr posout;
              | _ -> failwith "read_4bpp_indexed_as_rgb24"
            end;
            incr posin
        done
      done;
      s'

let read_4bpp_cmyk_indexed_as_rgb24 table width height s =
  let s' = mkstream (width * height * 3) in
    let posin = ref 0
    and posout = ref 0 in
      for row = 0 to height - 1 do
        for byte = 0 to (width + 1) / 2 - 1 do
          let p1 = sget s !posin lsr 4
          and p2 = sget s !posin land 15 in
            begin match Hashtbl.find table p1 with
            | [c; m; y; k] ->
                let r1, g1, b1 = rgb_of_cmyk c m y k in
                  sset s' !posout r1; incr posout;
                  sset s' !posout g1; incr posout;
                  sset s' !posout b1; incr posout;
            | _ -> failwith "read_4bpp_cmyk_indexed_as_rgb24"
            end;
            begin
              if not (odd width && byte = (width + 1) / 2 - 1) then
              match Hashtbl.find table p2 with
              | [c; m; y; k] ->
                  let r2, g2, b2 = rgb_of_cmyk c m y k in
                    sset s' !posout r2; incr posout;
                    sset s' !posout g2; incr posout;
                    sset s' !posout b2; incr posout;
              | _ -> failwith "read_4bpp_cmyk_indexed_as_rgb24"
            end;
            incr posin
        done
      done;
      s'

(* Separation, CMYK alternate, tint transform function. *)
let read_separation_cmyk_as_rgb24 f width height s = 
  let s' = mkstream (width * height * 3) in
    for p = 0 to width * height - 1 do
      let v = sget s p in
        match Pdffun.eval_function f [float v /. 255.] with
        | [c; y; m; k] ->
            let c = toint (c *. 255.)
            and m = toint (m *. 255.)
            and y = toint (y *. 255.)
            and k = toint (k *. 255.) in
              let r, g, b = rgb_of_cmyk c m y k in
                sset s' (p * 3) r;
                sset s' (p * 3 + 1) g;
                sset s' (p * 3 + 2) b;
        | _ ->
            raise (Pdf.PDFError "Bad tint transform function")
    done;
    s'

let rec read_raw_image size colspace bpc pdf resources width height dict data =
  match size, colspace, bpc with
  | size, (Pdfspace.DeviceRGB | Pdfspace.CalRGB _), Some (Pdf.Integer 8)
      when size >= width * height * 3 ->
        Raw (width, height, BPP24, data)
  | size, Pdfspace.DeviceCMYK, Some (Pdf.Integer 8)
      when size >= width * height * 4 ->
        Raw (width, height, BPP24, read_cmyk_8bpp_as_rgb24 width height data)
  | size, (Pdfspace.DeviceGray | Pdfspace.CalGray _), Some (Pdf.Integer 8)
      when size >= width * height ->
        Raw (width, height, BPP24, read_gray_8bpp_as_rgb24 width height data)
  | size, _, Some (Pdf.Integer 1)
      when size >= width * height / 8 ->
        Raw (width, height, BPP24, read_1bpp_as_rgb24 width height data)
  | size, Pdfspace.DeviceGray, Some (Pdf.Integer 4)
      when size >= width * height / 2 ->
        Raw (width, height, BPP24, read_4bpp_gray_as_rgb24 width height data)
  | size, Pdfspace.Indexed ((Pdfspace.DeviceRGB | Pdfspace.CalRGB _), table), Some (Pdf.Integer 8)
  | size,
    Pdfspace.Indexed
      ((Pdfspace.DeviceN (_, (Pdfspace.DeviceRGB | Pdfspace.CalRGB _), _, _) |
       Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceRGB |
       Pdfspace.CalRGB _)}) , table),
    Some (Pdf.Integer 8)
      when size >= width * height ->
        Raw (width, height, BPP24, read_8bpp_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Indexed (Pdfspace.DeviceCMYK, table), Some (Pdf.Integer 8)
      when size >= width * height ->
        Raw (width, height, BPP24, read_8bpp_cmyk_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Indexed ((Pdfspace.DeviceRGB | Pdfspace.CalRGB _), table), Some (Pdf.Integer 4)
  | size, Pdfspace.Indexed (Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceRGB | Pdfspace.CalRGB _)}, table), Some (Pdf.Integer 4)
      when size >= width * height / 2 ->
        Raw (width, height, BPP24, read_4bpp_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Indexed ((Pdfspace.DeviceCMYK), table), Some (Pdf.Integer 4)
  | size, Pdfspace.Indexed (Pdfspace.ICCBased {Pdfspace.icc_alternate = (Pdfspace.DeviceCMYK)}, table), Some (Pdf.Integer 4)
      when size >= width * height / 2 ->
        Raw (width, height, BPP24, read_4bpp_cmyk_indexed_as_rgb24 table width height data)
  | size, Pdfspace.Separation (_, Pdfspace.DeviceCMYK, fn), Some (Pdf.Integer 8)
      when size >= width * height ->
          Raw (width, height, BPP24, read_separation_cmyk_as_rgb24 fn width height data)
  | size, Pdfspace.ICCBased {Pdfspace.icc_alternate = cs}, _ ->
      read_raw_image size cs bpc pdf resources width height dict data
  | size, cs, bpc ->
     Printf.eprintf "NO IMAGE:\n size:%i\n cspace\n%s\n bpc\n%s\n width %i\n
     height %i\n" size
     (Pdfspace.string_of_colourspace cs)
     (match bpc with None -> "NONE" | Some bpc -> Pdfwrite.string_of_pdf bpc)
     width
     height;
     raise (Pdf.PDFError "No image\n")

let rec get_raw_image pdf resources width height dict data =
  try
  let size =
    stream_size data
  and colspace =
    (* If an image mask, it's /DeviceGray, effectively *)
    match Pdf.lookup_direct_orelse pdf "/ImageMask" "/IM" dict with
    | Some (Pdf.Boolean true) -> Pdfspace.DeviceGray
    | _ ->
      let colspace =
        Pdf.lookup_direct_orelse pdf "/ColorSpace" "/CS" dict
      in
        let space =
          match Pdf.lookup_direct pdf "/ColorSpace" resources, colspace with
          | Some (Pdf.Dictionary _ as d), Some (Pdf.Name c) ->
              begin match Pdf.lookup_direct pdf c d with
              | Some colspace -> colspace
              | _ -> (Pdf.Name c)
              end
          | _ ->
              match colspace with
              | Some c -> c
              | _ -> failwith "PDf image: no cololurspace"
        in
          Pdfspace.read_colourspace pdf resources space
  and bpc =
    Pdf.lookup_direct_orelse pdf "/BitsPerComponent" "/BPC" dict
  in
    (*i flprint ("IMAGE SPACE:\n" ^ Pdfspace.string_of_colourspace colspace ^
     * "\n"); i*)
    read_raw_image size colspace bpc pdf resources width height dict data
  with
    e ->
      (*i Printf.eprintf (Pdfwrite.string_of_pdf (Pdf.direct pdf dict)); i*)
      raise e 

let get_image_24bpp pdf resources stream =
  let streamdict, data =
    Pdf.getstream stream;
    match stream with
    | Pdf.Stream {contents = (s, Pdf.Got d)} ->
        s, d
    | _ -> raise (Assert_failure ("", 0, 0)) (*r [Pdf.getstream] would have failed *)
  in
    let width = 
      match (Pdf.lookup_direct_orelse pdf "/Width" "/W" streamdict) with
      | Some (Pdf.Integer x) -> x
      | _ -> raise (Pdfread.PDFSemanticError "Malformed /Image width")
    and height =
      match (Pdf.lookup_direct_orelse pdf "/Height" "/H" streamdict) with
      | Some (Pdf.Integer x) -> x
      | _ -> raise (Pdfread.PDFSemanticError "Malformed /Image height")
    in
      decode_to_image pdf stream;
      match stream with
      | Pdf.Stream {contents = (Pdf.Dictionary d) as dict, Pdf.Got s} ->
          begin match Pdf.lookup_direct_orelse pdf "/Filter" "/F" dict with
          | None | Some (Pdf.Array []) ->
              let raw = get_raw_image pdf resources width height dict s
              and decode_entry = Pdf.lookup_direct_orelse pdf "/Decode" "/D" dict in
                decode decode_entry raw;
                raw
          | Some (Pdf.Name ("/DCTDecode" | "/DCT"))
          | Some (Pdf.Array [Pdf.Name ("/DCTDecode" | "/DCT")]) -> JPEG s
          | Some (Pdf.Name "/JBIG2Decode")
          | Some (Pdf.Array [Pdf.Name "/JBIG2Decode"]) -> JBIG2 s
          | Some (Pdf.Name "/JPXDecode")
          | Some (Pdf.Array [Pdf.Name "/JPXDecode"]) -> JPEG2000 s
          | _ -> raise (Pdf.PDFError "decode_to_image")
          end
      | _ -> raise (Assert_failure ("", 0, 0))

