Skip to content

Listing content of zip files in Fortran #1003

Open
@awvwgk

Description

@awvwgk

Motivation

I tried reading zip files in pure Fortran and found that the implementation is actually quite straight-foward to get a minimal working example, which can parse standard zip files. Maybe it is worth to add this directly as a feature to stdlib. Posting an example from my project here as reference.

Prior Art

list_zip_file in tblite

! SPDX-License: MIT

!> Provide routines for handling zip files
module stdlib_io_zip
   use iso_fortran_env, only : i2 => int16, i4 => int32
   implicit none
   private

   public :: list_zip_file, zip_file

   integer(i4), parameter :: &
       & zip_global_sig = int(z'02014b50', i4), &
       & zip_local_sig = int(z'04034b50', i4), &
       & zip_footer_sig = int(z'06054b50', i4)

   integer(i2), parameter :: zip_min_version = 20_i2

   type :: zip_record
      character(len=:), allocatable :: path
   end type zip_record

   type :: zip_file
      type(zip_record), allocatable :: records(:)
      character(len=:), allocatable :: global_header
      integer(i4) :: global_header_offset = 0_i4
      integer(i2) :: nrecs = 0_i2
   end type zip_file


contains

!> List content of zip file
subroutine list_zip_file(io, filename, zip, stat, msg)
   !> Unformatted IO unit
   integer, intent(in) :: io
   !> File name for error reporting
   character(len=*), intent(in) :: filename
   !> Descriptor of the zip file
   type(zip_file), intent(out) :: zip
   !> Status of the operation
   integer, intent(out) :: stat
   !> Status message
   character(len=:), allocatable :: msg

   integer :: irec
   integer(i2) :: path_size, extra_field_size, comment_size
   integer(i2) :: disk_no, disk_start, nrecs_on_disk
   integer(i4) :: nbytes_compressed, global_header_size
   character(len=512) :: errmsg
   integer :: res, length, pos
   integer(i4) :: header_sig
   character(len=:), allocatable :: path

   stat = 0
   irec = 0
   pos = 1
   read(io, pos=pos, iostat=stat, iomsg=errmsg) header_sig
   do while(stat == 0 .and. is_local_header(header_sig))
      irec = irec + 1

      if (stat == 0) &
         read(io, pos=pos+18, iostat=stat, iomsg=errmsg) nbytes_compressed
      if (stat == 0) &
         read(io, pos=pos+26, iostat=stat, iomsg=errmsg) path_size
      if (stat == 0) &
         read(io, pos=pos+28, iostat=stat, iomsg=errmsg) extra_field_size

      if (stat == 0) then
         if (allocated(path)) deallocate(path)
         allocate(character(len=path_size) :: path, stat=stat)
      end if
      if (stat == 0) &
         read(io, pos=pos+30, iostat=stat, iomsg=errmsg) path

      pos = pos + 30 + path_size + extra_field_size + nbytes_compressed
      read(io, pos=pos, iostat=stat, iomsg=errmsg) header_sig
   end do
   if (stat /= 0) then
      msg = "Failed to read local header block for '"//filename//"'"
      if (len_trim(errmsg) > 0) &
         msg = msg // " ("//trim(errmsg)//")"
      return
   end if

   if (.not.is_global_header(header_sig)) then
      stat = 400
      msg = "Expected global header signature for '"//filename//"' got "// &
         & format_string(header_sig, '(z0.8)')
      return
   end if

   allocate(zip%records(irec))

   irec = 0
   ! global_header_offset = pos - 1
   do while(stat == 0 .and. is_global_header(header_sig))
      irec = irec + 1

      if (stat == 0) &
         read(io, pos=pos+28, iostat=stat, iomsg=errmsg) path_size
      if (stat == 0) &
         read(io, pos=pos+30, iostat=stat, iomsg=errmsg) extra_field_size
      if (stat == 0) &
         read(io, pos=pos+32, iostat=stat, iomsg=errmsg) comment_size

      if (stat == 0) then
         if (allocated(path)) deallocate(path)
         allocate(character(len=path_size) :: path, stat=stat)
      end if
      if (stat == 0) &
         read(io, pos=pos+46, iostat=stat, iomsg=errmsg) path

      zip%records(irec)%path = path

      pos = pos + 46 + path_size + extra_field_size + comment_size
      read(io, pos=pos, iostat=stat, iomsg=errmsg) header_sig
   end do
   if (stat /= 0) then
      msg = "Failed to read global header block for '"//filename//"'"
      if (len_trim(errmsg) > 0) &
         msg = msg // " ("//trim(errmsg)//")"
      return
   end if
   if (.not.is_footer_header(header_sig)) then
      stat = 401
      msg = "Expected footer signature for '"//filename//"' got "// &
         & format_string(header_sig, '(z0.8)')
      return
   end if
   ! global_header_size = pos - global_header_offset + 1

   read(io, pos=pos+4, iostat=stat, iomsg=errmsg) &
      & disk_no, disk_start, nrecs_on_disk, zip%nrecs, &
      & global_header_size, zip%global_header_offset, comment_size

   if (stat == 0) &
      allocate(character(len=global_header_size) :: zip%global_header, stat=stat)
   if (stat == 0) &
      read(io, iostat=stat, pos=zip%global_header_offset+1) zip%global_header

   if (stat /= 0) then
      msg = "Failed to read footer for '"//filename//"'"
      if (len_trim(errmsg) > 0) &
         msg = msg // " ("//trim(errmsg)//")"
      return
   end if

   if (disk_no /= 0) then
      stat = 402
      msg = "Cannot read zip file with disk_no != 0"
   end if

   if (disk_start /= 0) then
      stat = 403
      msg = "Cannot read zip file with disk_start != 0"
   end if

   if (nrecs_on_disk /= zip%nrecs) then
      stat = 404
      msg = "Cannot read zip file with nrecs_on_disk != nrecs"
   end if
end subroutine list_zip_file

pure function is_local_header(header_sig) result(is_local)
   integer(i4), intent(in) :: header_sig
   logical :: is_local

   is_local = header_sig == zip_local_sig
end function is_local_header

pure function is_global_header(header_sig) result(is_global)
   integer(i4), intent(in) :: header_sig
   logical :: is_global

   is_global = header_sig == zip_global_sig
end function is_global_header

pure function is_footer_header(header_sig) result(is_footer)
   integer(i4), intent(in) :: header_sig
   logical :: is_footer

   is_footer = header_sig == zip_footer_sig
end function is_footer_header

pure function format_string(val, format) result(str)
   integer, intent(in) :: val
   character(len=*), intent(in) :: format
   character(len=:), allocatable :: str

   character(len=128) :: buffer
   integer :: stat

   write(buffer, format, iostat=stat) val
   if (stat == 0) then
      str = trim(buffer)
   else
      str = "*"
   end if
end function format_string

end module stdlib_io_zip

Additional Information

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    ideaProposition of an idea and opening an issue to discuss ittopic: IOCommon input/output related features

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions