#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2017-06-14 16:26:27 +0300 (Wed, 14 Jun 2017) $
#$Revision: 5443 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.1/scripts/cif_validate $
#------------------------------------------------------------------------------
#*
#* Parse CIF file(s) and CIF dictionary(ies).
#* Check CIF file against CIF dictionaries.
#*
#* USAGE:
#*    $0 --dictionaries 'cif_core.dic,cif_cod.dic' --options input1.cif input*.cif
#**

use strict;
use warnings;
use File::Basename qw( basename );
use Scalar::Util qw( looks_like_number );
use List::MoreUtils qw( uniq );
use COD::CIF::Parser qw( parse_cif ) ;
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::UserMessage qw( note debug_note );
use COD::ErrorHandler qw( process_warnings
                          process_parser_messages
                          report_message );
use COD::ToolsVersion;

my @dict_files;
my $use_parser = 'c';
my $set_of_enum = ['_atom_site_refinement_flags'];
my $case_sensitive = 1;
my $report_local_tags = 0;
my $report_deprecated = 0;
my $debug = 0;

my $die_on_errors   = 1;
my $die_on_warnings = 0;
my $die_on_notes    = 0;

sub severity_name($);
sub get_dict($);
sub is_in_range($$);
sub is_in_range_numeric($$);
sub is_in_range_char($$);
sub get_list_reference_tags($$);
sub get_replacement_tags($$);
sub check_list_link_parent($$$);

#* OPTIONS:
#*   -d, --dictionaries 'cif_core.dic,cif_cod.dic'
#*                     A list of CIF dictionary files (according to DDL2)
#*                     to be used in CIF file validation. List elements
#*                     are separated either by ',' or by ' '. To include
#*                     dictionaries with filenames containing these symbols,
#*                     the --add-dictionary option is used.
#*   -D, --add-dictionary 'cif new dictionary.dic'
#*                     Add additional CIF dictionary to the list.
#*   --clear-dictionaries
#*                     Remove all CIF dictionaries from the list.
#*   --treat-as-set _atom_site_refinement_flags
#*                     Treat values of given data items as a set. For example,
#*                     more than one enumeration value could be defined
#*                     for a single element. Any number of data item tags can
#*                     be provided in the following way:
#*                     $0 --treat-as-set _tag_1 --treat-as-set _tag_2
#*                     Default is the '_atom_site_refinement_flags' data item.
#*   --no-treat-as-set, dont-treat-as-set
#*                     Do not treat values of any data items as sets.
#*                     (see --treat-as-set).
#*   --report-local-tags
#*                     Validate data items having tags prefixed with
#*                     the '_[local]' prefix. The prefix was used in the COD
#*                     to denote locally used data items prior to the creation
#*                     of cif_cod.dic. The default is to ignore these data items.
#*   --no-report-local-tags, --ignore-local-tags
#*                     Ignore data items having tags prefixed with '_[local]'
#*                     prefix. Default.
#*   --ignore-case
#*                     Ignore letter case while validating enumeration values.
#*                     For example, even though '_atom_site_adp_type' is
#*                     restricted to values ('Uani', 'Uiso', 'Uovl', ...),
#*                     value 'UANI' would still be treated as valid.
#*   --respect-case, --case-sensitive, --dont-ignore-case
#*                     Respect letter case while validating enumeration
#*                     values. Default.
#*   --report-deprecated
#*                     Report data items that are marked as deprecated in the
#*                     dictionaries. Data item deprecation usually means that
#*                     it has been replaced with an another data item(s).
#*   --ignore-deprecated
#*                     Do not report data items that are marked as deprecated
#*                     in the dictionaries (default).
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default)
#*
#*   -c, --always-continue
#*                     Continue processing and return successful return status
#*                     even if errors are diagnosed.
#*   -c-, --always-die
#*                     Stop and return error status if errors are diagnosed.
#*   --continue-on-errors
#*                     Do not terminate script if errors are raised (default).
#*   --die-on-errors
#*                     Terminate script immediately if errors are raised.
#*   --continue-on-warnings
#*                     Do not terminate script if warnings are raised (default).
#*   --die-on-warnings
#*                     Terminate script immediately if warnings are raised.
#*   --continue-on-notes
#*                     Do not terminate script if notes are raised (default).
#*   --die-on-notes
#*                     Terminate script immediately if notes are raised.
#*   --debug
#*                     Output extra information for debugging purposes.
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   -v, --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '-d,--dictionaries'    => sub{ @dict_files = split m/,|\s+/, get_value() },
    '-D,--add-dictionary'  => sub{ push @dict_files, get_value() },
    '--clear-dictionaries' => sub{ @dict_files = () },

    '--treat-as-set'                    => $set_of_enum,
    '--no-treat-as-set'                 => sub{ $set_of_enum = [] },

    '--ignore-case'                     => sub{ $case_sensitive = 0 },
    '--dont-ignore-case,--respect-case' => sub{ $case_sensitive = 1 },
    '--case-sensitive'                  => sub{ $case_sensitive = 1 },

    '--report-local-tags'               => sub{ $report_local_tags = 1 },
    '--no-report-local-tags'            => sub{ $report_local_tags = 0 },
    '--ignore-local-tags'               => sub{ $report_local_tags = 0 },

    '--report-deprecated'               => sub{ $report_deprecated = 1 },
    '--ignore-deprecated'               => sub{ $report_deprecated = 0 },

    '--use-perl-parser'                 => sub{ $use_parser = 'perl' },
    '--use-c-parser'                    => sub{ $use_parser = 'c' },

    '-c,--always-continue'              => sub { $die_on_errors   = 0;
                                                 $die_on_warnings = 0;
                                                 $die_on_notes    = 0 },
    '-c-,--always-die'                  => sub { $die_on_errors   = 1;
                                                 $die_on_warnings = 1;
                                                 $die_on_notes    = 1 },

    '--continue-on-errors'          => sub { $die_on_errors = 0 },
    '--die-on-errors'               => sub { $die_on_errors = 1 },

    '--continue-on-warnings' => sub { $die_on_warnings = 0 },
    '--die-on-warnings'      => sub { $die_on_warnings = 1 },

    '--continue-on-notes'    => sub { $die_on_notes = 0 },
    '--die-on-notes'         => sub { $die_on_notes = 1 },

    '--options'         => sub{ options; exit },
    '--help,--usage'    => sub{ usage; exit; },
    '--debug'           => sub{ $debug = 1 },
    '-v,--version'      => sub { print 'cod-tools version ',
                                 $COD::ToolsVersion::Version, "\n";
                                 exit }
);

my $die_on_error_level = {
    ERROR   => $die_on_errors,
    WARNING => $die_on_warnings,
    NOTE    => $die_on_notes
};

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

my %dict_tags;

if( @dict_files ) {
    my $tag_count = 0;
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    foreach my $dict ( @dict_files ) {
        my ( $data, $err_count, $messages ) = parse_cif( $dict, $options );
        process_parser_messages( $messages, $die_on_error_level );

        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'       => @_,
                                       'program'       => $0,
                                       'filename'      => $dict,
                                     }, $die_on_error_level ) };

        $dict_tags{$dict} = get_dict( $data );
        if ( scalar( keys( %{$dict_tags{$dict}} ) ) == 0 ) {
            warn "no data item definitions found\n";
        }
        $tag_count += scalar( keys( %{$dict_tags{$dict}} ) );
    }

    if( $tag_count == 0 ) {
        report_message( {
            'program'   => $0,
            'err_level' => 'ERROR',
            'message'   => 'no data item definitions were found in the '
                         . 'provided dictionary files '
                         . '(\'' . join( '\', \'', @dict_files ) . '\')'
        }, $die_on_errors );
    }
} else {
    report_message( {
        'program'   => $0,
        'err_level' => 'ERROR',
        'message'   => 'at least one dictionary file should be provided by '
                     . 'using the \'--dictionaries\' option. Automatic '
                     . 'dictionary download is not implemeted yet'
    }, $die_on_errors );
    my $dict_iucr_uri = 'ftp://ftp.iucr.org/pub/cif_core.dic';
}

@ARGV = ('-') unless @ARGV;
# start-iterate-through-CIF-files
CIFFILEANAL: for my $filename ( @ARGV ) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    # convert all tags to a 'cannonical' form
    canonicalize_all_names( $data );
    next CIFFILEANAL if ( !defined $data || $data == 1 );

    for my $block ( @{$data} ) {
        my $dataname = 'data_' . $block->{name} if defined $block->{name};

        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'       => @_,
                                       'program'       => $0,
                                       'filename'      => $filename,
                                       'add_pos'       => $dataname
                                     }, $die_on_error_level ) };

        if( $debug ) {
            debug_note( $0, $filename, $dataname, 'analysis start', undef );
        }
        # 2-D array ( [loop_number][list_of_tags] ) to store names of looped
        # list reference tags that have been reported as missing
        my @reported_block_references;
        #
        # start iteration through CIF values
        #
        my @tag_value_notes;
        for my $tag_analysed ( @{$block->{tags}} ) {
            if(!defined $block->{types}{$tag_analysed} && $debug) {
                debug_note( $0, $filename, $dataname,
                            "data item '$tag_analysed' has no associated type ",
                            'an error in CIF parser had probably occured' );
            }
            my $lc_tag_analysed = lc $tag_analysed;
            my $defined = 0;
            if( ! $report_local_tags && $lc_tag_analysed =~ m/^_\[local\]/ ) {
                $defined++;
            }
            #
            # start iteration through dictionaries
            #
            for my $dict_f ( sort keys %dict_tags ) {
                my $dict_d = $dict_tags{$dict_f};

                # check if data items are defined in the dictionary
                if( exists $dict_d->{$lc_tag_analysed} ) {
                    $defined = 1;
                    if ( $debug ) {
                      debug_note( $0, $filename, $dataname, "data item '$tag_analysed' "
                                . "is defined in dictionary '$dict_f'", undef );
                    }
                } else {
                    if ( $debug ) {
                      debug_note( $0, $filename, $dataname, "data item '$tag_analysed' "
                               . "is not defined in dictionary '$dict_f'", undef );
                    }
                    next;
                }

                # check if data items are deprecated (replaced with other data items)
                my $repl_tags = get_replacement_tags($dict_d, $lc_tag_analysed);
                if( $report_deprecated && @{$repl_tags} != 0 ) {
                    warn "NOTE, data items '$tag_analysed' has been replaced by "
                        . 'data item(-s) [' . join(', ',
                                             map {"'$_'"} @{$repl_tags}) . '] '
                        . "according to '$dict_f'" . "\n";
                }

                # check if looped lists contain reference data items required by
                # other data items in the looped list (together acting as a unique key)
                my $dict_tag_values = $dict_d->{$lc_tag_analysed}{values};

                if ( exists $block->{inloop}{$tag_analysed} ) {
                    my $ref_tags = get_list_reference_tags($dict_d, $lc_tag_analysed);
                    for ( my $i = 0; $i < @{$block->{loops}}; $i++ ) {
                      next if !grep { $_ eq $tag_analysed } @{$block->{loops}[$i]};
                        foreach my $loop_ref_tag (@{$ref_tags}) {
                            # TO-DO: the reference property is also transitional
                            # to the 'alternate' data items and this must be tested
                            if( grep { lc($_) eq $loop_ref_tag } @{$block->{loops}[$i]} ) {
                                if ( $debug ) {
                                    debug_note ( $0, $filename, $dataname,
                                       "data item '$loop_ref_tag' is mandatory "
                                     . 'in loop when data item(s) ['
                                     . join(', ', map {"'$_'"}
                                                @{$dict_tag_values->{_name}})
                                     . '] are defined according to '
                                     . "'$dict_f' and was found", undef );
                                }
                            } elsif (!grep { $_ eq $loop_ref_tag }
                                                 @{$reported_block_references[$i]}) {
                                    warn "NOTE, data item '$loop_ref_tag' is "
                                       . 'mandatory in loop when data item(s) ['
                                       . join(', ', map {"'$_'"}
                                                 @{$dict_tag_values->{_name}})
                                       . "] are defined according to '$dict_f' "
                                       . 'but was not found' . "\n";
                                    push @{$reported_block_references[$i]},
                                         $loop_ref_tag;
                            }
                        }
                    }
                } elsif( $debug ) {
                    debug_note( $0, $filename, $dataname,
                                "data item $tag_analysed "
                              . 'is not in a loop', 'test for missing looped '
                              . 'list reference data item will be skipped' );
                }

                # check list mandatory
                if( exists $dict_tag_values->{_list} &&
                    $dict_tag_values->{_list}[0] eq 'yes' ) {
                    if( exists $block->{inloop}{$tag_analysed} ) {
                        if ( $debug ) {
                            debug_note( $0, $filename, $dataname,
                                "data item '$tag_analysed' must be in a loop " .
                                "according to '$dict_f' and is in a loop", undef );
                        }
                    } else {
                        warn "NOTE, data item '$tag_analysed' must be in a loop "
                           . "according to '$dict_f' but is not in a loop" . "\n";
                    }
                }

                # check link to parent
                if( exists $dict_tag_values->{_list_link_parent} ) {
                    check_list_link_parent( $block, $tag_analysed,
                                            $dict_tag_values->{_list_link_parent} );
                }

                # check types of values
                my $range = {};
                my %range_for_print;
                if( exists $dict_tag_values->{_enumeration_range} ) {
                    ( $range->{'min'}, $range->{'max'} ) = split(/:/,
                        $dict_tag_values->{_enumeration_range}[0],
                        2);
                    %range_for_print = %{$range};
                    if( length($range->{min}) == 0 ) {
                        delete $range->{min};
                        $range_for_print{min} = '<any>';
                    }
                    if( length($range->{max}) == 0 ) {
                        delete $range->{max};
                        $range_for_print{max} = '<any>';
                    }
                }

                for my $tag_index ( 0..$#{$block->{values}{$tag_analysed}} ) {
                    my $tag_value = $block->{values}{$tag_analysed}[$tag_index];

                    next if $tag_value eq '.' or $tag_value eq '?';
                    my $value = $tag_value;

                    #
                    # check values in enumeration list
                    #
                    if( $dict_tag_values->{_enumeration} ) {
                        my $value_found_in_list = 0;
                        my $emulate_set = 0;
                        if( grep { lc($_) eq $lc_tag_analysed } @{$set_of_enum} ) {
                            $emulate_set = 1;
                            my $set_of_enum_values =
                                join '|', @{$dict_tag_values->{_enumeration}};
                            $set_of_enum_values =~ s/\./\\./;
                            my $regexp_set_of_enum_values =
                                qr/^($set_of_enum_values)+$/s;
                            if( $case_sensitive ) {
                                if( $value =~ m/$regexp_set_of_enum_values/s ) {
                                    $value_found_in_list = 1;
                                }
                            } else {
                                if( $value =~ m/$regexp_set_of_enum_values/is ) {
                                    $value_found_in_list = 1;
                                }
                            }
                        } else {
                            if( $case_sensitive ) {
                                if( grep { $_ eq $value }
                                    @{$dict_tag_values->{_enumeration}} ) {
                                    $value_found_in_list = 1;
                                }
                            } else {
                                my $lc_value = lc $value;
                                if( grep { lc($_) eq $lc_value }
                                    @{$dict_tag_values->{_enumeration}} ) {
                                    $value_found_in_list = 1;
                                }
                           }
                       }

                       my $list_enum_values_for_tag =
                         join ', ', @{$dict_tag_values->{_enumeration}};
                       if( $value_found_in_list == 0 ) {
                            push @tag_value_notes,
                                 "data item '$tag_analysed' value \"$value\" "
                               . 'must be one of the enumeration values '
                               . "[$list_enum_values_for_tag]";
                       } else {
                            my $message = "data item '$tag_analysed' value "
                                        . "\"$value\" is one of the enumeration "
                                        . "values [$list_enum_values_for_tag]";

                            $message .= ' (case ignored)' if( !$case_sensitive );

                            $message .= ', enum was treated as set'
                                                              if( $emulate_set );

                            if ($debug) {
                                debug_note( $0, $filename, $dataname, $message, undef );
                            }
                       }
                    }
                    #
                    # Check if values match the declared types:
                    #

                    if( defined $dict_tag_values->{_type} &&
                        $dict_tag_values->{_type}[0] eq 'numb' ||
                        defined $dict_tag_values->{'_item_type.code'} &&
                        ($dict_tag_values->{'_item_type.code'}[0] eq 'float' ||
                             $dict_tag_values->{'_item_type.code'}[0] eq 'int') &&
                        defined $block->{types}{$tag_analysed} ) {
                            if( 'FLOAT' ne $block->{types}{$tag_analysed}[$tag_index] &&
                                'INT' ne $block->{types}{$tag_analysed}[$tag_index]) {
                                push @tag_value_notes,
                                    "data item '$tag_analysed' value \""
                                   . $block->{values}{$tag_analysed}[$tag_index]
                                   . '" is of type \''
                                   . $block->{types}{$tag_analysed}[$tag_index]
                                   . '\' while it should be numeric, i.e. '
                                   . '\'FLOAT\' or \'INT\'';
                            } elsif( $debug ) {
                                debug_note( $0, $filename, $dataname,
                                            "data item '$tag_analysed' is of type '" .
                                            $block->{types}{$tag_analysed}[$tag_index] .
                                            '\' as expected, as it should be either ' .
                                            '\'FLOAT\' or \'INT\'', undef );
                            }
                    }

                    # check if standard uncertainty is permitted
                    # some non 'numb' type values might have been parsed
                    # as numbers (s.u. values recorded separately) so
                    # type checking is used to avoid false positive alert
                    my $su_permitted = 0;
                    if ( defined $dict_tag_values->{_type} ) {
                        if( $dict_tag_values->{_type}[0] ne 'numb' ) {
                            $su_permitted = 1
                        } elsif( defined $dict_tag_values->{_type_conditions} &&
                                 grep { $_ eq 'esd' || $_ eq 'su' }
                                      @{$dict_tag_values->{_type_conditions}} ) {
                            $su_permitted = 1 ;
                        }
                    }

                    my $sigma = $block->{precisions}{$tag_analysed}[$tag_index];
                    if ( $value =~ /([(]\d+[)])$/ && !$su_permitted ) {
                        push @tag_value_notes,
                             "data item '$tag_analysed' value \"$value\" "
                           . 'is not permitted to contain standard '
                           . 'uncertainty "' . $1 . '" -- '
                           . 'standard uncertainty will be ignored in '
                           . 'further validation';
                        $sigma = undef;
                    }

                    #
                    # Check values against enumeration_range
                    #
                    if( %{$range} ) {
                        my $value_wo_su = $value;
                        if( $dict_tag_values->{_type}[0] eq 'numb' ) {
                            $value_wo_su =~ s/\s*\(.*$//;
                        }
                        if( is_in_range( $value_wo_su,
                                { 'type'  => $dict_tag_values->{'_type'}[0],
                                  'min'   => $range->{'min'},
                                  'max'   => $range->{'max'},
                                  'sigma' => $sigma
                                } ) <= 0 ) {
                                push @tag_value_notes,
                                     "data item '$tag_analysed' value " .
                                     "\"$value\" should be in range [" .
                                     $range_for_print{min} .
                                     ', ' . $range_for_print{max} . ']';
                        } elsif( $debug ) {
                            debug_note( $0, $filename, $dataname,
                                        "data item '$tag_analysed' value " .
                                        "\"$value\" is in range [" .
                                        $range_for_print{min} . ', ' .
                                        $range_for_print{max} . ']', undef );
                        }
                    } elsif( $debug ) {
                        debug_note( $0, $filename, $dataname,
                                    'there are no value range restrictions ' .
                                    "for data item \"$tag_analysed\"",
                                    'skipping the range test' );
                    }
                }
            }

            if( !$defined ) {
                push @tag_value_notes,
                      "data item '$tag_analysed' was not found in the " .
                      'provided dictionaries';
            }
        }

        my %tag_value_note_count;
        foreach (@tag_value_notes) {
            $tag_value_note_count{$_}++
        }

        my $print_note_count = 1;
        foreach my $note (sort keys %tag_value_note_count) {
            warn "NOTE, $note" .
                 ( $print_note_count && $tag_value_note_count{$note} > 1 ?
                   " ($tag_value_note_count{$note} times)\n" : "\n" );
        }
    }
}

##
# Returns a string that describes "severity level" in a human readable
# form (for discussions of severity levels, see ../doc/error-levels.txt.
# @params $severity
#       Code number of the error severity level.
# @return
#       Error severity level text string.
##
sub severity_name($)
{
    my ($severity) = @_;

    if( $severity & 1 ) {
        return 'ERROR';
    } elsif( $severity & 2 ) {
        return 'WARNING';
    } elsif( $severity & 4 ) {
        return 'NOTE';
    } elsif( $severity & 8 ) {
        return 'INFO-ERROR';
    } elsif( $severity & 16 ) {
        return 'INFO-WARNING';
    } elsif( $severity & 32 ) {
        return 'INFO-NOTICE';
    } elsif( $severity & 64 ) {
        return 'INFO-INDICATOR';
    } else {
        return "ERROR-LEVEL=$severity";
    }
}

##
# Extracts data items from dictionary (parsed using COD::CIF::Parser).
# @param $dict_f
#       Reference to COD::CIF::Parser output CIF object.
# @return
#       Hash of tags and related references to parsed data blocks.
##
sub get_dict($)
{
    my ($dict_f) = @_;
    my $tags = {};
    my $datan = 0;
    while( $datan < @{$dict_f} ) {
        if( exists $dict_f->[$datan]{values}{'_dictionary.ddl_conformance'} &&
            $dict_f->[$datan]{values}{'_dictionary.ddl_conformance'}[0] =~ /^3\./ ) {
            warn "dictionary is DDLm-compliant and can not be handled " .
                 "in this version\n";
        }
        if( exists $dict_f->[$datan]{save_blocks} ) {
            for my $saveblk ( @{$dict_f->[$datan]{save_blocks}} ) {
                ## print ">>>> processing save block '$saveblk->{name}'\n";
                next if !exists $saveblk->{values}{'_item.name'};
                for ( @{$saveblk->{values}{'_item.name'}} ) {
                    ## print ">>> defining '$_'\n";
                    $tags->{lc $_} = $saveblk;
                    $tags->{lc $_}{values}{_dataname} = $_;
                }
            }
        }

        if ( exists $dict_f->[$datan]{values}{_type} ) {
            for ( @{$dict_f->[$datan]{values}{_name}} ) {
                $tags->{lc $_} = $dict_f->[$datan];
                $tags->{lc $_}{values}{_dataname} = $dict_f->[$datan]{name};
            }
        }

        $datan++;
    }

    return $tags;
}

##
# Returns an array of tags of data items that have superseded the data item.
# @param $dict
#       Reference to a dictionary object as returned by 'get_dict' subroutine.
# @param $tag
#       Name of the data item.
# @return
#       Array of tags of data items that have superseded the data item. If the
#       data item has not been deprecated, an empty array is returned.
##
sub get_replacement_tags($$)
{
    my ( $dict, $tag, ) = @_;

    my $tag_values = $dict->{lc $tag}{values};

    my @replace_with;
    # check if data items are deprecated (replaced with other data items)
    if( defined $tag_values->{_related_item} ) {
        for( my $i = 0; $i < @{$tag_values->{_related_item}}; $i++ ) {
            if( $tag_values->{_related_function}[$i] eq 'replace' ) {
                push @replace_with, $tag_values->{_related_item}[$i];
            }
        }
    }
    return \@replace_with;
}

##
# Check value against range (defined in dictionary).
# @param $value
#       Value to be checked.
# @param $param
#       Parameter hash with the following keys:
#         {
#           'type'  => 'numb',
#                String, representing the type of value ('numb' or 'char').
#           'min'   => 0,
#                The minimum range value.
#           'max'   => 10,
#                The maximum range value.
#           'sigma  => 0.1
#               Standard deviation to be used when comparing numeric values
#               (3 sigma rule). If sigma is not provided, values are compared
#               disregarding the standard deviation.
#         }
# @return
#       -1 if no ranges were provided for the value;
#        0 if the value is out of the provided range;
#        1 if the value is in the provided range.
##
sub is_in_range($$)
{
    my ( $value, $param ) = @_;

    if( !exists $param->{'min'} &&
        !exists $param->{'max'} ) {
        return -1;
    }

    if( $param->{'type'} eq 'numb' ) {
        return is_in_range_numeric( $value, $param );
    } else {
        return is_in_range_char( $value, $param );
    }
}

##
# Checks numeric value against an inclusive numeric range.
# @param $value
#       Value to be checked.
# @param $param
#       Parameter hash with the following keys:
#         {
#           'min' - => 0,
#                The minimum range value.
#           'max'   => 10,
#                The maximum range value.
#           'sigma  => 0.1
#               Standard deviation to be used when comparing numeric values
#               (3 sigma rule). If sigma is not provided, values are compared
#               disregarding the standard deviation.
#         }
# @return
#        0 if the value is out of the provided range or is not a number
#          at all;
#        1 if the value is in the provided range.
##
sub is_in_range_numeric($$)
{
    my ( $value, $param ) = @_;

    my $min   = $param->{'min'};
    my $max   = $param->{'max'};
    my $sigma = $param->{'sigma'};

    if( ! looks_like_number($value) ) {
        return 0;
    }

    if( defined $sigma ) {
        $min = $min - 3 * $sigma if defined $min;
        $max = $max + 3 * $sigma if defined $max;
    };

    if(
        ( !defined $max || $value <= $max )
        &&
        ( !defined $min || $value >= $min )
    ) {
        return 1;
    }
    return 0;
}

##
# Checks character value against an inclusive character range.
# @param $value
#       Value to be checked.
# @param $param
#       Parameter hash with the following keys:
#         {
#           'min' - => a,
#                The minimum range value.
#           'max'   => c,
#                The maximum range value.
#         }
# @return
#        0 if the value is out of the provided range;
#        1 if the value is in the provided range.
##
sub is_in_range_char($$)
{
    my ( $value, $param ) = @_;

    if(
        ( !exists $param->{max} || $value le $param->{max} )
        &&
        ( !exists $param->{min} || $value ge $param->{min} )
    ) {
        return 1;
    }
    return 0;
}

##
# Returns an array of tags of the data items that are required to be present
# in the loop containing the analyzed data item.
# @param $dict
#       Reference to a dictionary object as returned by 'get_dict' subroutine.
# @param $tag
#       Name of the data item to analyze.
# @return $list_reference_tags
#       A reference to an array of tags of data items that are required to
#       be present in the loop containing the analyzed data items.
##
sub get_list_reference_tags($$)
{
    my ( $dict, $tag ) = @_;

    my $tag_values = $dict->{lc $tag}{values};
    my @list_reference_tags;
    if ( defined $tag_values && defined $tag_values->{_list_reference} ) {
        # _list_reference identifies data item(s) that must collectively be
        # in a loop. They are referenced by the names of their data blocks
        foreach my $ref_dataname (@{$tag_values->{_list_reference}}) {
          foreach my $tags ( sort keys %{$dict} ) {
              if ( '_' . $dict->{$tags}{values}{_dataname} eq $ref_dataname ) {
                  push @list_reference_tags, $tags;
              }
          }
        }
    }

    return \@list_reference_tags;
}

##
# Checks the existence of parent (foreign) keys.
##
sub check_list_link_parent($$$)
{
    my( $block, $tag, $parents ) = @_;

    return if @$parents > 1; # not handled yet, unsure how to do that

    my $parent = $parents->[0];
    return if !exists $block->{values}{$parent};

    my %parent_values = map { $_ => 1 } @{$block->{values}{$parent}};

    my @unmatched = uniq sort grep { !exists $parent_values{$_} }
                         @{$block->{values}{$tag}};

    for my $value (@unmatched) {
        next if ( $value eq '.' || $value eq '?' );
        warn "NOTE, value \"$value\" of data item \"$parent\" is required " .
             "as the data item is a parent of the \"$tag\" data item\n";
    }
}
