Sophie

Sophie

distrib > Mandriva > 2011.0 > i586 > media > contrib-release-debug > by-pkgid > e1717ba861d7ea800395d82cd47d3fc8 > files > 15

foremost-debug-1.5.7-2mdv2011.0.i586.rpm


/* FOREMOST
 *
 * By Jesse Kornblum
 *
 * This is a work of the US Government. In accordance with 17 USC 105,
 * copyright protection is not available for any work of the US Government.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */
 
//#define DEBUG 1
   
#ifndef __FOREMOST_H
#define __FOREMOST_H

/* Version information is defined in the Makefile */

/* Names credited as the program's authors. */
#define AUTHOR \
    "Jesse Kornblum, Kris Kendall, and Nick Mikus"

/* Copyright notice. Lines are terminated with \r\n because the text has
   to work on Win32; the extra \r is redundant elsewhere but should not
   cause any harm. */
#define COPYRIGHT \
    "This program is a work of the US Government. " \
    "In accordance with 17 USC 105,\r\n" \
    "copyright protection is not available for any work of the US Government.\r\n" \
    "This is free software; see the source for copying conditions. There is NO\r\n" \
    "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\r\n"

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <signal.h>

/* For va_arg */
#include <stdarg.h>

/* Linux-only headers, plus a macro that maps u_int64_t onto a plain
   unsigned long long. The platform macros tested throughout this file
   (__LINUX, __SOLARIS, __WIN32, __MACOSX) are not defined here;
   presumably the build system supplies them -- TODO confirm. */
#ifdef __LINUX
#include <sys/ioctl.h>
#include <sys/mount.h>
#define   u_int64_t   unsigned long long
#endif 


/* Per-platform byte-order setup. Each branch either includes a system
   header or defines BYTE_ORDER / BIG_ENDIAN / LITTLE_ENDIAN itself. */
#ifdef __LINUX

/* NOTE(review): presumably defined so that <endian.h> exposes the
   un-prefixed BYTE_ORDER names -- confirm against the C library in use. */
#ifndef __USE_BSD
#define __USE_BSD
#endif
#include <endian.h>

#elif defined (__SOLARIS)

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

/* BYTE_ORDER is chosen from whether _BIG_ENDIAN is defined once
   <sys/isa_defs.h> has been included. */
#include <sys/isa_defs.h>
#ifdef _BIG_ENDIAN       
#define BYTE_ORDER BIG_ENDIAN
#else
#define BYTE_ORDER LITTLE_ENDIAN
#endif

#elif defined (__WIN32)
/* NOTE(review): presumably included for its byte-order macros -- confirm. */
#include <sys/param.h>

#elif defined (__MACOSX)
#include <machine/endian.h>
#define __U16_TYPE unsigned short
#endif


/* Integer truth values. */
#define TRUE 1
#define FALSE 0

/* Number of bytes in one megabyte (1024 * 1024). */
#define ONE_MEGABYTE 1048576


/* RBF - Do we need these type definitions? */ 
/* On Solaris the fixed-width names are provided as macros that expand to
   plain unsigned int / unsigned long long. */
#ifdef __SOLARIS
#define   u_int32_t   unsigned int
#define   u_int64_t   unsigned long long
#endif 


/* The only time we're *not* on a UNIX system is when we're on Windows */
/* __UNIX is therefore defined whenever __WIN32 is not, unless it has
   already been defined. */
#ifndef __WIN32
#ifndef __UNIX
#define __UNIX
#endif  /* ifndef __UNIX */
#endif  /* ifndef __WIN32 */


/* Code specific to UNIX-like systems (everything that is not __WIN32) */
#ifdef __UNIX

/* Fallback for platforms whose branch above did not define __U16_TYPE */
#ifndef __U16_TYPE
#define __U16_TYPE unsigned short
#endif

#include <libgen.h>

/* If no earlier branch produced BYTE_ORDER, define the byte-order
   constants here and fall back to little-endian.
   NOTE(review): this is an assumption, not a detection -- a big-endian
   host that reaches this point will be treated as little-endian. */
#ifndef BYTE_ORDER 

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

#define BYTE_ORDER LITTLE_ENDIAN

#endif
/* This avoids compiler warnings on older systems */
int fseeko(FILE *stream, off_t offset, int whence);
off_t ftello(FILE *stream);


/* UNIX values for the console constants; the Win32 section defines the
   same names with different values. BLANK_LINE is a string of spaces
   (intended to be LINE_LENGTH characters wide). */
#define CMD_PROMPT "$"
#define DIR_SEPARATOR   '/'
#define NEWLINE "\n"
#define LINE_LENGTH 74
#define BLANK_LINE \
"                                                                          "

#endif /* #ifdef __UNIX */

/* This allows us to open standard input in binary mode by default 
   See http://gnuwin32.sourceforge.net/compile.html for more */
#include <fcntl.h>

/* Code specific to Microsoft Windows */
#ifdef __WIN32

/* By default, Windows uses long for off_t. This won't do. We
   need an unsigned number at minimum. Windows doesn't have 64 bit
   numbers though. */
/* NOTE(review): the #ifdef only detects off_t when it is a macro; the
   #define below takes effect for all later uses either way. */
#ifdef off_t
#undef off_t
#endif
#define off_t unsigned long

/* Win32 values for the console constants; the UNIX section defines the
   same names with different values. BLANK_LINE is a string of spaces
   (intended to be LINE_LENGTH characters wide). */
#define CMD_PROMPT "c:\\>"
#define  DIR_SEPARATOR   '\\'
#define NEWLINE "\r\n"
#define LINE_LENGTH 72
#define BLANK_LINE \
"                                                                        "


/* It would be nice to use 64-bit file lengths in Windows */
#define ftello   ftell
#define fseeko   fseek

/* Outside Cygwin, snprintf is redirected to _snprintf */
#ifndef __CYGWIN
#define  snprintf         _snprintf
#endif

#define  u_int32_t        unsigned long

/* We create macros for the Windows equivalent UNIX functions.
   No worries about lstat to stat; Windows doesn't have symbolic links */
#define lstat(A,B)      stat(A,B)

#define u_int64_t unsigned __int64

/* Outside Cygwin, realpath(A,B) is mapped to _fullpath(B,A,PATH_MAX);
   note that the two arguments swap places in the expansion. */
#ifndef __CYGWIN
	#define realpath(A,B)   _fullpath(B,A,PATH_MAX) 
#endif
/* Not used in md5deep anymore, but left in here in case I 
   ever need it again. Win32 documentation searches are evil.
   int asprintf(char **strp, const char *fmt, ...);
*/

/* Declarations for basename and getopt, which are declared by hand
   in this section. */
char *basename(char *a);
extern char *optarg;
extern int optind;
int getopt(int argc, char *const argv[], const char *optstring);

#endif   /* ifdef __WIN32 */


/* On non-glibc systems we have to manually set the __progname variable */
#ifdef __GLIBC__
/* Declaration only; the variable itself is not defined in this header */
extern char *__progname;
#else
/* NOTE(review): this is a definition inside a header, so every source
   file that includes it gets its own tentative definition. That only
   links when the toolchain merges common symbols (e.g. -fcommon);
   consider an extern declaration here plus one definition in a .c file. */
char *__progname;
#endif /* ifdef __GLIBC__ */

/* -----------------------------------------------------------------
   Program Defaults
   ----------------------------------------------------------------- */
#define MAX_STRING_LENGTH   1024
#define COMMENT_LENGTH   64

/* Modes refer to options that can be set by the user. Each mode is a
   single bit so that several modes can be combined in one value.
   The expansions are parenthesized so that a flag behaves as one value
   inside any larger expression (without the parentheses, something like
   `16 - mode_verbose` would parse as `(16 - 1) << 1`).
   Bits 1 to 7 are in use; the remaining bits up to 31 are free for
   future modes. We shouldn't use bits higher than 31 as Win32 can't go
   that high. */

#define mode_none             0
#define mode_verbose          (1<<1)
#define mode_quiet            (1<<2)
#define mode_ind_blk          (1<<3)
#define mode_quick            (1<<4)
#define mode_write_all        (1<<5)
#define mode_write_audit      (1<<6)
#define mode_multi_file       (1<<7)

/* Fixed limits */
#define MAX_NEEDLES                   254
#define NUM_SEARCH_SPEC_ELEMENTS        6
#define MAX_SUFFIX_LENGTH               8
#define MAX_FILE_TYPES                100
#define FOREMOST_NOEXTENSION_SUFFIX "NONE"

/* Default settings, file names and the divider line */
#define DEFAULT_MODE              mode_none
#define DEFAULT_CONFIG_FILE       "foremost.conf"
#define DEFAULT_OUTPUT_DIRECTORY  "output"
#define AUDIT_FILE_NAME           "audit.txt"
#define FOREMOST_DIVIDER          "------------------------------------------------------------------"

/* Numeric identifiers for the built-in file types, numbered
   consecutively from 0 (JPEG) to 32 (MP4).
   NOTE(review): presumably these are the values stored in s_spec.type
   -- confirm against the code that fills in the search specs. */
#define JPEG 0
#define GIF 1
#define BMP 2
#define MPG 3
#define PDF 4
#define DOC 5
#define AVI 6
#define WMV 7
#define HTM 8
#define ZIP 9
#define MOV 10
#define XLS 11
#define PPT 12
#define WPD 13
#define CPP 14
#define OLE 15
#define GZIP 16
#define RIFF 17
#define WAV 18
#define VJPEG 19
#define SXW 20
#define SXC 21
#define SXI 22
#define CONF 23
#define PNG 24
#define RAR 25
#define EXE 26
#define ELF 27
#define REG 28
#define DOCX 29
#define XLSX 30
#define PPTX 31
#define MP4 32


/* Byte-size multipliers, each 1024 times the previous one.
   Every expansion is parenthesized so that the macro acts as a single
   value inside a larger expression; unparenthesized, `n / MEGABYTE`
   would parse as `(n / 1024) * 1024`.
   KILOBYTE, MEGABYTE and GIGABYTE still fit in an int. TERABYTE and
   above do not fit in a 32-bit int, so they are computed as
   unsigned long long to avoid signed overflow. */
#define KILOBYTE                  1024
#define MEGABYTE                  (1024 * KILOBYTE)
#define GIGABYTE                  (1024 * MEGABYTE)
#define TERABYTE                  (1024ULL * GIGABYTE)
#define PETABYTE                  (1024 * TERABYTE)
#define EXABYTE                   (1024 * PETABYTE)

/* Unit selectors, numbered 0 (bytes) to 6 (exabytes) in the same order
   as the byte-size macros above.
   NOTE(review): presumably the argument accepted by units() -- confirm. */
#define UNITS_BYTES                     0
#define UNITS_KILOB                     1
#define UNITS_MEGAB                     2
#define UNITS_GIGAB                     3
#define UNITS_TERAB                     4
#define UNITS_PETAB                     5
#define UNITS_EXAB                      6

/* Search type selectors.
   NOTE(review): presumably the values of s_spec.searchtype and of the
   searchtype argument of bm_search()/init_bm_table() -- confirm. */
#define SEARCHTYPE_FORWARD      0
#define SEARCHTYPE_REVERSE      1
#define SEARCHTYPE_FORWARD_NEXT 2
#define SEARCHTYPE_ASCII        3

/* Byte-order selectors; distinct from the BIG_ENDIAN / LITTLE_ENDIAN
   platform macros.
   NOTE(review): presumably the `endian` argument of htos()/htoi()/htoll()
   -- confirm. */
#define FOREMOST_BIG_ENDIAN 0
#define FOREMOST_LITTLE_ENDIAN 1
/*DEFAULT CHUNK SIZE In MB*/
#define CHUNK_SIZE 100 


/* Wildcard is a global variable because it's used by very simple
   functions that don't need the whole state passed to them */

/* -----------------------------------------------------------------
   State Variable and Global Variables
   ----------------------------------------------------------------- */
/* NOTE(review): this is a definition inside a header, so every source
   file that includes it gets its own tentative definition of `wildcard`.
   That only links when the toolchain merges common symbols (e.g.
   -fcommon); consider an extern declaration here plus one definition in
   a .c file. */
char wildcard;
/* Program state. Most functions declared in this header take a pointer
   to one of these as their first argument. */
typedef struct f_state
{
    /* Same type as the mode arguments of set_mode() and get_mode();
       presumably a combination of the mode_* flags -- confirm. */
    off_t mode;

    /* Strings describing this run */
    char *config_file;
    char *input_file;
    char *output_directory;
    char *start_time;
    char *invocation;

    /* Audit file name, stream and open flag */
    char *audit_file_name;
    FILE *audit_file;
    int audit_file_open;

    int num_builtin;
    int chunk_size;     /* in MB */
    int fileswritten;
    int block_size;
    int skip;

    int time_stamp;
} f_state;

/* A byte pattern together with its length and a lookup table holding
   one size_t entry for every possible unsigned char value.
   NOTE(review): the table is presumably the one filled in by
   init_bm_table() -- confirm. */
typedef struct marker
{
    unsigned char *value;
    int len;
    size_t marker_bm_table[UCHAR_MAX + 1];
} marker;

/* One search specification: a file suffix and type, a header and a
   footer pattern (each with its length and a per-byte lookup table),
   up to five markers, and bookkeeping fields. */
typedef struct s_spec
{
    char *suffix;
    int type;           /* presumably one of the file type ids (JPEG, ...) -- confirm */
    u_int64_t max_len;

    /* Header pattern */
    unsigned char *header;
    unsigned int header_len;
    size_t header_bm_table[UCHAR_MAX + 1];

    /* Footer pattern */
    unsigned char *footer;
    unsigned int footer_len;
    size_t footer_bm_table[UCHAR_MAX + 1];

    /* Markers; num_markers presumably counts the used slots -- confirm */
    marker markerlist[5];
    int num_markers;

    int searchtype;     /* presumably one of SEARCHTYPE_* -- confirm */
    int case_sen;
    int found;

    char comment[MAX_STRING_LENGTH];    /* used for audit */
    int written;                        /* used for -a mode */
} s_spec;

/* NOTE(review): this is a definition inside a header, so every source
   file that includes it gets its own tentative definition of the array.
   That only links when the toolchain merges common symbols (e.g.
   -fcommon); consider an extern declaration here plus one definition in
   a .c file. The size 50 is a bare literal and differs from
   MAX_FILE_TYPES (100) -- confirm which bound the code relies on. */
s_spec search_spec[50];  /*ARRAY OF BUILTIN SEARCH TYPES*/

/* Information about one input: its name, size counters, the stream it
   is read through, and a flag for standard input. */
typedef struct f_info
{
    char *file_name;
    off_t total_bytes;

    /* We never use the total number of bytes in a file,
       only the number of megabytes when we display a time estimate */
    off_t total_megs;
    off_t bytes_read;

#ifdef __WIN32
    /* Win32 is a 32-bit operating system and can't handle file sizes
       larger than 4GB. We use this to keep track of overflows */
    off_t last_read;
    off_t overflow_count;
#endif

    FILE *handle;
    int is_stdin;
} f_info;

/* Set if the user hits ctrl-c */
/* NOTE(review): this is a definition inside a header, so every source
   file that includes it gets its own tentative definition. That only
   links when the toolchain merges common symbols (e.g. -fcommon);
   consider an extern declaration here plus one definition in a .c file.
   If the flag is written from a signal handler, volatile sig_atomic_t
   would be the appropriate type -- confirm how it is set. */
int signal_caught;

/* -----------------------------------------------------------------
   Function declarations
   ----------------------------------------------------------------- */

/* State functions */

int initialize_state(f_state *s, int argc, char **argv);
void free_state(f_state *s);

char *get_invocation(f_state *s);
char *get_start_time(f_state *s);

int set_config_file(f_state *s, char *fn);
char *get_config_file(f_state *s);

int set_output_directory(f_state *s, char *fn);
char *get_output_directory(f_state *s);

void set_audit_file_open(f_state *s);
int get_audit_file_open(f_state *s);

void set_mode(f_state *s, off_t new_mode);
int get_mode(f_state *s, off_t check_mode);

/* Note: get_search_def takes the state by value, not by pointer */
int set_search_def(f_state *s, char *ft, u_int64_t max_file_size);
void get_search_def(f_state s);

void set_input_file(f_state *s, char *filename);
void get_input_file(f_state *s);

void set_chunk(f_state *s, int size);

void init_bm_table(unsigned char *needle,
                   size_t table[UCHAR_MAX + 1],
                   size_t len,
                   int casesensitive,
                   int searchtype);

void set_skip(f_state *s, int size);
void set_block(f_state *s, int size);


/* Only declared when __DEBUG is defined */
#ifdef __DEBUG
void dump_state(f_state *s);
#endif

/* The audit file */
int open_audit_file(f_state *s);
void audit_msg(f_state *s, char *format, ...);
int close_audit_file(f_state *s);


/* Set up our output directory */
int create_output_directory(f_state *s);
int write_to_disk(f_state *s,
                  s_spec *needle,
                  u_int64_t len,
                  unsigned char *buf,
                  u_int64_t t_offset);
int create_sub_dirs(f_state *s);
void cleanup_output(f_state *s);

/* Configuration Files */
int load_config_file(f_state *s);


/* Helper functions */
char *current_time(void);
off_t find_file_size(FILE *f);
char *human_readable(off_t size, char *buffer);
char *units(unsigned int c);
unsigned int chop(char *buf);
void print_search_specs(f_state *s);
int memwildcardcmp(const void *s1, const void *s2, size_t n, int caseSensitive);
int charactersMatch(char a, char b, int caseSensitive);
void printx(unsigned char *buf, int start, int end);

/* Conversions from a byte array to short / int / 64-bit values; the
   second argument selects the byte order */
unsigned short htos(unsigned char s[], int endian);
unsigned int htoi(unsigned char s[], int endian);
u_int64_t htoll(unsigned char s[], int endian);

int displayPosition(f_state *s, f_info *i, u_int64_t pos);


/* Interface functions
   These functions stay the same regardless of whether we're using a
   command line interface or a GUI */
void fatal_error(f_state *s, char *msg);
void print_error(f_state *s, char *fn, char *msg);
void print_message(f_state *s, char *format, va_list argp);
void print_stats(f_state *s);

/* Engine */
int process_file(f_state *s);
int process_stdin(f_state *s);

/* Pattern search over a buffer; `table` has one entry per possible
   unsigned char value */
unsigned char *bm_search(unsigned char *needle,
                         size_t needle_len,
                         unsigned char *haystack,
                         size_t haystack_len,
                         size_t table[UCHAR_MAX + 1],
                         int case_sen,
                         int searchtype);

/* Same parameters as bm_search plus a trailing start_pos */
unsigned char *bm_search_skipn(unsigned char *needle,
                               size_t needle_len,
                               unsigned char *haystack,
                               size_t haystack_len,
                               size_t table[UCHAR_MAX + 1],
                               int casesensitive,
                               int searchtype,
                               int start_pos);
/* BUILTIN */
/* Kept inside the include guard together with the f_state and s_spec
   types it depends on, so that including this header more than once
   does not re-emit the declaration. */
unsigned char *extract_file(f_state *s, u_int64_t c_offset,
                            unsigned char *foundat, u_int64_t buflen,
                            s_spec *needle, u_int64_t f_offset);

#endif /* __FOREMOST_H */