#!/usr/bin/env python
# ----------------------------------
"""
f2n 1.0

Convert a text file from any foreign format to the native one. This
just handles different line-end conventions. Unless otherwise
directed, it guesses the input format by counting the number of
occurrences of different line-end conventions in the input. This
works well. If there is more than one input file, the result will be
concatenated.

Usage: f2n [options] [<infile> ...] [-o<outfile>]

A single dash ("-") as file name stands for stdin (input) or stdout
(output). Without input files, stdin is read; without -o, the result
is written to stdout.

Restriction: For simplicity, f2n will read the whole file into
memory. This should not be a problem with current memory sizes and
typical text files. If your files are larger than your swap space,
you are in trouble.

Options:

-h
   Print this information and exit.

-v
   Be verbose - print some information about files and guesses.

-o<outfile>
   Write the result to <outfile> instead of stdout.

-in=DOS
   Assume that the input format is in DOS/Windows (Carriage Return/New
   Line) format.

-in=OldMac
   Assume that the input format is in New Line/Carriage Return format
   as allegedly used by MacOS up to 9.

-in=UNIX
   Assume that the input format is simple New Line, as in UNIX.

-in=CR
   Assume the input uses Carriage Return for line ends.

Copyright 2005 Stephan Schulz, schulz@eprover.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program ; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA

The original copyright holder can be contacted as

Stephan Schulz
Helene-Mayer-Ring 10/907
80809 Muenchen
Germany

or via email (address above).
"""

import sys
import re
import string
import os


class NoStdRWStreamException(Exception):
    """
    Raised by flexopen() when "-" is opened with a mode that is
    neither a read mode nor a write mode.
    """


class UsageErrorException(Exception):
    """
    Signals a command line usage error. Not raised anywhere in this
    file; kept as a public name of the module.
    """


def flexopen(name, mode):
    """
    Open a file or stdin/out for reading or writing. Uses the
    convention that a dash represents stdin/out.

    name -- file name, or "-" for the standard stream matching mode.
    mode -- mode string as for open(); for "-" only the leading "r"
            or "w" is accepted.

    Returns the open file object. Raises NoStdRWStreamException if
    "-" is combined with any mode not starting with "r" or "w".
    """
    if name == "-":
        kind = mode[:1]
        if kind == "r":
            stream = sys.stdin
        elif kind == "w":
            stream = sys.stdout
        else:
            raise NoStdRWStreamException(
                "You cannot open '-' for both reading and writing!")
        if "b" in mode:
            # Where the standard streams are text wrappers (Python 3),
            # a binary mode gets the underlying byte stream. Plain
            # file objects without a "buffer" are returned unchanged.
            stream = getattr(stream, "buffer", stream)
        return stream
    return open(os.path.expanduser(name), mode)


def _is_stream(file, std):
    """
    Return True if file is the standard stream std or the byte-level
    buffer belonging to it.
    """
    if file is std:
        return True
    raw = getattr(std, "buffer", None)
    return raw is not None and file is raw


def flexclose(file):
    """
    "Closes" a file object. Standard streams are never really closed:
    stdout/stderr are flushed and stdin is left alone. Everything else
    has close() called on it.
    """
    if _is_stream(file, sys.stdin):
        return
    if _is_stream(file, sys.stdout) or _is_stream(file, sys.stderr):
        file.flush()
    else:
        file.close()


def _is_option(arg):
    """
    Return True if arg is an option, i.e. starts with a dash. A lone
    "-" is not an option: it is the file name for stdin/stdout.
    """
    return arg[0:1] == "-" and arg != "-"


def get_options(argv=None):
    """
    Filter argument list for arguments starting with a -.

    argv -- argument list; defaults to sys.argv[1:] at call time.

    Returns a list. A lone "-" is treated as a file name, not as an
    option.
    """
    if argv is None:
        argv = sys.argv[1:]
    return [arg for arg in argv if _is_option(arg)]


def get_args(argv=None):
    """
    Filter argument list for real arguments (everything that is not
    an option, including a lone "-").

    argv -- argument list; defaults to sys.argv[1:] at call time.

    Returns a list.
    """
    if argv is None:
        argv = sys.argv[1:]
    return [arg for arg in argv if not _is_option(arg)]


nlcr_re = re.compile("\n\r")
crnl_re = re.compile("\r\n")
nl_re = re.compile("\n")
cr_re = re.compile("\r")

# Maps each -in=... option to the line-end pattern it forces.
# "-in=oldMac" is an alternative capitalisation kept for compatibility.
_INPUT_FORMATS = {
    "-in=DOS": crnl_re,
    "-in=OldMac": nlcr_re,
    "-in=oldMac": nlcr_re,
    "-in=UNIX": nl_re,
    "-in=CR": cr_re,
}


def find_nl_convention(str):
    """
    Find out which newline convention is used in str and return a
    regexp describing it.

    Returns one of nlcr_re, crnl_re, nl_re or cr_re, or None if str
    contains no line ends at all. Ties are resolved in that order.
    If the module-level flag "verbose" is set, the decision is
    reported on stderr.
    """
    # This is funky, as empty lines generate cr/nl and nl/cr pairs
    # under either two-character convention. So the individual numbers
    # may not be correct (single-character counts can even become
    # negative), but the largest one still decides the file type.
    nlcr_count = len(nlcr_re.findall(str))
    crnl_count = len(crnl_re.findall(str))
    pair_count = nlcr_count + crnl_count
    nl_count = len(nl_re.findall(str)) - pair_count
    cr_count = len(cr_re.findall(str)) - pair_count

    candidates = [
        (nlcr_count, nlcr_re, "File uses NL/CR convention (old Mac?)\n"),
        (crnl_count, crnl_re, "File uses CR/NL convention (DOS)\n"),
        (nl_count, nl_re, "File uses NL convention (UNIX)\n"),
        (cr_count, cr_re, "File uses CR convention (weird)\n"),
    ]
    best = max([count for count, _, _ in candidates])

    # A maximum of zero means there is no line end of any kind;
    # without this check the first candidate would win the 0 == 0 tie.
    if best > 0:
        for count, regexp, message in candidates:
            if count == best:
                if verbose:
                    sys.stderr.write(message)
                return regexp

    if verbose:
        sys.stderr.write(
            "File follows no known new line convention, no conversion\n")
    return None


out = sys.stdout
hardnewlineconv = None
verbose = False


def _read_text(name):
    """
    Read the whole input called name with its line ends untouched.

    Returns (text, from_bytes). If the read produced a byte string
    that is not the native str type, it is decoded as latin-1 (which
    maps every byte to exactly one character, so nothing is lost) and
    from_bytes is True.
    """
    infile = flexopen(name, "rb")
    try:
        raw = infile.read()
    finally:
        flexclose(infile)
    if isinstance(raw, str):
        return raw, False
    return raw.decode("latin-1"), True


def _write_text(stream, text, from_bytes):
    """
    Write text, whose line ends are all "\\n", to stream in the native
    line-end convention.
    """
    sink = getattr(stream, "buffer", None)
    if from_bytes and sink is not None:
        # The text is latin-1 decoded bytes: restore the exact bytes
        # and write them below the text layer, inserting the native
        # line end ourselves.
        stream.flush()
        sink.write(text.replace("\n", os.linesep).encode("latin-1"))
    else:
        # Plain text stream: it's the system library's task to convert
        # \n to the native convention.
        # NOTE(review): if from_bytes is set but the stream has no
        # byte layer, non-ASCII bytes are re-encoded by the stream.
        stream.write(text)


def main(argv=None):
    """
    Run f2n on the given argument list (default: sys.argv[1:]).

    Sets the module-level variables out, hardnewlineconv and verbose
    from the options, converts every input file and writes the
    concatenated result to out. Exits via sys.exit() for -h and for
    option errors.
    """
    global out, hardnewlineconv, verbose

    if argv is None:
        argv = sys.argv[1:]
    out = sys.stdout
    hardnewlineconv = None
    verbose = False

    for option in get_options(argv):
        if option == "-h":
            print(__doc__)
            sys.exit()
        elif option == "-v":
            verbose = True
        elif option[:2] == "-o":
            if not option[2:]:
                sys.exit("Option -o needs a file name (-o- means stdout)")
            out = flexopen(option[2:], "w")
        elif option in _INPUT_FORMATS:
            hardnewlineconv = _INPUT_FORMATS[option]
        else:
            sys.exit("Unknown option " + option)

    args = get_args(argv)
    if not args:
        args = ["-"]

    for name in args:
        if verbose:
            sys.stderr.write("Processing file " + name + "\n")
        text, from_bytes = _read_text(name)
        if hardnewlineconv is not None:
            newlineconv = hardnewlineconv
        else:
            newlineconv = find_nl_convention(text)
        if newlineconv is not None:
            text = newlineconv.sub("\n", text)
        _write_text(out, text, from_bytes)
    flexclose(out)


if __name__ == '__main__':
    main()