#!/usr/bin/python ''' Created on 21 Okt 2016 @author: Elmarie van Heerden and Aris Karastergiou ''' ########################################################################### # 1. Include predefined libraries. ########################################################################### import numpy as np from collections import deque # similar to circular buffer in C++ import struct import os import argparse # to parse the function arguments import sys import time ########################################################################### # 2. Help function -- how to set the variables in the arguments list. ########################################################################### def RFI_Clipper_info(): print( "\nRFI_Clipper [options] \n"\ "\n"\ "Clip RFI in SIGPROC filterbank file. Writes to stdout.\n"\ "Function originally written by Aris Karastergiou in C++\n"\ "Elmarie van Heerden (2016) - elmarie007@hotmail.com\n"\ "\n"\ "OPTIONS:\n"\ " -info This info text\n"\ " -inputFile File to be processed\n"\ " -outputFile Output file name (def = 'stdout.fil')" " -bandPassAve Starting value for bandpass average value (def=110)\n"\ " -bandPassVar Starting value for bandpass variance value (def=16)\n"\ " -nBands Number of bands to divide bandpass in (def=8)\n"\ " -maxHistory Size of buffer to store past values and calculate statistics, s (def=1 s)\n"\ " -crFactor margin of tolerance for bright channels (def=4)\n"\ " -srFactor margin of tolerance for the spectrum (def=3)\n"\ " -zeroDMing Normalize to zero mean (Yes/No) (def=Yes)\n"\ " -dataChunksInSec Size of data to be read and processed, s (def=8 s)\n"\ "\n"); return exit(1) ########################################################################### # 3. Check if input file exists. ########################################################################### def extant_file(x): """ 'Type' for argparse - checks that file exists but does not open. """ if not os.path.exists(x): # Argparse uses the ArgumentTypeError to give a rejection message like: # error: argument input: x does not exist raise argparse.ArgumentTypeError("{0} does not exist".format(x)) return x if __name__ == "__main__": startTime = time.time() ########################################################################### # 4. Set the default values before processing the data ########################################################################### #II# #Define all the global variables nSubbands = 1 goodSamples = 0 spectrumRMS = 0.0 spectrumSum = 0.0 dataModel = 0.0 _rmsRunAve = 0.0 _meanRunAve = 0.0 _useMeanOverRMS = 0 k = 4 meanMinusMinimum = k / np.sqrt(2.0*k) _lastGoodSpectrum = [] NumberOfFlaggedSamples = 0 positionOfFlaggedSamples = [] ########################################################################### # 5.Parse the arguments passed to RFI_Clipper from the terminal. ########################################################################### parser = argparse.ArgumentParser() parser.add_argument("-info", help="Information regarding the function",action="store_true") parser.add_argument("-inputFile", help="Input file path and name", required=True, metavar="FILE", type=extant_file) parser.add_argument("-outputFile", help="Output file name (def=stdout.fil)", action="store") parser.add_argument("-bandPassAve", help="Starting value for bandpass average value (def=110)", action="store", type=float ) parser.add_argument("-bandPassVar", help="Starting value for bandpass variance value (def=16)", action="store", type=float ) parser.add_argument("-nBands", help="Number of bands to divide bandpass in (def=8)", action="store", type=int ) parser.add_argument("-maxHistory", help="Size of buffer to store past values, s (def=1 s)", action="store", type=float ) parser.add_argument("-crFactor", help="margin of tolerance for bright channels (def=10)", type=float) parser.add_argument("-srFactor", help="margin of tolerance for the spectrum (def=3)", type=float) parser.add_argument("-zeroDMing", help="Normalize to zero mean (Yes/No) (def=Yes)", action="store") parser.add_argument("-dataChunksInSec", help="Size of data to be read and processed, s (def=8 s)", type=float) args = parser.parse_args() print(args) if args.info: RFI_Clipper_info() sys.exit(1) if args.inputFile: filename_in=args.inputFile if args.outputFile: filename_out=args.outputFile else: filename_out='stdout.fil' if args.bandPassAve: bandPassAve=args.bandPassAve else: bandPassAve = 110.0 if args.bandPassVar: bandPassVar=args.bandPassVar else: bandPassVar = 16.0 if args.nBands: nBands=args.nBands else: nBands = 8 if args.maxHistory: maxHistory = args.maxHistory else: maxHistory = 1.0 #Average filter length if args.crFactor: _crFactor=args.crFactor else: _crFactor= 10.0 if args.srFactor: _srFactor = args.srFactor else: _srFactor = 3.0 if args.zeroDMing: if (args.zeroDMing=='Yes'): _zeroDMing = 1 else: _zeroDMing = 0 else: _zeroDMing = 1 if args.dataChunksInSec: dataChunksInSec=args.dataChunksInSec else: dataChunksInSec = 8.0 print(filename_in) ########################################################################### # 6.Read the header of the input file to set additional parameters ########################################################################### #I# Read in the header and append the output file with the header. Also #read in some of the global variables from the header of the SIGPROC file #filename_in = 'C:\\Users\\Public\\Documents\\PhD_Pulsars\\Code\\PhDplots\\SNInject0.fil' #filename_in = '/home/elmarie/Journal Publications/RFI Documentation/Data/NonStat/SNInject0.fil' #filename_out = 'C:\\Users\\Public\\Documents\\PhD_Pulsars\\Code\\PhDplots\\SNInject0_normalised.fil' #filename_out = '/home/elmarie/Journal Publications/RFI Documentation/Data/NonStat/SNInject0_Normalised.fil' f = open(filename_in,'rb') f2 = open(filename_out,'wb') readInHeader= f.read(1000) headerLength = readInHeader.find('END') headerLength += 3 f.seek(0) writeHeader= f.read(headerLength) f2.write(writeHeader) placeOfNbits = readInHeader.find('nbits') f2.seek(placeOfNbits+5) f2.write(struct.pack('i', 32)) f2.close() plek = readInHeader.find('nbits') f.seek(plek+5) nbits = f.read(4) nbits = struct.unpack('i', nbits)[0] if (nbits==8): vartype = 'uint8' elif (nbits == 16): vartype = 'uint16' elif (nbits == 32): vartype = 'float32' plek = readInHeader.find('nchans') f.seek(plek+6) nchans = f.read(4) nChannels = struct.unpack('i', nchans)[0] nBins = nChannels*nSubbands plek = readInHeader.find('tsamp') f.seek(plek+5) tsamp = f.read(8) tsamp = struct.unpack('d', tsamp)[0] tobs = (os.path.getsize(filename_in)-headerLength)/nChannels*tsamp nSample = np.int32(tobs/tsamp) maxHistory = maxHistory/tsamp # maxhistory is given in seconds -> need to convert it to number of samples _meanBuffer = deque(maxlen=maxHistory) _rmsBuffer = deque(maxlen=maxHistory) NumberOfChunks = np.int64(np.floor(tobs/dataChunksInSec)) # number of chunks to read the data -> memory manageable segments NumberOfSamplesPerChunk = np.int64(np.floor(dataChunksInSec/tsamp)) # number of samples per chunk of read data averageSamplestoReadFromFile = np.int64(nChannels*NumberOfSamplesPerChunk) # times samples x number channels ########################################################################### # 7. Read the input file and start the processing ########################################################################### for a in range(0,NumberOfChunks): TimeElapsed = time.time() - startTime print('a='+str(a)+' Time elapsed since started:'+str(TimeElapsed)) if (a==0): f = open(filename_in,'rb') # Find and skip the header f.seek(headerLength) # read in the data one time stamp at a time, i.e. all the channel values per time sample data = np.float32(np.fromfile(f, dtype=vartype, count=averageSamplestoReadFromFile)) f.close() f = open(filename_in,'rb') f.seek(headerLength+a*averageSamplestoReadFromFile) # read in the data one time stamp at a time, i.e. all the channel values per time sample data = np.float32(np.fromfile(f, dtype=vartype, count=averageSamplestoReadFromFile)) f.close() I = np.reshape(data,(NumberOfSamplesPerChunk,nBins)) I = I.T #III# #Import and run over each time sample for t in range(0,NumberOfSamplesPerChunk): #TimeElapsed2 = time.time() - startTime #print('t='+str(t)+' Time elapsed since started:'+str(TimeElapsed2)) #IV# # Define local variables that get reset with every call bandPass = np.ones((nBins,2)) bandPass[:,0] = bandPass[:,0]*bandPassAve bandPass[:,1] = bandPass[:,1]*bandPassVar SpectrumSum = 0.0 SpectrumSumSq = 0.0 goodChannels = np.zeros((nBands,1)) channelsPerBand = nBins/nBands miniData = np.ones((nBands,1))*1e6 miniModel = np.ones((nBands,1))*1e6 dataMinusModel = np.zeros((nBands,1)) bandSigma = np.zeros((nBands,1)) _fractionBadChannels = 0.0 spectrumRMStolerance = 0.0 _badSpectra = 0 if (np.size(_lastGoodSpectrum) != nBins): _lastGoodSpectrum = np.zeros(nBins)*0.0 _remainingZeros = nBins #V# # Start filling the rmsBuffer with values if (np.size(_rmsBuffer) < maxHistory): for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels+c band = int(binLocal/channelsPerBand) if I[binLocal,t] margin): NumberOfFlaggedSamples +=1 I[binLocal,t] = _lastGoodSpectrum[binLocal] positionOfFlaggedSamples.append([binLocal,(a*NumberOfSamplesPerChunk+t),0]) else: band = int(binLocal / channelsPerBand) goodChannels[band] +=1 spectrumSum += I[binLocal,t] totalGoodChannels = np.sum(goodChannels) _fractionBadChannels += float((nBins - totalGoodChannels)/nBins) spectrumSum /= totalGoodChannels badBands = 0 for b in range(0,8): if (goodChannels[b] < (0.8*channelsPerBand)): badBands +=1 if (goodChannels[b] == 0): badBands +=4 if (totalGoodChannels < 0.8*nBins): badBands +=4 #Build running average of the spectrum if (np.size(_meanBuffer) spectrumRMStolerance) or (badBands >= 4)): for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels+c I[binLocal,t] = _lastGoodSpectrum[binLocal] NumberOfFlaggedSamples +=1 positionOfFlaggedSamples.append([binLocal,(a*NumberOfSamplesPerChunk+t),1]) _badSpectra +=1 _meanBuffer.pop() _rmsBuffer.pop() _meanBuffer.append(_meanBuffer[-1]) _rmsBuffer.append(_rmsBuffer[-1]) else: if (_remainingZeros != 0): for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels+c if ((_lastGoodSpectrum[binLocal] == 0.0) and (I[binLocal,t] != 0.0)): _lastGoodSpectrum[binLocal] = I[binLocal,t] _remainingZeros -=1 spectrumSum = 0.0 spectrumSumSq = 0.0 for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels +c I[binLocal,t] = I[binLocal,t] - (bandPass[binLocal,0] + dataModel) spectrumSum += I[binLocal,t] spectrumSumSq += I[binLocal,t]*I[binLocal,t] spectrumSum /= nBins spectrumRMS = np.sqrt(spectrumSumSq/nBins - np.power(spectrumSum,2)) if (spectrumRMS == 0.0): spectrumRMS = 1.0 for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels +c if (_zeroDMing == 1): I[binLocal,t] -= _zeroDMing * spectrumSum else: I[binLocal,t] -= _rmsRunAve I[binLocal,t] /= spectrumRMS if (I[binLocal,t] > _crFactor): I[binLocal,t] = 0.0 positionOfFlaggedSamples.append([binLocal,(a*NumberOfSamplesPerChunk+t),2]) NumberOfFlaggedSamples +=1 bandPassAve = _meanRunAve bandPassVar = _rmsRunAve f2 = open(filename_out,'ab') I=np.reshape(I, averageSamplestoReadFromFile, order ='F') I.tofile(f2) f2.close() ########################################################################### # 8. Take care of last chunk of data if there is one ########################################################################### if ((a == NumberOfChunks-1) and ((tobs/dataChunksInSec-NumberOfChunks)>0)): f = open(filename_in,'rb') f.seek(headerLength+NumberOfChunks*averageSamplestoReadFromFile) # read in the data one time stamp at a time, i.e. all the channel values per time sample SamplestoReadFromFile = np.int64(nChannels*(np.floor(tobs/tsamp)-NumberOfChunks*NumberOfSamplesPerChunk)) data = np.float32(np.fromfile(f, dtype=vartype, count=SamplestoReadFromFile)) f.close() I = np.reshape(data,(np.int64(tobs/tsamp-NumberOfChunks*NumberOfSamplesPerChunk),nBins)) I = I.T #III# #Import and run over each time sample for t in range(0,(np.int64(tobs/tsamp-NumberOfChunks*NumberOfSamplesPerChunk))): #IV# # Define local variables that get reset with every call bandPass = np.ones((nBins,2)) bandPass[:,0] = bandPass[:,0]*bandPassAve bandPass[:,1] = bandPass[:,1]*bandPassVar SpectrumSum = 0.0 SpectrumSumSq = 0.0 goodChannels = np.zeros((nBands,1)) channelsPerBand = nBins/nBands miniData = np.ones((nBands,1))*1e6 miniModel = np.ones((nBands,1))*1e6 dataMinusModel = np.zeros((nBands,1)) bandSigma = np.zeros((nBands,1)) _fractionBadChannels = 0.0 spectrumRMStolerance = 0.0 _badSpectra = 0 if (np.size(_lastGoodSpectrum) != nBins): _lastGoodSpectrum = np.zeros(nBins)*0.0 _remainingZeros = nBins #V# # Start filling the rmsBuffer with values if (np.size(_rmsBuffer) < maxHistory): for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels+c band = int(binLocal/channelsPerBand) if I[binLocal,t] margin): NumberOfFlaggedSamples +=1 I[binLocal,t] = _lastGoodSpectrum[binLocal] positionOfFlaggedSamples.append([binLocal,(a*NumberOfSamplesPerChunk+t),0]) else: band = int(binLocal / channelsPerBand) goodChannels[band] +=1 spectrumSum += I[binLocal,t] totalGoodChannels = np.sum(goodChannels) _fractionBadChannels += float((nBins - totalGoodChannels)/nBins) spectrumSum /= totalGoodChannels badBands = 0 for b in range(0,8): if (goodChannels[b] < (0.8*channelsPerBand)): badBands +=1 if (goodChannels[b] == 0): badBands +=4 if (totalGoodChannels < 0.8*nBins): badBands +=4 #Build running average of the spectrum if (np.size(_meanBuffer) spectrumRMStolerance) or (badBands >= 4)): for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels+c I[binLocal,t] = _lastGoodSpectrum[binLocal] NumberOfFlaggedSamples +=1 positionOfFlaggedSamples.append([binLocal,(a*NumberOfSamplesPerChunk+t),1]) _badSpectra +=1 _meanBuffer.pop() _rmsBuffer.pop() _meanBuffer.append(_meanBuffer[-1]) _rmsBuffer.append(_rmsBuffer[-1]) else: if (_remainingZeros != 0): for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels+c if ((_lastGoodSpectrum[binLocal] == 0.0) and (I[binLocal,t] != 0.0)): _lastGoodSpectrum[binLocal] = I[binLocal,t] _remainingZeros -=1 spectrumSum = 0.0 spectrumSumSq = 0.0 for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels +c I[binLocal,t] = I[binLocal,t] - (bandPass[binLocal,0] + dataModel) spectrumSum += I[binLocal,t] spectrumSumSq += I[binLocal,t]*I[binLocal,t] spectrumSum /= nBins spectrumRMS = np.sqrt(spectrumSumSq/nBins - np.power(spectrumSum,2)) if (spectrumRMS == 0.0): spectrumRMS = 1.0 for s in range(0,nSubbands): for c in range(0,nChannels): binLocal = s*nChannels +c if (_zeroDMing == 1): I[binLocal,t] -= _zeroDMing * spectrumSum else: I[binLocal,t] -= _rmsRunAve I[binLocal,t] /= spectrumRMS if (I[binLocal,t] > _crFactor): I[binLocal,t] = 0.0 positionOfFlaggedSamples.append([binLocal,(a*NumberOfSamplesPerChunk+t),2]) NumberOfFlaggedSamples +=1 bandPassAve = _meanRunAve bandPassVar = _rmsRunAve f2 = open(filename_out,'ab') I=np.reshape(I, SamplestoReadFromFile, order ='F') I.tofile(f2) f2.close() print(NumberOfFlaggedSamples,'out of ', tobs/(tsamp*nChannels))