Really-amin commited on
Commit
908bf6c
·
verified ·
1 Parent(s): 145fde1

Upload 3 files

Browse files
Files changed (3) hide show
  1. detect.py +110 -0
  2. ocr.py +24 -0
  3. textcleaner.txt +715 -0
detect.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import pytesseract
3
+ import numpy as np
4
+ import cv2
5
+ import tempfile
6
+ import io
7
+
8
+
9
+ # get grayscale image
10
def get_grayscale(image):
    """Return a single-channel grayscale conversion of the BGR ``image``."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grayscale
12
+
13
+
14
+ # noise removal
15
def remove_noise(image):
    """Return ``image`` smoothed by a median blur with aperture size 3."""
    aperture_size = 3
    return cv2.medianBlur(image, aperture_size)
17
+
18
+
19
+ # thresholding
20
def thresholding(image):
    """Binarize ``image`` with a fixed global threshold of 160.

    Uses ``cv2.THRESH_BINARY`` with a maximum value of 255 and returns only
    the thresholded image (the second element of ``cv2.threshold``'s result).
    """
    # Alternative kept for reference: let Otsu's method pick the threshold.
    #   cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    _, binary = cv2.threshold(image, 160, 255, cv2.THRESH_BINARY)
    return binary
23
+
24
+
25
+ # dilation
26
def dilate(image):
    """Dilate ``image`` once using a 5x5 all-ones ``uint8`` kernel."""
    structuring_element = np.ones((5, 5), np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
29
+
30
+
31
+ # erosion
32
def erode(image):
    """Erode ``image`` once using a 5x5 all-ones ``uint8`` kernel."""
    structuring_element = np.ones((5, 5), np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded
35
+
36
+
37
+ # opening - erosion followed by dilation
38
def opening(image):
    """Apply a morphological opening (``cv2.MORPH_OPEN``) with a 5x5 kernel."""
    structuring_element = np.ones((5, 5), np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened
41
+
42
+
43
+ # canny edge detection
44
def canny(image):
    """Run Canny edge detection on ``image`` with thresholds 100 and 200."""
    lower_threshold, upper_threshold = 100, 200
    return cv2.Canny(image, lower_threshold, upper_threshold)
46
+
47
+
48
+ # skew correction
49
def deskew(image):
    """Rotate ``image`` about its centre to undo the skew of its non-zero pixels.

    The skew angle is taken from the minimum-area rectangle enclosing every
    pixel whose value is greater than zero. The result has the same width and
    height as the input; uncovered border pixels are filled by replicating
    the edge (``cv2.BORDER_REPLICATE``).

    Args:
        image: Image array. Assumed to be single-channel so that each non-zero
            pixel yields one 2-D point -- TODO confirm against callers.

    Returns:
        The rotated image, or an unmodified copy of ``image`` when it has no
        non-zero pixels (there is nothing to fit a rectangle to).
    """
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # cv2.minAreaRect raises on an empty point set; a blank image has no
        # measurable skew, so hand it back untouched.
        return image.copy()

    # np.where yields platform-sized integer indices (int64 on most systems),
    # while minAreaRect is documented for 32-bit int/float points. float32
    # represents every realistic pixel coordinate exactly.
    angle = cv2.minAreaRect(coords.astype(np.float32))[-1]

    # NOTE(review): this normalisation assumes minAreaRect reports angles in
    # [-90, 0). Newer OpenCV releases reportedly use a different range, in
    # which case the branch below needs revisiting -- confirm against the
    # installed OpenCV version.
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
62
+
63
+
64
+ # template matching
65
def match_template(image, template):
    """Return the ``cv2.TM_CCOEFF_NORMED`` match map of ``template`` in ``image``."""
    method = cv2.TM_CCOEFF_NORMED
    scores = cv2.matchTemplate(image, template, method)
    return scores
67
+
68
+
69
def _tesseract_config(langs, mode):
    """Return the Tesseract config string for ``langs``/``mode``, or ``None`` if unsupported."""
    persian_configs = {
        # Text only: digits, guillemets and '#' are blacklisted.
        "t": r'-l fas --psm 6 -c tessedit_char_blacklist="۰١۲۳۴۵۶۷۸۹«»1234567890#"',
        # Text and numbers: letters, space, digits and basic punctuation.
        "tn": r'-l fas --psm 6 -c tessedit_char_whitelist="آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهی ۰١۲۳۴۵۶۷۸۹.?!,،:;/"',
        # Tables: letters and digits only.
        "table": r'-l fas --psm 6 -c tessedit_char_whitelist="آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهی۰١۲۳۴۵۶۷۸۹"',
    }
    if langs == "fa":
        return persian_configs.get(mode)
    if langs == "en":
        return r'-l eng --psm 6'
    if langs == "faen":
        return r'-l fas+eng --psm 6 '
    return None


def main(inputfile="Inputs/1.jpg", outputfile="result.txt", langs="fa", mode="tn"):
    """Run Tesseract OCR on ``inputfile`` and write the text to ``outputfile``.

    The image is resized to a width of 1024 pixels (aspect ratio preserved),
    saved as ``<inputfile>_Upscaled.png`` with 300 dpi metadata, re-read with
    OpenCV, converted to grayscale and passed to ``pytesseract``.

    Args:
        inputfile: Path of the image to recognise.
        outputfile: Path of the UTF-8 text file that receives the result.
        langs: ``"fa"``, ``"en"`` or ``"faen"``.
        mode: Character-filter preset, only used when ``langs == "fa"``:
            ``"t"``, ``"tn"`` or ``"table"``.

    Raises:
        SystemExit: With status 1, after printing a message, when ``langs`` or
            ``mode`` is not one of the supported values. The options are
            checked before any file is opened or written.
    """
    # Validate first: previously an unknown ``mode`` with langs="fa" left
    # ``custom_config`` unbound and crashed only after the image was processed.
    custom_config = _tesseract_config(langs, mode)
    if custom_config is None:
        print("Choose valid Options.")
        raise SystemExit(1)

    upscaled_path = f"{inputfile}_Upscaled.png"
    with Image.open(inputfile) as im:
        length_x, width_y = im.size
        factor = float(1024.0 / length_x)
        size = int(factor * length_x), int(factor * width_y)
        image_resize = im.resize(size, Image.Resampling.LANCZOS)
        image_resize.save(upscaled_path, dpi=(300, 300))

    # Only grayscale conversion is applied; the other helpers in this module
    # (deskew, erode, thresholding, remove_noise, opening, canny) are available
    # as alternative pre-processing steps but are not used here.
    img = get_grayscale(cv2.imread(upscaled_path))

    # Convert the image to text.
    text = pytesseract.image_to_string(img, config=custom_config)

    # Write the result with UTF-8 encoding.
    with io.open(outputfile, 'w', encoding='utf8') as f:
        f.write(text)
107
+
108
+
109
+ if __name__ == "__main__":
110
+ main()
ocr.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys, getopt
2
+ from OCR import detect
3
+
4
def main(argv):
    """Parse command-line options and run ``detect.main`` on the given image.

    Args:
        argv: Argument list without the program name. Recognised options:
            ``-i/--ifile`` input image (required), ``-o/--ofile`` output text
            file (default ``result.txt``), ``-m/--mode`` OCR mode forwarded to
            ``detect.main`` (default ``tn``), ``-h`` print usage.

    Raises:
        SystemExit: Status 2 on an unknown option or when no input file is
            given; default status after ``-h``.
    """
    usage = 'ocr.py -i <inputfile> -o <outputfile> -m <mode>'
    inputfile = ''
    outputfile = 'result.txt'
    mode = 'tn'  # same default as detect.main
    try:
        opts, _ = getopt.getopt(argv, "hi:o:m:", ["ifile=", "ofile=", "mode="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-m", "--mode"):
            mode = arg
    if not inputfile:
        # Without this check an empty path was handed to detect.main.
        print(usage)
        sys.exit(2)
    # The parsed mode used to be dropped here, so -m had no effect.
    detect.main(inputfile=inputfile, outputfile=outputfile, mode=mode)
23
+ if __name__ == "__main__":
24
+ main(sys.argv[1:])
textcleaner.txt ADDED
@@ -0,0 +1,715 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #
3
+ # Developed by Fred Weinhaus 6/9/2009 .......... revised 2/18/2022
4
+ #
5
+ # ------------------------------------------------------------------------------
6
+ #
7
+ # Licensing:
8
+ #
9
+ # Copyright © Fred Weinhaus
10
+ #
11
+ # My scripts are available free of charge for non-commercial use, ONLY.
12
+ #
13
+ # For use of my scripts in commercial (for-profit) environments or
14
+ # non-free applications, please contact me (Fred Weinhaus) for
15
+ # licensing arrangements. My email address is fmw at alink dot net.
16
+ #
17
+ # If you: 1) redistribute, 2) incorporate any of these scripts into other
18
+ # free applications or 3) reprogram them in another scripting language,
19
+ # then you must contact me for permission, especially if the result might
20
+ # be used in a commercial or for-profit environment.
21
+ #
22
+ # My scripts are also subject, in a subordinate manner, to the ImageMagick
23
+ # license, which can be found at: http://www.imagemagick.org/script/license.php
24
+ #
25
+ # ------------------------------------------------------------------------------
26
+ #
27
+ ####
28
+ #
29
+ # USAGE: textcleaner [-r rotate] [-l layout] [-c cropoff] [-g] [-e enhance ]
30
+ # [-f filtersize] [-o offset] [-u] [-P] [-t threshold] [-s sharpamt]
31
+ # [-S saturation] [-a adaptblur] [-T] [-p padamt] [-b bgcolor] [-F fuzzval]
32
+ # [-i invert] [-C compression] [-D density] [-R resize] infile outfile
33
+ # USAGE: textcleaner [-help]
34
+ #
35
+ # OPTIONS:
36
+ #
37
+ # -r rotate (pre-)rotate image 90 degrees in direction specified if
38
+ # aspect ratio does not match layout; options are cw
39
+ # (or clockwise), ccw (or counterclockwise) and n
40
+ # (or none); default=none or no rotation
41
+ # -l layout desired layout; options are p (or portrait) or
42
+ # l (or landscape); default=portrait
43
+ # -c cropoff image cropping offsets after potential rotate 90;
44
+ # choices: one, two or four non-negative integer comma
45
+ # separated values; one value will crop all around;
46
+ # two values will crop at left/right,top/bottom;
47
+ # four values will crop left,top,right,bottom
48
+ # -g convert document to grayscale before enhancing
49
+ # -e enhance enhance image brightness before cleaning;
50
+ # choices are: none, stretch or normalize;
51
+ # default=stretch
52
+ # -f filtersize size of filter used to clean background;
53
+ # integer>0; default=15
54
+ # -o offset offset of filter in percent used to reduce noise;
55
+ # integer>=0; default=5
56
+ # -u unrotate image; cannot unrotate more than
57
+ # about 5 degrees
58
+ # -P preserve input size after unrotate
59
+ # -t threshold text smoothing threshold; 0<=threshold<=100;
60
+ # nominal value is about 50; default is no smoothing
61
+ # -s sharpamt sharpening amount in pixels; float>=0;
62
+ # nominal about 1; default=0
63
+ # -S saturation color saturation expressed as percent; integer>=0;
64
+ # only applicable if -g not set; a value of 100 is
65
+ # no change; default=200 (double saturation)
66
+ # -a adaptblur alternate text smoothing using adaptive blur;
67
+ # floats>=0; default=0 (no smoothing)
68
+ # -T trim background around outer part of image
69
+ # -p padamt border pad amount around outer part of image;
70
+ # integer>=0; default=0
71
+ # -b bgcolor desired color for background or "image"; default=white
72
+ # -F fuzzval fuzz value for determining bgcolor when bgcolor=image;
73
+ # integer>=0; default=10
74
+ # -i invert invert colors; choices are: 1 or 2 for one-way or two-ways
75
+ # (input or input and output); default is no inversion
76
+ # -C compression TIFF output compression mode; choices are: none (n),
77
+ # lzw(l), zip(z), fax(f), and group4(g); default=none
78
+ # -D density input density when reading single page vector (PDF)
79
+ # files; integer>0; default is no input density
80
+ # -R resize resize after reading a single page vector when
81
+ # supersampling to rasterize; 0<=integer<100 (percent);
82
+ # default is no resize
83
+ #
84
+ ###
85
+ #
86
+ # NAME: TEXTCLEANER
87
+ #
88
+ # PURPOSE: To process a scanned document of text to clean the text background.
89
+ #
90
+ # DESCRIPTION: TEXTCLEANER processes a scanned document of text to clean
91
+ # the text background and enhance the text. The order of processing is:
92
+ # 1) optional apply density for single page vector (PDF) files
93
+ # 2) optional 90 degree rotate if aspect does not match layout
94
+ # 3) optional crop,
95
+ # 4) optional convert to grayscale,
96
+ # 5) optional enhance,
97
+ # 6) filter to clean background and optionally smooth/antialias,
98
+ # 7) optional unrotate (limited to about 5 degrees or less),
99
+ # 8) optional text smoothing,
100
+ # 9) optional sharpening,
101
+ # 10) optional saturation change (if -g is not specified),
102
+ # 11) optional alternate text smoothing via adaptive blur
103
+ # 12) optional resize to compensate for increased read density for vector to raster output
104
+ # 13) optional auto trim of border (effective only if background well-cleaned),
105
+ # 14) optional pad of border
106
+ #
107
+ # OPTIONS:
108
+ #
109
+ # -r rotate ... ROTATE image either clockwise or counterclockwise by 90 degrees,
110
+ # if image aspect ratio does not match the layout mode. Choices are: cw (or
111
+ # clockwise), ccw (or counterclockwise) and n (or none). The default is no rotation.
112
+ # Note: this is a pre-rotation
113
+ #
114
+ # -l layout ... LAYOUT for determining if rotation is to be applied. The choices
115
+ # are p (or portrait) or l (or landscape). The image will be rotated if rotate is
116
+ # specified and the aspect ratio of the image does not match the layout chosen.
117
+ # The default is portrait.
118
+ #
119
+ # -c cropoffsets ... CROPOFFSETS are the image cropping offsets after potential
120
+ # rotate 90. Choices: one, two or four non-negative integer comma separated
121
+ # values. One value will crop all around. Two values will crop at
122
+ # left/right,top/bottom. Four values will crop left,top,right,bottom.
123
+ #
124
+ # -g ... Convert the document to grayscale.
125
+ #
126
+ # -e enhance ... ENHANCE brightness of image. The choices are: none, stretch,
127
+ # or normalize. The default=stretch.
128
+ #
129
+ # -f filtersize ... FILTERSIZE is the size of the filter used to clean up the
130
+ # background. Values are integers>0. The filtersize needs to be larger than
131
+ # the thickness of the writing, but the smaller the better beyond this. Making it
132
+ # larger will increase the processing time and may lose text. The default is 15.
133
+ #
134
+ # -o offset ... OFFSET is the offset threshold in percent used by the filter
135
+ # to eliminate noise. Values are integers>=0. Values too small will leave much
136
+ # noise and artifacts in the result. Values too large will remove too much
137
+ # text leaving gaps. The default is 5.
138
+ #
139
+ # -u ... UNROTATE the image. This is limited to about 5 degrees or less.
140
+ #
141
+ # -P ... PRESERVE input size after unrotate
142
+ #
143
+ # -t threshold ... THRESHOLD is the text smoothing threshold. Values are integers
144
+ # between 0 and 100. Smaller values smooth/thicken the text more. Larger values
145
+ # thin, but can result in gaps in the text. Nominal value is in the middle at
146
+ # about 50. The default is to disable smoothing.
147
+ #
148
+ # -s sharpamt ... SHARPAMT is the amount of pixel sharpening to be applied to
149
+ # the resulting text. Values are floats>=0. If used, it should be small
150
+ # (suggested about 1). The default=0 (no sharpening).
151
+ #
152
+ # -S saturation ... SATURATION is the desired color saturation of the text
153
+ # expressed as a percentage. Values are integers>=0. A value of 100 is no change.
154
+ # Larger values will make the text colors more saturated. The default=200
155
+ # indicates double saturation. Not applicable when -g option specified.
156
+ #
157
+ # -a adaptblur ... ADAPTBLUR applies an alternate text smoothing using
158
+ # an adaptive blur. The values are floats>=0. The default=0 indicates no
159
+ # blurring.
160
+ #
161
+ # -T ... TRIM the border around the image.
162
+ #
163
+ # -p padamt ... PADAMT is the border pad amount in pixels. The default=0.
164
+ #
165
+ # -b bgcolor ... BGCOLOR is the desired background color after it has been
166
+ # cleaned up. Any valid IM color may be used. If bgcolor=image, then the color will
167
+ # be computed from the top left corner pixel and a fuzzval. The final color will be
168
+ # computed subsequently as an average over the whole image. The default is white.
169
+ #
170
+ # -F fuzzval ... FUZZVAL is the fuzz value for determining bgcolor when bgcolor=image.
171
+ # Values are integers>=0. The default=10.
172
+ #
173
+ # -i invert ... INVERT colors for example to convert white text on black background to
174
+ # black text on white background. The choices are: 1 or 2 for one-way or two-ways
175
+ # (input or input and output). The default is no inversion.
176
+ #
177
+ # -C compression ... COMPRESSION mode for TIFF output. The choices are: none (n),
178
+ # lzw(l), zip(z), fax(f), and group4(g). The default=none.
179
+ #
180
+ # -D density ... input DENSITY when reading a single page vector (PDF) file
181
+ # and outputting a raster image. Values are integers>0. The default is no
182
+ # input density to be applied. Note multi-page vector files will not be
183
+ # properly processed by this script.
184
+ #
185
+ # -R resize ... final RESIZE when reading a single page vector (PDF) file when
186
+ # supersampling to rasterize the output. Values are 0<=integer<100 (percent)
187
+ # to downsample the image (typically back to its default size at 72 dpi).
188
+ # The default is no special resize. Note multi-page vector files will not be
189
+ # properly processed by this script.
190
+ #
191
+ # CAVEAT: No guarantee that this script will work on all platforms,
192
+ # nor that trapping of inconsistent parameters is complete and
193
+ # foolproof. Use At Your Own Risk.
194
+ #
195
+ ######
196
+ #
197
+
198
+ # set default values
199
+ rotate="none" # rotate 90 clockwise (cw) or counterclockwise (ccw)
200
+ layout="portrait" # rotate 90 to match layout; portrait or landscape
201
+ cropoff="" # crop amounts; comma separate list of 1, 2 or 4 integers
202
+ numcrops=0 # number of crops flag
203
+ gray="no" # convert to grayscale flag
204
+ enhance="stretch" # none, stretch, normalize
205
+ filtersize=15 # local area filter size
206
+ offset=5 # local area offset to remove "noise"; too small-get noise, too large-lose text
207
+ threshold="" # smoothing threshold
208
+ sharpamt=0 # sharpen sigma
209
+ saturation=200 # color saturation percent; 100 is no change
210
+ adaptblur=0 # adaptive blur
211
+ unrotate="no" # unrotate flag
212
+ preserve="no" # preserve input size after unrotate flag
213
+ trim="no" # trim flag
214
+ padamt=0 # pad amount
215
+ bgcolor="white" # color for output whiteboard background
216
+ fuzzval=10 # fuzz value for determining bgcolor from image
217
+ invert="" # invert colors: 1 or 2
218
+ compression="none" # TIFF compression modes
219
+ density="" # input density for rasterizing vector files
220
+ resize="" # final resize
221
+
222
+ # set directory for temporary files
223
+ dir="." # suggestions are dir="." or dir="/tmp"
224
+
225
+ # set up functions to report Usage and Usage with Description
226
+ PROGNAME=`type $0 | awk '{print $3}'` # search for executable on path
227
+ PROGDIR=`dirname $PROGNAME` # extract directory of program
228
+ PROGNAME=`basename $PROGNAME` # base name of program
229
usage1() {
    # Print a blank line and "<program>: <message>" on stderr, followed by
    # the text that sed extracts from this script's own leading '#' comment
    # header (read from "$PROGDIR/$PROGNAME").
    echo "" >&2
    echo "$PROGNAME:" "$@" >&2
    sed -e '1,/^####/d; /^###/g; /^#/!q; s/^#//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME" >&2
}
235
usage2() {
    # Same as usage1 but with a different sed filter (blanks lines starting
    # with '######' and strips any run of leading '#'), also written to stderr.
    echo "" >&2
    echo "$PROGNAME:" "$@" >&2
    sed -e '1,/^####/d; /^######/g; /^#/!q; s/^#*//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME" >&2
}
241
+
242
+
243
+ # function to report error messages
244
errMsg() {
    # Print the message given as $1 (surrounded by blank lines), show the
    # short usage text and terminate the script with status 1.
    echo ""
    # Quoted so that a message embedding user input (e.g. ROTATE=*) is not
    # word-split or glob-expanded into file names.
    echo "$1"
    echo ""
    usage1
    exit 1
}
252
+
253
+
254
+ # function to test for minus at start of value of second part of option 1 or 2
255
checkMinus() {
    # Abort with the caller-provided $errorMsg when the option value in $1
    # starts with a minus sign (i.e. it looks like another option).
    # A case pattern is used instead of piping `echo "$1"` into grep because
    # echo treats values such as -n or -e as its own options and prints
    # nothing, which let those values slip through the check.
    case "$1" in
        -*) errMsg "$errorMsg" ;;
    esac
}
260
+
261
+ # test for correct number of arguments and get values
262
+ if [ $# -eq 0 ]
263
+ then
264
+ # help information
265
+ echo ""
266
+ usage2
267
+ exit 0
268
+ elif [ $# -gt 32 ]
269
+ then
270
+ errMsg "--- TOO MANY ARGUMENTS WERE PROVIDED ---"
271
+ else
272
+ while [ $# -gt 0 ]
273
+ do
274
+ # get parameter values
275
+ case "$1" in
276
+ -h|-help) # help information
277
+ echo ""
278
+ usage2
279
+ exit 0
280
+ ;;
281
+ -r) # rotate
282
+ shift # to get the next parameter
283
+ # test if parameter starts with minus sign
284
+ errorMsg="--- INVALID ROTATE SPECIFICATION ---"
285
+ checkMinus "$1"
286
+ rotate=`echo "$1" | tr "[:upper:]" "[:lower:]"`
287
+ case "$rotate" in
288
+ none|n) rotate="none" ;;
289
+ clockwise|cw) rotate="cw" ;;
290
+ counterclockwise|ccw) rotate="ccw" ;;
291
+ *) errMsg "--- ROTATE=$rotate IS NOT A VALID CHOICE ---" ;;
292
+ esac
293
+ ;;
294
+ -l) # layout
295
+ shift # to get the next parameter
296
+ # test if parameter starts with minus sign
297
+ errorMsg="--- INVALID LAYOUT SPECIFICATION ---"
298
+ checkMinus "$1"
299
+ layout=`echo "$1" | tr "[:upper:]" "[:lower:]"`
300
+ case "$layout" in
301
+ portrait|p) layout="portrait" ;;
302
+ landscape|l) layout="landscape" ;;
303
+ *) errMsg "--- LAYOUT=$layout IS NOT A VALID CHOICE ---" ;;
304
+ esac
305
+ ;;
306
+ -c) # get cropoffsets
307
+ shift # to get the next parameter
308
+ # test if parameter starts with minus sign
309
+ errorMsg="--- INVALID CROPOFFSETS SPECIFICATION ---"
310
+ checkMinus "$1"
311
+ cropoff="$1"
312
+ cropoff="${cropoff},"
313
+ cropoff=`expr "$cropoff" : '\([,0-9]*\)'`
314
+ numcrops=`echo "$cropoff" | tr "," " " | wc -w`
315
+ [ "$cropoff" = "" ] && errMsg "--- ONE OR TWO OR FOUR OFFSETS MUST BE PROVIDED ---"
316
+ [ $numcrops -ne 1 -a $numcrops -ne 2 -a $numcrops -ne 4 ] && errMsg "--- ONE OR TWO OR FOUR OFFSETS MUST BE PROVIDED ---"
317
+ crop1=`echo "$cropoff" | cut -d, -f1`
318
+ crop2=`echo "$cropoff" | cut -d, -f2`
319
+ crop3=`echo "$cropoff" | cut -d, -f3`
320
+ crop4=`echo "$cropoff" | cut -d, -f4`
321
+ ;;
322
+ -g) # set grayscale
323
+ gray="yes"
324
+ ;;
325
+ -e) # get enhance
326
+ shift # to get the next parameter
327
+ # test if parameter starts with minus sign
328
+ errorMsg="--- INVALID ENHANCE SPECIFICATION ---"
329
+ checkMinus "$1"
330
+ enhance="$1"
331
+ case "$1" in
332
+ none) ;;
333
+ stretch) ;;
334
+ normalize) ;;
335
+ *) errMsg "--- ENHANCE=$enhance IS NOT A VALID CHOICE ---" ;;
336
+ esac
337
+ ;;
338
+ -f) # get filtersize
339
+ shift # to get the next parameter
340
+ # test if parameter starts with minus sign
341
+ errorMsg="--- INVALID FILTERSIZE SPECIFICATION ---"
342
+ checkMinus "$1"
343
+ filtersize=`expr "$1" : '\([0-9]*\)'`
344
+ [ "$filtersize" = "" ] && errMsg "--- FILTERSIZE=$filtersize MUST BE A NON-NEGATIVE INTEGER ---"
345
+ filtersizetest=`echo "$filtersize < 1" | bc`
346
+ [ $filtersizetest -eq 1 ] && errMsg "--- FILTERSIZE=$filtersize MUST BE AN INTEGER GREATER THAN 0 ---"
347
+ ;;
348
+ -o) # get offset
349
+ shift # to get the next parameter
350
+ # test if parameter starts with minus sign
351
+ errorMsg="--- INVALID OFFSET SPECIFICATION ---"
352
+ checkMinus "$1"
353
+ offset=`expr "$1" : '\([0-9]*\)'`
354
+ [ "$offset" = "" ] && errMsg "--- OFFSET=$offset MUST BE A NON-NEGATIVE INTEGER ---"
355
+ ;;
356
+ -t) # get threshold
357
+ shift # to get the next parameter
358
+ # test if parameter starts with minus sign
359
+ errorMsg="--- INVALID THRESHOLD SPECIFICATION ---"
360
+ checkMinus "$1"
361
+ threshold=`expr "$1" : '\([0-9]*\)'`
362
+ [ "$threshold" = "" ] && errMsg "--- THRESHOLD=$threshold MUST BE A NON-NEGATIVE INTEGER ---"
363
+ thresholdtestA=`echo "$threshold < 0" | bc`
364
+ thresholdtestB=`echo "$threshold > 100" | bc`
365
+ [ $thresholdtestA -eq 1 -o $thresholdtestB -eq 1 ] && errMsg "--- THRESHOLD=$threshold MUST BE AN INTEGER GREATER BETWEEN 0 AND 100 ---"
366
+ ;;
367
+ -s) # get sharpamt
368
+ shift # to get the next parameter
369
+ # test if parameter starts with minus sign
370
+ errorMsg="--- INVALID SHARPAMT SPECIFICATION ---"
371
+ checkMinus "$1"
372
+ sharpamt=`expr "$1" : '\([.0-9]*\)'`
373
+ [ "$sharpamt" = "" ] && errMsg "--- SHARPAMT=$sharpamt MUST BE A NON-NEGATIVE FLOAT ---"
374
+ ;;
375
+ -S) # get saturation
376
+ shift # to get the next parameter
377
+ # test if parameter starts with minus sign
378
+ errorMsg="--- INVALID SATURATION SPECIFICATION ---"
379
+ checkMinus "$1"
380
+ saturation=`expr "$1" : '\([0-9]*\)'`
381
+ [ "$saturation" = "" ] && errMsg "--- SATURATION=$saturation MUST BE A NON-NEGATIVE INTEGER ---"
382
+ ;;
383
+ -a) # get adaptblur
384
+ shift # to get the next parameter
385
+ # test if parameter starts with minus sign
386
+ errorMsg="--- INVALID ADAPTBLUR SPECIFICATION ---"
387
+ checkMinus "$1"
388
+ adaptblur=`expr "$1" : '\([.0-9]*\)'`
389
+ [ "$adaptblur" = "" ] && errMsg "--- ADAPTBLUR=$adaptblur MUST BE A NON-NEGATIVE FLOAT ---"
390
+ ;;
391
+ -u) # set unrotate
392
+ unrotate="yes"
393
+ ;;
394
+ -P) # set preserve unrotate
395
+ preserve="yes"
396
+ ;;
397
+ -T) # set trim
398
+ trim="yes"
399
+ ;;
400
+ -p) # get padamt
401
+ shift # to get the next parameter
402
+ # test if parameter starts with minus sign
403
+ errorMsg="--- INVALID PADAMT SPECIFICATION ---"
404
+ checkMinus "$1"
405
+ padamt=`expr "$1" : '\([0-9]*\)'`
406
+ [ "$padamt" = "" ] && errMsg "--- PADAMT=$padamt MUST BE A NON-NEGATIVE INTEGER ---"
407
+ ;;
408
+ -b) # get bgcolor
409
+ shift # to get the next parameter
410
+ # test if parameter starts with minus sign
411
+ errorMsg="--- INVALID BACKGROUND COLOR SPECIFICATION ---"
412
+ checkMinus "$1"
413
+ bgcolor="$1"
414
+ ;;
415
+ -F) # get fuzzval
416
+ shift # to get the next parameter
417
+ # test if parameter starts with minus sign
418
+ errorMsg="--- INVALID FUZZVAL SPECIFICATION ---"
419
+ checkMinus "$1"
420
+ fuzzval=`expr "$1" : '\([0-9]*\)'`
421
+ [ "$fuzzval" = "" ] && errMsg "--- FUZZVAL=$fuzzval MUST BE A NON-NEGATIVE INTEGER ---"
422
+ ;;
423
+ -i) # get invert
424
+ shift # to get the next parameter
425
+ # test if parameter starts with minus sign
426
+ errorMsg="--- INVALID INVERT SPECIFICATION ---"
427
+ checkMinus "$1"
428
+ invert=`expr "$1" : '\([0-9]*\)'`
429
+ [ "$invert" = "" ] && errMsg "--- INVERT=$invert MUST BE A NON-NEGATIVE INTEGER ---"
430
+ testA=`echo "$invert < 1" | bc`
431
+ testB=`echo "$invert > 2" | bc`
432
+ [ $testA -eq 1 -o $testB -eq 1 ] && errMsg "--- INVERT=$invert MUST BE AN INTEGER VALUE OF 1 OR 2 ---"
433
+ ;;
434
+ -C) # compression
435
+ shift # to get the next parameter
436
+ # test if parameter starts with minus sign
437
+ errorMsg="--- INVALID COMPRESSION SPECIFICATION ---"
438
+ checkMinus "$1"
439
+ compression=`echo "$1" | tr "[:upper:]" "[:lower:]"`
440
+ case "$compression" in
441
+ none|n) compression="none" ;;
442
+ lzw|l) compression="lzw" ;;
443
+ zip|z) compression="zip" ;;
444
+ fax|f) compression="fax" ;;
445
+ group4|g) compression="group4" ;;
446
+ *) errMsg "--- COMPRESSION=$compression IS NOT A VALID CHOICE ---" ;;
447
+ esac
448
+ ;;
449
+ -D) # get density
450
+ shift # to get the next parameter
451
+ # test if parameter starts with minus sign
452
+ errorMsg="--- INVALID DENSITY SPECIFICATION ---"
453
+ checkMinus "$1"
454
+ density=`expr "$1" : '\([0-9]*\)'`
455
+ [ "$density" = "" ] && errMsg "--- DENSITY=$density MUST BE A NON-NEGATIVE INTEGER ---"
456
+ test=`echo "$density < 1" | bc`
457
+ [ $test -eq 1 ] && errMsg "--- DENSITY=$density MUST BE AN INTEGER GREATER THAN 0 ---"
458
+ ;;
459
+ -R) # get resize
460
+ shift # to get the next parameter
461
+ # test if parameter starts with minus sign
462
+ errorMsg="--- INVALID RESIZE SPECIFICATION ---"
463
+ checkMinus "$1"
464
+ resize=`expr "$1" : '\([0-9]*\)'`
465
+ [ "$resize" = "" ] && errMsg "--- RESIZE=$resize MUST BE A NON-NEGATIVE INTEGER ---"
466
+ testA=`echo "$resize < 0" | bc`
467
+ testB=`echo "$resize > 100" | bc`
468
+ [ $testA -eq 1 -o $testB -eq 1 ] && errMsg "--- RESIZE=$resize MUST BE AN INTEGER BETWEEN 0 AND 100 ---"
469
+ ;;
470
+ -) # STDIN and end of arguments
471
+ break
472
+ ;;
473
+ -*) # any other - argument
474
+ errMsg "--- UNKNOWN OPTION ---"
475
+ ;;
476
+ *) # end of arguments
477
+ break
478
+ ;;
479
+ esac
480
+ shift # next option
481
+ done
482
+ #
483
+ # get infile and outfile
484
+ infile="$1"
485
+ outfile="$2"
486
+ fi
487
+
488
+ # test that infile provided
489
+ [ "$infile" = "" ] && errMsg "NO INPUT FILE SPECIFIED"
490
+
491
+ # test that outfile provided
492
+ [ "$outfile" = "" ] && errMsg "NO OUTPUT FILE SPECIFIED"
493
+
494
# Temporary ImageMagick MPC image and its companion .cache file; the process
# id ($$) is part of the name so that concurrent runs do not collide.
tmpA1="$dir/textcleaner_1_$$.mpc"
tmpA2="$dir/textcleaner_1_$$.cache"
# Clean up on any exit. The handler deliberately does not call "exit": an
# explicit "exit 0" inside the EXIT trap replaces the script's real status,
# so every failure reported through errMsg (exit 1) after this point would
# have been returned to the caller as success.
trap "rm -f \"$tmpA1\" \"$tmpA2\"" 0
# On HUP/INT/QUIT/TERM clean up and terminate with a failure status.
trap "rm -f \"$tmpA1\" \"$tmpA2\"; exit 1" 1 2 3 15
#trap "rm -f $tmpA1 $tmpA2; exit 1" ERR
499
+
500
+ # get im version
501
+ im_version=`convert -list configure | \
502
+ sed '/^LIB_VERSION_NUMBER */!d; s//,/; s/,/,0/g; s/,0*\([0-9][0-9]\)/\1/g' | head -n 1`
503
+
504
+ # test for hdri enabled
505
+ # NOTE: must put grep before trap using ERR in case it does not find a match
506
+ if [ "$im_version" -ge "07000000" ]; then
507
+ hdri_on=`convert -version | grep "HDRI"`
508
+ else
509
+ hdri_on=`convert -list configure | grep "enable-hdri"`
510
+ fi
511
+
512
+ # colorspace RGB and sRGB swapped between 6.7.5.5 and 6.7.6.7
513
+ # though probably not resolved until the latter
514
+ # then -colorspace gray changed to linear between 6.7.6.7 and 6.7.8.2
515
+ # then -separate converted to linear gray channels between 6.7.6.7 and 6.7.8.2,
516
+ # though probably not resolved until the latter
517
+ # so -colorspace HSL/HSB -separate and -colorspace gray became linear
518
+ # but we need to use -set colorspace RGB before using them at appropriate times
519
+ # so that results stay as in original script
520
+ # The following was determined from various version tests using textcleaner
521
+ # with IM 6.7.4.10, 6.7.6.10, 6.7.9.0
522
+ if [ "$im_version" -lt "06070607" -o "$im_version" -gt "06070707" ]; then
523
+ setcspace="-set colorspace RGB"
524
+ else
525
+ setcspace=""
526
+ fi
527
+ # no need for setcspace for grayscale or channels after 6.8.5.4
528
+ if [ "$im_version" -gt "06080504" ]; then
529
+ setcspace=""
530
+ fi
531
+
532
+ # set up density
533
+ if [ "$density" = "" ]; then
534
+ applydensity=""
535
+ else
536
+ applydensity="-density $density"
537
+ fi
538
+
539
+ if [ "$invert" != "" ]; then
540
+ inversion1="-negate"
541
+ else
542
+ inversion1=""
543
+ fi
544
+
545
+
546
# read the input image into the TMP cached image.
# The 90 degree pre-rotation cannot be applied on this command: whether it is
# needed depends on the aspect ratio, which is only known after the image has
# been read. (It used to reference $rotation here, before that variable was
# assigned, so the -r option never had any effect.)
convert -quiet $applydensity "$infile" +repage $inversion1 "$tmpA1" ||
    errMsg "--- FILE $infile NOT READABLE OR HAS ZERO SIZE ---"

# get image size
ww=`convert $tmpA1 -ping -format "%w" info:`
hh=`convert $tmpA1 -ping -format "%h" info:`

# get image h/w aspect ratio and determine if portrait=1 (h/w>=1) or landscape=0 (h/w<1)
aspect=`convert xc: -format "%[fx:($hh/$ww)>=1?1:0]" info:`
#echo "ww=$ww; hh=$hh; aspect=$aspect"

# set up rotation: rotate only when the aspect does not match the requested layout
if [ "$layout" = "portrait" -a $aspect -eq 0 -a "$rotate" = "cw" ]; then
    rotation="-rotate 90"
elif [ "$layout" = "portrait" -a $aspect -eq 0 -a "$rotate" = "ccw" ]; then
    rotation="-rotate -90"
elif [ "$layout" = "landscape" -a $aspect -eq 1 -a "$rotate" = "cw" ]; then
    rotation="-rotate 90"
elif [ "$layout" = "landscape" -a $aspect -eq 1 -a "$rotate" = "ccw" ]; then
    rotation="-rotate -90"
else
    rotation=""
fi

# apply the pre-rotation to the cached image and refresh the size, so that the
# crop and extent computations that follow see the rotated dimensions
if [ "$rotation" != "" ]; then
    convert $tmpA1 $rotation +repage $tmpA1 ||
        errMsg "--- FAILED TO ROTATE $infile ---"
    ww=`convert $tmpA1 -ping -format "%w" info:`
    hh=`convert $tmpA1 -ping -format "%h" info:`
fi
570
+
571
+ # set up cropping
572
+ if [ "$cropoff" != "" -a $numcrops -eq 1 ]; then
573
+ wwc=`convert xc: -format "%[fx:$ww-2*$crop1]" info:`
574
+ hhc=`convert xc: -format "%[fx:$hh-2*$crop1]" info:`
575
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop1 +repage"
576
+ elif [ "$cropoff" != "" -a $numcrops -eq 2 ]; then
577
+ wwc=`convert xc: -format "%[fx:$ww-2*$crop1]" info:`
578
+ hhc=`convert xc: -format "%[fx:$hh-2*$crop2]" info:`
579
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop2 +repage"
580
+ elif [ "$cropoff" != "" -a $numcrops -eq 4 ]; then
581
+ wwc=`convert xc: -format "%[fx:$ww-($crop1+$crop3)]" info:`
582
+ hhc=`convert xc: -format "%[fx:$hh-($crop2+$crop4)]" info:`
583
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop2 +repage"
584
+ else
585
+ cropping=""
586
+ fi
587
+ #echo "cropoff=$cropoff; numcrops=$numcrops; cropping=$cropping"
588
+
589
+ # test if grayscale
590
+ grayscale=`convert $tmpA1 -format "%[colorspace]" info:`
591
+ typegray=`convert $tmpA1 -format '%r' info: | grep 'Gray'`
592
+ if [ "$gray" = "yes" -o "$grayscale" = "Gray" -o "$typegray" != "" ]; then
593
+ makegray="$setcspace -colorspace gray -type grayscale"
594
+ else
595
+ makegray=""
596
+ fi
597
+ #echo "makegray=$makegray"
598
+
599
+ # set up enhance
600
+ if [ "$enhance" = "stretch" ]; then
601
+ enhancing="$setcspace -contrast-stretch 0"
602
+ elif [ "$enhance" = "normalize" ]; then
603
+ enhancing="$setcspace -normalize"
604
+ else
605
+ enhancing=""
606
+ fi
607
+ #echo "enhancing=$enhancing"
608
+
609
+ # setup blurring
610
+ if [ "$threshold" = "" ]; then
611
+ blurring=""
612
+ else
613
+ # note: any 0<bluramt<=1, will be the same as using bluramt=1, since radius must be used as an integer
614
+ # bluramt=`convert xc: -format "%[fx:$threshold/100]" info:`
615
+ # blurring="-blur ${bluramt}x65535 -level ${threshold}x100%"
616
+ blurring="-blur 1x65535 -level ${threshold}x100%"
617
+ fi
618
+ #echo "blurring=$blurring"
619
+
620
+ # get background color
621
+ bgcolor=`echo "$bgcolor" | tr "[:upper:]" "[:lower:]"`
622
+ if [ "$bgcolor" = "image" ]; then
623
+ bgcolor=`convert $tmpA1 -format "%[pixel:u.p{0,0}]" info:`
624
+ fuzzval=$((100-fuzzval))
625
+ bgcolor=`convert $tmpA1 -fuzz $fuzzval% +transparent "$bgcolor" -scale 1x1! -alpha off -format "%[pixel:u.p{0,0}]" info:`
626
+ fi
627
+ #echo "$bgcolor"
628
+
629
+ # set up unrotate
630
+ if [ "$unrotate" = "yes" -a "$preserve" = "yes" ]; then
631
+ unrotating="-background $bgcolor -deskew 40% -gravity center -background $bgcolor -compose over -extent ${ww}x${hh}+0+0 +repage"
632
+ elif [ "$unrotate" = "yes" -a "$preserve" = "no" ]; then
633
+ unrotating="-background $bgcolor -deskew 40%"
634
+ else
635
+ unrotating=""
636
+ fi
637
+ #echo "unrotating=$unrotating"
638
+
639
+ # setup sharpening
640
+ if [ "$sharpamt" = "0" -o "$sharpamt" = "0.0" ]; then
641
+ sharpening=""
642
+ else
643
+ sharpening="-sharpen 0x${sharpamt}"
644
+ fi
645
+ #echo "sharpening=$sharpening"
646
+
647
+ # setup modulation
648
+ [ "$gray" = "yes" -o "$grayscale" = "Gray" -o "$typegray" != "" ] && saturation=100
649
+ if [ $saturation -eq 100 ]; then
650
+ modulation=""
651
+ else
652
+ modulation="-modulate 100,$saturation,100"
653
+ fi
654
+ #echo "modulation=$modulation"
655
+
656
# set up adaptiveblurring
# Treat both spellings of zero as "disabled", matching the sharpening setup
# (the -a option accepts floats, so 0.0 is a valid way to write the default).
if [ "$adaptblur" = "0" -o "$adaptblur" = "0.0" ]; then
    adaptiveblurring=""
else
    adaptiveblurring="-adaptive-blur $adaptblur"
fi
662
+
663
+ # set up trim
664
+ if [ "$trim" = "yes" -a "$hdri_on" != "" ]; then
665
+ # hdri is enabled
666
+ # need to round near white to pure white for trim to work
667
+ trimming="-white-threshold 99.9% -trim +repage "
668
+ elif [ "$trim" = "yes" -a "$hdri_on" = "" ]; then
669
+ # hdri is not enabled
670
+ trimming="-trim +repage "
671
+ else
672
+ trimming=""
673
+ fi
674
+ #echo "trimming=$trimming"
675
+
676
+ # set up pad
677
+ if [ $padamt -gt 0 ]; then
678
+ # note must reset -compose from -compose copy_opacity as -border uses -compose
679
+ padding="-compose over -bordercolor $bgcolor -border $padamt"
680
+ else
681
+ padding=""
682
+ fi
683
+ #echo "padding=$padding"
684
+
685
+ if [ "$invert" = 2 ]; then
686
+ inversion2="-negate"
687
+ else
688
+ inversion2=""
689
+ fi
690
+
691
+ # test compression for TIFF output
692
+ ofile="${outfile##*/}"
693
+ suffix="${ofile##*.}"
694
+ suffix2=`echo "$suffix" | tr "[:upper:]" "[:lower:]"`
695
+ if [ "$suffix2" = "tiff" -o "$suffix2" = "tif" ]; then
696
+ compressing="-compress $compression"
697
+ else
698
+ compressing=""
699
+ fi
700
+
701
+ # set up resize
702
+ if [ "$resize" = "" ]; then
703
+ resizing=""
704
+ else
705
+ resizing="-resize $resize%"
706
+ fi
707
+
708
+ # process image
709
+ convert -respect-parenthesis \( $tmpA1 $cropping $makegray $enhancing \) \
710
+ \( -clone 0 $setcspace -colorspace gray -negate -lat ${filtersize}x${filtersize}+${offset}% -contrast-stretch 0 $blurring \) \
711
+ -alpha off -compose copy_opacity -composite -fill "$bgcolor" -opaque none -alpha off \
712
+ $unrotating $sharpening $modulation $adaptiveblurring $resizing $trimming $padding $inversion2 $compressing \
713
+ "$outfile"
714
+
715
+ exit 0