// PdfFormFiller.java -- forked from nkitsul/pdfformfiller2.
/**
* pdfformfiller 1.0-alpha is a command line utility for filling in Adobe PDF Forms.
*
* The well-known pdftk utility can also be used for filling in Adobe PDF forms.
* However, I was not able to get pdftk version 1.4 to work with UTF-8.
* Its XFDF format supports UTF-8 encoding, but it assumes Adobe uses a UTF-8
* font by default, whereas Adobe Readers (at least up to version X) do not:
* the UTF-8 text is entered by pdftk but is not shown in the form until the user
* clicks on the form and edits it.
*
* In PdfFormFiller, you can use the -font option to specify a UTF-8 font
* to use to fill in the forms to resolve this issue.
*
* Also, our fields input file format is much simpler than pdftk's XFDF, which
* requires XML parsing.
*
* Based on the Belgian iText library v. 5.2.0, http://www.itextpdf.com/
*
* (C) copyleft AGPL license, http://itextpdf.com/terms-of-use/agpl.php, Nikolay Kitsul.
*
* @author Nikolay Kitsul
* @version 1.0-alpha
*/
package PdfFormFiller;
import java.io.*;
import java.io.OutputStream;
import java.util.*;
import java.util.Map;
import java.util.Scanner;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.*;
//import com.itextpdf.text.pdf.Item;
/**
 * Signals that the command line arguments were missing or malformed.
 */
class WrongParamsExeption extends Exception {
    private static final long serialVersionUID = 1L;

    /** Creates the exception without a detail message. */
    WrongParamsExeption() {
        super();
    }

    /**
     * Creates the exception with a description of what was wrong.
     *
     * @param message detail message, later available through {@link #getMessage()}.
     */
    WrongParamsExeption(String message) {
        super(message);
    }
}
public class PdfFormFiller {
static Boolean verbose;
/**
 * Command line entry point.
 * <p>
 * The first argument is the source PDF. The remaining arguments are the options
 * {@code -v}, {@code -flatten}, {@code -l}, {@code -f fields_filename} and
 * {@code -font font_file}; any other argument is accepted only in the last
 * position, where it names the output file. On a malformed command line the
 * specific problem and the usage text are printed and the JVM exits with status 1.
 *
 * @param args the command line arguments
 */
public static void main(String[] args){
    verbose = false;
    if (args.length < 1) {
        printUsageAndExit("Missing required argument: document.pdf");
        return;
    }

    final String document = args[0];
    String operation = "fill";
    String fields = null;
    String font = null;
    String output = null;
    boolean flatten = false;

    for (int i = 1; i < args.length; i++) {
        final String arg = args[i];
        if ("-v".equals(arg)) {
            verbose = true;
        } else if ("-flatten".equals(arg)) {
            flatten = true;
        } else if ("-l".equals(arg)) {
            operation = "list";
        } else if ("-f".equals(arg)) {
            if (i + 1 >= args.length) {
                printUsageAndExit("Option -f requires a fields file name.");
                return;
            }
            fields = args[++i];
        } else if ("-font".equals(arg)) {
            if (i + 1 >= args.length) {
                printUsageAndExit("Option -font requires a font file name.");
                return;
            }
            font = args[++i];
        } else if (i + 1 == args.length) {
            // An unrecognized argument is only valid as the trailing output file name.
            output = arg;
        } else {
            printUsageAndExit("Unexpected argument: " + arg);
            return;
        }
    }

    fillPDFFile(document, output, fields, font, operation, flatten, verbose);
}

/**
 * Prints an optional problem description followed by the usage text, then
 * terminates the JVM with exit status 1.
 *
 * @param problem what was wrong with the command line, or {@code null} to print only the usage text.
 */
private static void printUsageAndExit(String problem){
    if (problem != null) {
        System.out.println(problem);
    }
    System.out.println("USAGE: pdfformfiller document.pdf [ -l ] [ -v ] [ -f fields_filename ] [ -font font_file ] [ -flatten] [ output.pdf ]\n\n" +
        " document.pdf - name of source pdf file (required).\n" +
        " -l - only list availible fields in document.pdf.\n" +
        " -v - verbose. Use to debug the fields_filename file. \n" +
        " -f fields_filename - name of file with the list of fields values to apply to document.pdf. \n" +
        " if ommited, stdin is used.\n" +
        " -font font_file - font to use. Needed UTF-8 support, e.g. cyrillic and non-latin alphabets.\n" +
        " -flatten - Flatten pdf forms (convert them to text disabling editing in PDF Reader).\n" +
        " output.pdf - name of output file. If omitted, the output if sent to stdout. \n\n" +
        "fields_filename file can be in UTF-8 as is of the following format:\n" +
        " On each line, one entry consists of 'field name' followed by value of that field without any quotes.\n" +
        " Any number of whitespaces allowed before 'field name', and one space separates 'field name' and its value.\n" +
        " In value, newline characters should be encoded as \"\\n\",\n" +
        " 'U+2029 utf-8 E280A9 : PARAGRAPH SEPARATOR PS' should be encoded as \"\\p\",\n" +
        " and '\\' characters should be escaped as \"\\\\\".\n" +
        " For checkboxes, values are 'Yes'/'Off'.\n\n" +
        " Based on the Belgian iText library v. 5.2.0, http://www.itextpdf.com/\n"
    );
    System.exit(1);
}
/**
 * Convenience overload that fills the form with the default settings:
 * no substitution font, the "fill" operation, no flattening and no verbose output.
 *
 * @param pdf_filename_in  name of the source PDF file.
 * @param pdf_filename_out name of the output file, passed through unchanged.
 * @param fields_filename  name of the file with field values, passed through unchanged.
 */
public static void fillPDFFile(String pdf_filename_in, String pdf_filename_out, String fields_filename){
    final String noFont = null;
    final String fillOperation = "fill";
    fillPDFFile(pdf_filename_in, pdf_filename_out, fields_filename,
            noFont, fillOperation, Boolean.FALSE, Boolean.FALSE);
}
/**
 * Lists the form fields of a PDF, or fills them in and writes the result.
 * <p>
 * When {@code op} is {@code "list"} the field names are printed with
 * {@link #formList(AcroFields)} and nothing is written; the output file is not
 * opened at all. For any other operation the field values are read with
 * {@link #readFile(String)}, applied to the form, and the stamped document is
 * written to {@code pdf_filename_out}, or to standard output when that is
 * {@code null}.
 * <p>
 * Diagnostics are written to standard error, because standard output may be
 * carrying the generated PDF.
 * <p>
 * On failure an error message is printed to standard error and the JVM is
 * terminated: exit status 2 for a missing file, 3 for another I/O error and
 * 4 for a document processing error.
 *
 * @param pdf_filename_in  name of the source PDF file.
 * @param pdf_filename_out name of the output file, or {@code null} for standard output.
 * @param fields_filename  name of the file with field values, or {@code null} for standard input.
 * @param font_file        font to register as substitution font, or {@code null} for none.
 * @param op               {@code "list"} to list field names; anything else fills the form.
 * @param flatten          whether to flatten the form; {@code null} is treated as {@code false}.
 * @param verbose          whether to log every assignment; {@code null} is treated as {@code false}.
 */
public static void fillPDFFile(String pdf_filename_in, String pdf_filename_out, String fields_filename, String font_file, String op, Boolean flatten, Boolean verbose) {
    // Boxed flags may legitimately be null when called as a library; never unbox them blindly.
    final boolean chatty = Boolean.TRUE.equals(verbose);
    final boolean flattenForm = Boolean.TRUE.equals(flatten);
    try {
        PdfReader reader = new PdfReader(pdf_filename_in);
        try {
            if ("list".equals(op)) {
                // Listing only reads the form, so no stamper is created and no output is opened.
                formList(reader.getAcroFields());
                return;
            }

            OutputStream os;
            if (pdf_filename_out != null) {
                os = new FileOutputStream(pdf_filename_out);
            } else {
                os = System.out;
            }
            PdfStamper stamp = new PdfStamper(reader, os, '\0');
            AcroFields form = stamp.getAcroFields();
            if (font_file != null) {
                BaseFont bf = BaseFont.createFont(font_file, BaseFont.IDENTITY_H, true);
                form.addSubstitutionFont(bf);
            }
            Map<String, String> fields = readFile(fields_filename);
            for (Map.Entry<String, String> entry : fields.entrySet()) {
                final String name = entry.getKey();
                final String value = entry.getValue();
                if (chatty) {
                    System.err.println("Field name = '" + name + "', New field value: '" + value + "'");
                }
                // setField reports failure through its return value; surface it instead of dropping it.
                if (!form.setField(name, value)) {
                    System.err.println("Warning: field '" + name + "' could not be set.");
                }
            }
            stamp.setFormFlattening(flattenForm);
            stamp.close();
        } finally {
            reader.close();
        }
    } catch (FileNotFoundException e) {
        System.err.println("FileNotFoundException: " + e.getMessage());
        System.exit(2);
    } catch (IOException e) {
        System.err.println("Input output error: " + e.getMessage());
        System.exit(3);
    } catch (DocumentException e) {
        System.err.println("Error while processing document: " + e.getMessage());
        System.exit(4);
    }
}
/**
 * Prints the name of every field of the given form to standard output,
 * one per line, framed by a "Field names:" header and an "END: Field names" footer.
 *
 * @param form the form whose field names are printed.
 */
public static void formList(AcroFields form){
    // Resolve the names before printing anything, so a failure leaves no partial output.
    final Set<String> fieldNames = form.getFields().keySet();
    final PrintStream out = System.out;
    out.println("Field names:");
    for (String fieldName : fieldNames) {
        out.println(fieldName);
    }
    out.println("END: Field names");
}
/**
 * Reads field names and their values from a text file or from standard input.
 * <p>
 * Each line is trimmed and then split at its first whitespace character: the
 * part before it is the <i>field name</i>, everything after it is the value,
 * taken without any quotes. Lines that contain no whitespace after trimming
 * (including empty lines) are skipped. The value is decoded with
 * {@link #unescape(String)}, so newlines are written as \n and '\' characters
 * as "\\". If a field name occurs more than once, the last value wins.<br>
 * For checkboxes, values are 'Yes'/'Off'.
 * <p>
 * A named file is decoded as UTF-8; standard input is read with the platform
 * default charset. Diagnostics are written to standard error.
 *
 * @param filename name of file with fields and their values, or {@code null} to read standard input.
 * @return the parsed fields, in the order in which they first appeared in the input.
 * @throws java.io.FileNotFoundException if the named file cannot be opened.
 */
public static Map<String, String> readFile(String filename) throws java.io.FileNotFoundException{
    // The static flag is a boxed Boolean that is still null unless main() has run;
    // a plain "if (verbose)" would then fail on unboxing.
    final boolean chatty = Boolean.TRUE.equals(verbose);
    final String sourceName = (filename == null) ? "stdin" : filename;
    final Map<String, String> fields = new LinkedHashMap<String, String>();

    final Scanner input;
    if (filename != null) {
        input = new Scanner(new File(filename), "UTF-8");
    } else {
        input = new Scanner(System.in);
    }

    try {
        int lineNumber = 0;
        // hasNextLine() pairs with nextLine(); hasNext() would stop early on trailing blank lines.
        while (input.hasNextLine()) {
            lineNumber++;
            final String line = input.nextLine().trim();
            final String[] parts = line.split("\\s", 2);
            if (parts.length == 2) {
                fields.put(parts[0], unescape(parts[1]));
            } else if (chatty) {
                System.err.println("Line " + lineNumber + ": " + line + "\nskipped.");
            }
        }
        // Scanner swallows read errors; report the last one rather than losing it.
        final IOException readError = input.ioException();
        if (readError != null) {
            readError.printStackTrace(System.err);
        }
        if (chatty) {
            System.err.println(lineNumber + " lines from " + sourceName + " parsed.");
        }
    } finally {
        input.close();
    }
    return fields;
}
/**
 * Decodes the backslash escapes used in field values.
 * <p>
 * A backslash followed by 'n' becomes a newline, followed by 'p' becomes
 * U+2029 (PARAGRAPH SEPARATOR), and followed by another backslash becomes a
 * single backslash. A backslash followed by any other character is kept
 * together with that character, and a backslash at the very end of the
 * string is kept as is.
 *
 * @param str the escaped text, may be {@code null}.
 * @return the decoded text, or {@code null} if {@code str} is {@code null}.
 */
public static String unescape(String str){
    if (str == null) {
        return null;
    }
    final int length = str.length();
    final StringBuilder out = new StringBuilder(length);
    for (int offset = 0; offset < length; offset++) {
        final char ch = str.charAt(offset);
        if (ch != '\\' || offset + 1 >= length) {
            out.append(ch);
            continue;
        }
        // Consume the character following the backslash as well.
        final char next = str.charAt(++offset);
        switch (next) {
            case '\\':
                out.append('\\');
                break;
            case 'n':
                out.append('\n');
                break;
            case 'p':
                // U+2029 utf-8 E280A9 : PARAGRAPH SEPARATOR PS
                out.append('\u2029');
                break;
            default:
                // Append the two characters separately: adding two chars with '+'
                // is integer arithmetic and would append a number instead.
                out.append(ch).append(next);
                break;
        }
    }
    return out.toString();
}
}