Welcome, guest! Login / Register - Why register?
[ Pastes: 81309 ] [ Members: 2131 ]

Paste

Short URL: http://goo.gl/EuhVn
Pasted as Java by kasun on Friday, March 30th, 2012 5:16pm ( 4 years ago )
         else if (userPath.equals("/topkphrases")) {
                        
              byte[] b = null;
           try {
               b = getBLOB("2");
           } catch (Exception ex) {
                Logger.getLogger(ControlerServelet.class.getName()).log(Level.SEVERE, null, ex);
           }
     
                                             ArrayList<String> sentenceList = new ArrayList<String>();      
 Line------>(ControlerServelet.java:585)     sentenceList=pdfDocToSentence(b);
         
                                              request.setAttribute("allphrases", sentenceList);
     
        } 




/**
 * Splits a block of text into sentences using {@code DocumentPreprocessor}.
 *
 * <p>Each sentence is stored as the {@code toString()} of its token list, so
 * the entries are the tokenized form rather than the original substring.
 *
 * @param paragraph the text to segment; may be {@code null}
 * @return one entry per sentence found, in document order; an empty list when
 *         {@code paragraph} is {@code null}
 */
public ArrayList<String> txtDocToSentence(String paragraph) {
    ArrayList<String> sentenceList = new ArrayList<String>();

    // new StringReader(null) throws NullPointerException, so treat missing
    // text as "no sentences" instead of crashing the caller.
    if (paragraph == null) {
        return sentenceList;
    }

    for (List<HasWord> sentence : new DocumentPreprocessor(new StringReader(paragraph))) {
        sentenceList.add(sentence.toString());
    }
    return sentenceList;
}

    
public ArrayList<String> pdfDocToSentence(byte[] buf ) throws IOException {

    PDFParser parser;
    String parsedText;
    PDFTextStripper pdfStripper;
    PDDocument pdDoc = null ;
    COSDocument cosDoc = null ;
   
        try {
            parser = new PDFParser(new ByteArrayInputStream(buf));   
           // parser = new PDFParser(new FileInputStream(f));
        } catch (Exception e) {
           // System.out.println("Unable to open PDF Parser.");
            return null;
        }
        {
        try {
            parser.parse();
            cosDoc = parser.getDocument();
            pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            parsedText = pdfStripper.getText(pdDoc);
            //System.out.println("An exception occured in parsing the PDF Document.");
        } catch (Exception e) {
            try {
                   if (cosDoc != null) cosDoc.close();
                   if (pdDoc != null) pdDoc.close();
               } catch (Exception e1) {
            }
            return null;
        }
        
                                          finally{
 Line--&gt;(ControlerServelet.java:755)      cosDoc.close();
                                           pdDoc.close();
         }
        
}


        if (parsedText == null) {
         //System.out.println("PDF to Text Conversion failed.");
        }
        

return txtDocToSentence(parsedText);
        
    }

 

Revise this Paste

Parent: 47395
Your Name:
Code Language:

To minimise the influx of spam: you are required to register to post any code.