Posts Tagged ‘正则’

30th
九月

Java SE简记(16): 正则之EmailSpider
马克龙 发布于: 2009年09月30日 开发与应用 标签:, , 评论 (0)

import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.*;
 
/**
 * Scans the file {@code test.htm} in the working directory line by line and prints every
 * substring that looks like an e-mail address to standard output, one match per line.
 */
public class EmailSpider {
  /** File read by {@link #main(String[])}; resolved against the current working directory. */
  private static final String INPUT_FILE = "test.htm";

  /**
   * Simplified address pattern, loosely modelled on RFC 2821 / RFC 2822 (it is not a full
   * implementation of either grammar). Compiled once because it is reused for every line.
   *
   * <p>Shape: an ASCII letter, then word-character runs optionally joined by a single
   * {@code -} or {@code _}; an {@code @}; one domain label built the same way; a dot and a
   * 2-3 letter suffix; optionally one more dot and a 2-letter suffix.
   *
   * <p>Because the domain part allows only one label before the suffix, an address such as
   * {@code a@mail.example.org} is reported as {@code a@mail.exa}.
   *
   * <p>NOTE(review): the nested quantifiers may backtrack heavily on very long runs of word
   * characters that are not followed by {@code @} - confirm before using on untrusted input.
   */
  // A looser alternative that was considered:
  //   \\w+([-.+]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*
  private static final Pattern EMAIL = Pattern.compile(
      "[a-zA-Z](\\w*[-_]?\\w+)*@(\\w*[-_]?\\w+)+\\.[a-zA-Z]{2,3}(\\.[a-zA-Z]{2})?");

  /**
   * Reads {@link #INPUT_FILE} and prints all e-mail matches found in it.
   *
   * <p>I/O failures (including a missing file) are reported via a stack trace on standard
   * error; the method then returns normally.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // try-with-resources guarantees the reader (and the underlying file handle) is closed
    // whether the loop finishes normally or readLine() throws.
    try (BufferedReader br = new BufferedReader(new FileReader(INPUT_FILE))) {
      String line;
      while ((line = br.readLine()) != null) {
        parse(line);
      }
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Prints every non-overlapping match of {@link #EMAIL} in the given line, in order of
   * appearance.
   *
   * @param line one line of input text; must not be {@code null}
   */
  private static void parse(String line) {
    Matcher m = EMAIL.matcher(line);
    while (m.find()) {
      System.out.println(m.group());
    }
  }
}
import java.util.regex.Pattern;
import java.util.regex.Matcher;
 
/**
 * Demonstrates numbered back-references in regular expressions: {@code \n} must repeat
 * exactly the text captured by group {@code n}. Groups are numbered by the position of
 * their opening parenthesis, so in {@code (\d(\d))} the inner group is number 2.
 */
public class BackReference {
  /**
   * Prints, one per line, whether each sample input is matched in full by its pattern.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // Group 1 captures "12" and \1 requires "12" again -> true.
    printWholeMatch("(\\d\\d)\\1", "1212");

    // Group 2 (the inner one) captures "2"; the next character is "1" -> false.
    printWholeMatch("(\\d(\\d))\\2", "1212");

    // Group 2 captures "2" and the next character is also "2" -> true.
    printWholeMatch("(\\d(\\d))\\2", "122");
  }

  /**
   * Prints {@code true} if the entire input matches the regular expression, otherwise
   * {@code false}.
   *
   * @param regex the pattern to test
   * @param input the text that must be matched from start to end
   */
  private static void printWholeMatch(String regex, String input) {
    boolean matchedEntirely = Pattern.matches(regex, input);
    System.out.println(matchedEntirely);
  }
}
import java.util.regex.Pattern;
import java.util.regex.Matcher;
 
/**
 * Demonstrates zero-width lookahead and lookbehind assertions by printing every match of
 * several patterns against the fixed sample text "55a66b". The asserted character is
 * checked but never consumed by the assertion itself.
 */
public class NonCapturing {
  /**
   * Runs the demos in order, printing a separator line between the groups.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    final String separator = "---------";

    pairFollowedByA();
    pairStartingWithA();
    System.out.println(separator);

    pairNotStartingWithA();
    pairNotFollowedByA();
    System.out.println(separator);

    pairEndingWithA();
    System.out.println(separator);

    pairNotEndingWithA();
  }

  /*
   * Positive lookahead after the pair: the two characters must be followed by 'a',
   * and the 'a' is not part of the match. Prints:
   *   55
   */
  private static void pairFollowedByA() {
    printMatches(".{2}(?=a)");
  }

  /*
   * Positive lookahead before the pair: the first matched character must be 'a'. Prints:
   *   a6
   */
  private static void pairStartingWithA() {
    printMatches("(?=a).{2}");
  }

  /*
   * Negative lookahead before the pair: the first matched character must not be 'a'.
   * Prints:
   *   55
   *   66
   */
  private static void pairNotStartingWithA() {
    printMatches("(?!a).{2}");
  }

  /*
   * Negative lookahead after the pair: the two characters must not be followed by 'a';
   * the pair itself may contain 'a'. Prints:
   *   5a
   *   66
   */
  private static void pairNotFollowedByA() {
    printMatches(".{2}(?!a)");
  }

  /*
   * Positive lookbehind after the pair: the character just before the end of the match,
   * i.e. the pair's last character, must be 'a'. Prints:
   *   5a
   */
  private static void pairEndingWithA() {
    printMatches(".{2}(?<=a)");
  }

  /*
   * Negative lookbehind after the pair: the pair's last character must not be 'a'.
   * Prints:
   *   55
   *   a6
   *   6b
   */
  private static void pairNotEndingWithA() {
    printMatches(".{2}(?<!a)");
  }

  /* Prints each successive match of the given pattern in the sample text on its own line. */
  private static void printMatches(String regex) {
    for (Matcher hit = Pattern.compile(regex).matcher("55a66b"); hit.find(); ) {
      System.out.println(hit.group());
    }
  }
}