A function I use to extract email addresses from text files. The original source is here: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/138889 (the regexp was edited).
def grab_email(files=None):
    """Return the unique email addresses found in the given text files.

    Each file is scanned line by line and every substring matching a basic
    address pattern is collected. The pattern supports most common
    addresses, but not every valid one.

    Args:
        files: Iterable of paths to text files. ``None`` (the default) or an
            empty iterable yields an empty result.

    Returns:
        A list of unique addresses, in order of first appearance.

    Raises:
        OSError: If one of the files cannot be opened or read.
    """
    # Local import keeps the function self-contained; the snippet has no
    # module-level import section.
    import re

    found = []
    if files is not None:
        mailsrch = re.compile(r'[\w\-][\w\-\.]*@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')

        for path in files:
            # The context manager closes each file even if reading fails.
            with open(path, 'r') as handle:
                for line in handle:
                    found.extend(mailsrch.findall(line))

    # Remove duplicates while keeping first-seen order, and return a real
    # list (dict.keys() is only a view on Python 3).
    return list(dict.fromkeys(found))
Permet de parser un texte de type textarea contenant des urls et de les extraire. Renvoie un objet element. Le code intégral pour twitter est ici http://www.rsr.ch/la-1ere/twitter.js
/**
 * Convert plain text (e.g. the content of a textarea) into a DOM element in
 * which every URL is wrapped in a link.
 *
 * When the text contains at least one URL, the returned <span> holds, in
 * document order, a <span> for each non-empty run of plain text and an <a>
 * (href and label both set to the URL) for each URL. When it contains no
 * URL, the returned <span> holds a single text node with the whole text.
 *
 * @param {string} text The text to scan; null/undefined is treated as ''.
 * @return {Element} The <span> element wrapping the generated nodes.
 */
function parseURL(text) {

    // Append a <span> holding `content` to `parent`.
    function appendTextSpan(parent, content) {
        var span = document.createElement('span');
        span.appendChild(document.createTextNode(content));
        parent.appendChild(span);
    }

    if (text === null || text === undefined) {
        text = '';
    }
    text = String(text);

    // the main tag to be returned
    var texttag = document.createElement('span');
    // url regex: "http" followed by any run of non-whitespace characters
    // (matches exactly what /http[\S\.\/:]*/ matched, since \S already
    // covers '.', '/' and ':')
    var url_pattern = /http\S*/g;
    var cursor = 0;        // position in text just after the last node emitted
    var found = false;
    var match;

    // exec() on a global regex reports the position of every match, so
    // repeated URLs and the text between links are sliced from real offsets
    // instead of being guessed from string lengths.
    while ((match = url_pattern.exec(text)) !== null) {
        var url = match[0];
        found = true;

        // create text part tag (don't create an empty span tag)
        if (match.index > cursor) {
            appendTextSpan(texttag, text.substring(cursor, match.index));
        }

        // create link tag
        var linktag = document.createElement('a');
        linktag.setAttribute('href', url);
        linktag.appendChild(document.createTextNode(url));
        texttag.appendChild(linktag);

        cursor = match.index + url.length;
    }

    if (!found) {
        // no URL at all: return the text untouched
        texttag.appendChild(document.createTextNode(text));
    }
    else if (cursor < text.length) {
        // create last text part tag
        appendTextSpan(texttag, text.substring(cursor));
    }
    return texttag;
}
Permet de parser un texte de type textarea contenant des urls et de les extraire. Renvoie un objet element. Le code intégral pour twitter est ici http://www.rsr.ch/la-1ere/twitter.js
/**
 * Convert plain text (e.g. the content of a textarea) into a DOM element in
 * which every URL is wrapped in a link.
 *
 * When the text contains at least one URL, the returned <span> holds, in
 * document order, a <span> for each non-empty run of plain text and an <a>
 * (href and label both set to the URL) for each URL. When it contains no
 * URL, the returned <span> holds a single text node with the whole text.
 *
 * @param {string} text The text to scan; null/undefined is treated as ''.
 * @return {Element} The <span> element wrapping the generated nodes.
 */
function parseURL(text) {

    // Append a <span> holding `content` to `parent`.
    function appendTextSpan(parent, content) {
        var span = document.createElement('span');
        span.appendChild(document.createTextNode(content));
        parent.appendChild(span);
    }

    if (text === null || text === undefined) {
        text = '';
    }
    text = String(text);

    // the main tag to be returned
    var texttag = document.createElement('span');
    // url regex: "http" followed by any run of non-whitespace characters
    // (matches exactly what /http[\S\.\/:]*/ matched, since \S already
    // covers '.', '/' and ':')
    var url_pattern = /http\S*/g;
    var cursor = 0;        // position in text just after the last node emitted
    var found = false;
    var match;

    // exec() on a global regex reports the position of every match, so
    // repeated URLs and the text between links are sliced from real offsets
    // instead of being guessed from string lengths.
    while ((match = url_pattern.exec(text)) !== null) {
        var url = match[0];
        found = true;

        // create text part tag (don't create an empty span tag)
        if (match.index > cursor) {
            appendTextSpan(texttag, text.substring(cursor, match.index));
        }

        // create link tag
        var linktag = document.createElement('a');
        linktag.setAttribute('href', url);
        linktag.appendChild(document.createTextNode(url));
        texttag.appendChild(linktag);

        cursor = match.index + url.length;
    }

    if (!found) {
        // no URL at all: return the text untouched
        texttag.appendChild(document.createTextNode(text));
    }
    else if (cursor < text.length) {
        // create last text part tag
        appendTextSpan(texttag, text.substring(cursor));
    }
    return texttag;
}
Pages : 1