Enable to recognize most kinds of characters as URL paths (#4941)
parent
d08cf543a3
commit
c71727ca55
@ -0,0 +1,42 @@
|
||||
# Overrides the URL-path related entries of Twitter::Regex::REGEXEN so that
# most kinds of characters (anything except whitespace, parentheses and "?")
# are recognized inside URL paths.
#
# NOTE(review): REGEXEN itself and the :valid_url_preceding_chars,
# :valid_domain, :valid_port_number, :valid_url_query_chars and
# :valid_url_query_ending_chars entries are not defined in this file. They are
# presumably provided by the twitter-text gem, which must be loaded before
# this file is evaluated -- confirm against the load order.
module Twitter
  class Regex
    # One character allowed in the middle of a URL path: anything that is not
    # whitespace, a parenthesis or a question mark.
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou

    # A parenthesized run of path characters, optionally containing one nested
    # level of parentheses.
    #
    # This must be assigned BEFORE :valid_url_path_ending_chars. That pattern
    # interpolates this entry when its literal is evaluated, so assigning it
    # afterwards would bake in whatever value was stored under this key
    # earlier instead of the definition below.
    REGEXEN[:valid_url_balanced_parens] = /
      \(
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}+
          |
          # allow one nested level of balanced parentheses
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}*
            \(
              #{REGEXEN[:valid_general_url_path_chars]}+
            \)
            #{REGEXEN[:valid_general_url_path_chars]}*
          )
        )
      \)
    /iox

    # What a URL path may end with: either a single character outside the
    # listed punctuation set, or a complete balanced-parentheses group.
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

    # One path segment: general characters interleaved with balanced
    # parentheses and closed by a valid ending, or general characters followed
    # by a slash.
    REGEXEN[:valid_url_path] = /(?:
      (?:
        #{REGEXEN[:valid_general_url_path_chars]}*
        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
        #{REGEXEN[:valid_url_path_ending_chars]}
      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
    )/iox

    # Full URL matcher rebuilt on top of the path patterns above. Capture
    # group numbering is documented inline.
    REGEXEN[:valid_url] = %r{
      (                                                                                     # $1 total match
        (#{REGEXEN[:valid_url_preceding_chars]})                                            # $2 Preceding character
        (                                                                                   # $3 URL
          (https?:\/\/)?                                                                    # $4 Protocol (optional)
          (#{REGEXEN[:valid_domain]})                                                       # $5 Domain(s)
          (?::(#{REGEXEN[:valid_port_number]}))?                                            # $6 Port number (optional)
          (/#{REGEXEN[:valid_url_path]}*)?                                                  # $7 URL Path and anchor
          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
        )
      )
    }iox
  end
end
|
Loading…
Reference in new issue