Keywords: Urdu Word Segmentation; Space Omission problem; Space Insertion problem. I. INTRODUCTION. Word tokenization is a preliminary step in any system ...
2012 International Conference on Asian Language Processing
A Dictionary Based Urdu Word Segmentation Using Maximum Matching Algorithm for Space [Prob]lem Rabiya Rashid, Seemab Latif College of Telecommunication Engineering, National […] [b]ecause very little work has been done. In Urdu, space cannot be used in marking word boundary because it is not consistently used. Urdu word segmentation is different from other Asian languages in that it consists of both Space [Prob]lems. It uses simple and already used basic techniques in a different way to develop an efficient Segmentation Algorithm. Morphological analysis of Urdu Text is also taken into account. Dictionary is used for verification and identification of Urdu Words. This work has been tested on words collected from Geo¹, Jang², BBC³ news sites and other online documents available on internet. The proposed algorithm has been tested on 11,99[?] words and 9[?].2% of these words are segmented correctly.
II.
A lot of work is being carried out in press)
IV.
A. Tokenization Module Space can help in tokenizing some words ink)  wgsy(books)
f{|u
x|u} (existence)
z{j (rule) ~u (judge)
{j 
kn (level)  wjvs(star) llu (latest) j{x{ 
jvr
i (sick) w k
n{(rich) llu (latest) xvy(fertilizer)
(night) {x (stain)
3UI6U@?!?Gmerged words). Here dictionary plays its role. It is checked whether the clustered word is in the dictionary or not: if it is, this means it is a legal word and it does not need to be segmented anymore; if it is not present in the dictionary, it means it consists of more than one word. E.g. the first clustered word is checked, i.e. g in our case; if it is present in the dictionary it is a legal word. Likewise every clustered word is checked in the dictionary. In case of {k
, it will not be present in dictionary; this means it consists of more than one word.
Consider a sentence: A contented mind is the greatest blessing a man can enjoy in this world. Space deletion problem is Acontentedmindisthegreatestblessingamancanenjoyinthisworld while Space Insertion problem is A con tented mind. This is the case with car ands). If there is a multi-term word and first term ends at joiner alphabet, there will be a space between the terms; this is the Space Insertion problem, e.g. lmn opq  (healthy). We have proposed a solution that segment
[D]ivide sentence into CW
}
if(CW is present in dictionary)
{
    Add to words list
}
else
{
   Divide the CW into ligatures
          if(dictionary contains ligatures)
         {
            Add to word list
         }
        else
         {
            Combine ligatures and then check in dictionary
        }
}
if(two consecutive words in word list are valid)
{
Merge them into a single word
Figure 1: Architecture of the proposed algorithm
C. Space Omission Module Till now all the single term words are separated. Now identification of boundaries in the merged words is the task. Marking boundary in this case is a challenge because it can be segmented in a number of ways. "& k
&&{&&
$& {k
& %& k
&&{&C
L
&&&&&&&&&&
Figure 2: Pseudo-code of Proposed algorithm
In this case the number of ligatures is 2, that is why it seems easier. In other conditions it can be more complex. Next each clustered word is divided into its consisting ligatures i.e. {k
will be converted into   {  k
Next, combinations of these ligatures are produced and it is checked whether the dictionary contains that combination or not. In our case first ligature is  k
. It is present in the dictionary, hence this is a valid word. After that the second ligature is checked, in our case, {; it is not a valid word so we will combine this ligature with the next and check if it is a valid word. The combination is a valid word so it will be segmented as {. Same process is repeated for each clustered word. This is how space omission problem is solved. A big issue is which algorithm to use: should the longest matching algorithm be used or the maximum matching algorithm? We are using a combination of these algorithms. Consider a clustered word ij}{jvy. Following will be its ligatures • vy • j • { • } • j • i First vy is checked in dictionary, it is a valid word. Then first and second ligature is combined and it is checked in
dictionary for its validity, jvy is a valid word too. Now which segmentation is correct? The word is vy or jvy? The rest of the ligatures will be combined to check for the correct segmentation. First, it is checked for vy. Next word will be generated by combining ligatures; }{j is a valid word but ij is not, hence the first word is jvy. D. Space Insertion Module Then in the end there is this space insertion problem which needs to be solved. Multi-terms need to be detected. Now we have separated each word. We will combine two words and check if it's a valid word or not. If it is, those words will be merged; e.g. consider word l xv, till now xv and l are different words. We will combine these words and check if they are present in dictionary; if they are present these words will be merged into a single word. Figure 1 shows the overall architecture and Figure 2 presents the pseudo code of the proposed algorithm.
!9+-2:/,-? is not a valid word neither in English nor in UVVa#?. Baseline for
#
104